diff --git a/src/bot/bot.py b/src/bot/bot.py index eeae995..af763ee 100644 --- a/src/bot/bot.py +++ b/src/bot/bot.py @@ -1,4 +1,5 @@ from aiogram import Bot, Dispatcher +from aiohttp_socks import ProxyConnector from aiogram.client.default import DefaultBotProperties from src.bot.handlers import get_router from src.storage.base import IVectorStore @@ -8,7 +9,10 @@ def setup_bot(token: str, storage: IVectorStore, processor: ILLMProvider, allowe """ Setup the aiogram Bot and Dispatcher with handlers. """ - bot = Bot(token=token, default=DefaultBotProperties(parse_mode="HTML")) + connector = ProxyConnector.from_url("socks5://127.0.0.1:1080", rdns=True) + bot = Bot(token=token, + default=DefaultBotProperties(parse_mode="HTML"), + connector=connector) dp = Dispatcher() dp.include_router(get_router(storage, processor, allowed_chat_id)) return bot, dp diff --git a/tests/crawlers/test_factory.py b/tests/crawlers/test_factory.py index d29944d..560892f 100644 --- a/tests/crawlers/test_factory.py +++ b/tests/crawlers/test_factory.py @@ -10,6 +10,7 @@ from src.crawlers.microsoft_research_crawler import MicrosoftResearchCrawler from src.crawlers.static_crawler import StaticCrawler from src.crawlers.skolkovo_crawler import SkolkovoCrawler from src.crawlers.cppconf_crawler import CppConfCrawler +from src.crawlers.github_crawler import GitHubTrendingCrawler VALID_YAML = """ crawlers: @@ -125,7 +126,7 @@ def test_integration_load_actual_config(): # Verify types and mandatory fields for all loaded crawlers for crawler in crawlers: - assert isinstance(crawler, (RSSCrawler, PlaywrightCrawler, StaticCrawler, SkolkovoCrawler, CppConfCrawler, SciRateCrawler, ScholarCrawler, MicrosoftResearchCrawler)) + assert isinstance(crawler, (RSSCrawler, PlaywrightCrawler, StaticCrawler, SkolkovoCrawler, CppConfCrawler, SciRateCrawler, ScholarCrawler, MicrosoftResearchCrawler, GitHubTrendingCrawler)) if not isinstance(crawler, ScholarCrawler): assert crawler.url.startswith("http") assert 
crawler.source diff --git a/tests/crawlers/test_new_crawlers.py b/tests/crawlers/test_new_crawlers.py index 51eff7f..dcfc08e 100644 --- a/tests/crawlers/test_new_crawlers.py +++ b/tests/crawlers/test_new_crawlers.py @@ -1,27 +1,69 @@ import pytest import aiohttp +from datetime import datetime, timezone +from unittest.mock import patch, MagicMock, AsyncMock from src.crawlers.static_crawler import StaticCrawler from src.crawlers.skolkovo_crawler import SkolkovoCrawler from src.crawlers.dto import NewsItemDTO @pytest.mark.asyncio async def test_static_crawler_addmeto(): - crawler = StaticCrawler(url="https://t.me/s/addmeto", source="Telegram: Addmeto", selector=".tgme_widget_message_text") - items = await crawler.fetch_latest() - assert len(items) > 0 - assert items[0].source == "Telegram: Addmeto" + html_content = """ +
<div class="tgme_widget_message_text">
+        <a href="https://t.me/addmeto/123">Message Link</a>
+        Some content text about AI.
+    </div>
+ """ + with patch("aiohttp.ClientSession.get") as mock_get: + mock_response = MagicMock() + mock_response.status = 200 + mock_response.text = AsyncMock(return_value=html_content) + mock_response.__aenter__.return_value = mock_response + mock_get.return_value = mock_response + + crawler = StaticCrawler(url="https://t.me/s/addmeto", source="Telegram: Addmeto", selector=".tgme_widget_message_text") + items = await crawler.fetch_latest() + assert len(items) > 0 + assert items[0].source == "Telegram: Addmeto" + assert "t.me/addmeto/123" in items[0].url + @pytest.mark.asyncio async def test_static_crawler_rsf(): - crawler = StaticCrawler(url="https://rscf.ru/en/news/", source="RSF", selector=".news-item") - items = await crawler.fetch_latest() - assert len(items) > 0 - assert items[0].source == "RSF" - assert "rscf.ru" in items[0].url + html_content = """ +
<div class="news-item">
+        <a href="https://rscf.ru/en/news/123">RSF News Title</a>
+        Description of news.
+    </div>
+ """ + with patch("aiohttp.ClientSession.get") as mock_get: + mock_response = MagicMock() + mock_response.status = 200 + mock_response.text = AsyncMock(return_value=html_content) + mock_response.__aenter__.return_value = mock_response + mock_get.return_value = mock_response + + crawler = StaticCrawler(url="https://rscf.ru/en/news/", source="RSF", selector=".news-item") + items = await crawler.fetch_latest() + assert len(items) > 0 + assert items[0].source == "RSF" + assert "rscf.ru/en/news/123" in items[0].url @pytest.mark.asyncio async def test_skolkovo_crawler(): - crawler = SkolkovoCrawler(url="https://sk.ru/news/", source="Skolkovo") - items = await crawler.fetch_latest() - assert len(items) > 0 - assert items[0].source == "Skolkovo" - assert "sk.ru" in items[0].url + html_content = """ +
<div class="news-card">
+        <div class="title">
+            <a href="https://sk.ru/news/123">Skolkovo News</a>
+        </div>
+    </div>
+ """ + with patch("src.crawlers.playwright_crawler.PlaywrightCrawler.fetch_latest") as mock_fetch: + mock_fetch.return_value = [ + NewsItemDTO(title="Skolkovo News", url="https://sk.ru/news/123", content_text="Text", source="Skolkovo", timestamp=datetime.now(timezone.utc)) + ] + crawler = SkolkovoCrawler(url="https://sk.ru/news/", source="Skolkovo") + items = await crawler.fetch_latest() + assert len(items) > 0 + assert items[0].source == "Skolkovo" + assert "sk.ru" in items[0].url +