From 9fdb4b35cd9b5cdccce77c4d2d751e3da8df07eb Mon Sep 17 00:00:00 2001
From: Artur Mukhamadiev
Date: Sun, 15 Mar 2026 01:32:25 +0300
Subject: [PATCH] Implement 'Top Ranked' feature and expand Habr sources

---
 src/bot/handlers.py                |  22 +++++
 src/crawlers.yml                   |  20 +++--
 src/storage/base.py                |   5 ++
 src/storage/chroma_store.py        |  24 ++++++
 tests/bot/test_handlers.py         |  31 +++++++
 tests/bot/test_hottest_command.py  | 102 +++++++++++++++++++++++
 tests/storage/test_chroma_store.py |  41 ++++++++++
 tests/storage/test_top_ranked.py   | 126 +++++++++++++++++++++++++++++
 8 files changed, 364 insertions(+), 7 deletions(-)
 create mode 100644 tests/bot/test_hottest_command.py
 create mode 100644 tests/storage/test_top_ranked.py

diff --git a/src/bot/handlers.py b/src/bot/handlers.py
index 23038de..d8cf135 100644
--- a/src/bot/handlers.py
+++ b/src/bot/handlers.py
@@ -51,6 +51,7 @@ def get_router(storage: IVectorStore, processor: ILLMProvider, allowed_chat_id:
             "/start - Start the bot\n"
             "/help - Show this help message\n"
             "/latest [category] - Show the latest enriched news trends\n"
+            "/hottest - Show top 10 ranked hot trends\n"
             "/search query - Search for news\n"
             "/stats - Show database statistics\n"
             "/params - Show LLM processor parameters\n"
@@ -93,6 +94,27 @@ def get_router(storage: IVectorStore, processor: ILLMProvider, allowed_chat_id:
 
         await message.answer("Latest news:", reply_markup=builder.as_markup())
 
+    @router.message(Command("hottest"))
+    async def command_hottest_handler(message: Message) -> None:
+        """
+        This handler receives messages with `/hottest` command
+        """
+        items = await storage.get_top_ranked(limit=10)
+
+        if not items:
+            await message.answer("No hot trends found yet.")
+            return
+
+        builder = InlineKeyboardBuilder()
+        for item in items:
+            item_id = str(uuid.uuid5(uuid.NAMESPACE_URL, item.url))
+            builder.row(InlineKeyboardButton(
+                text=f"🔥 [{item.relevance_score}/10] {item.title}",
+                callback_data=f"detail:{item_id}"
+            ))
+
+        await message.answer("Top 10 Hottest Trends:", reply_markup=builder.as_markup())
+
     @router.message(Command("search"))
     async def command_search_handler(message: Message, command: CommandObject) -> None:
         """
diff --git a/src/crawlers.yml b/src/crawlers.yml
index dd7a08f..e4fe92b 100644
--- a/src/crawlers.yml
+++ b/src/crawlers.yml
@@ -8,9 +8,6 @@ crawlers:
   - type: rss
     url: "https://news.samsung.com/global/rss"
     source: "Samsung Newsroom"
-  - type: rss
-    url: "https://www.sony.com/en/SonyInfo/News/Service/rss.xml"
-    source: "Sony Newsroom"
   - type: playwright
     url: "https://cvpr.thecvf.com/Conferences/2025"
     source: "CVPR 2025"
@@ -61,10 +58,10 @@ crawlers:
     url: "https://форумтехнопром.рф/"
     source: "Technoprom-2025"
     selector: ".news-item"
-  - type: playwright
-    url: "https://www.innoprom.com/en/media/news/"
-    source: "INNOPROM-2025"
-    selector: ".news-list__item"
+  # - type: playwright
+  #   url: "https://www.innoprom.com/en/media/news/"
+  #   source: "INNOPROM-2025"
+  #   selector: ".news-list__item"
   - type: playwright
     url: "https://www.hannovermesse.de/en/news/news-articles/"
     source: "Hannover Messe"
@@ -91,3 +88,12 @@ crawlers:
     url: "https://t.me/s/addmeto"
     source: "Telegram: Addmeto"
     selector: ".tgme_widget_message_text"
+  - type: rss
+    url: "https://habr.com/ru/rss/hubs/hi/articles/?fl=ru"
+    source: "Habr HighLoad"
+  - type: rss
+    url: "https://habr.com/ru/rss/hubs/complete_code/articles/?fl=ru"
+    source: "Habr Code Quality"
+  - type: rss
+    url: "https://habr.com/ru/rss/articles/rated100/?fl=ru"
+    source: "Habr High Ranked"
diff --git a/src/storage/base.py b/src/storage/base.py
index d208c74..d57158c 100644
--- a/src/storage/base.py
+++ b/src/storage/base.py
@@ -29,3 +29,8 @@ class IVectorStore(ABC):
     async def get_stats(self) -> dict[str, int]:
         """Get storage statistics including total count and breakdown by category."""
         pass
+
+    @abstractmethod
+    async def get_top_ranked(self, limit: int = 10) -> List[EnrichedNewsItemDTO]:
+        """Retrieve top ranked items by relevance score."""
+        pass
diff --git a/src/storage/chroma_store.py b/src/storage/chroma_store.py
index 1c68812..bdc9574 100644
--- a/src/storage/chroma_store.py
+++ b/src/storage/chroma_store.py
@@ -113,3 +113,27 @@ class ChromaStore(IVectorStore):
                 stats[key] = stats.get(key, 0) + 1
 
         return stats
+
+    async def get_top_ranked(self, limit: int = 10) -> List[EnrichedNewsItemDTO]:
+        """Return up to ``limit`` stored items with the highest relevance score.
+
+        Items are ordered by ``relevance_score``, highest first. A
+        non-positive ``limit`` yields an empty list.
+        """
+        # Guard first: a negative limit would act as a slice end offset below
+        # and return nearly the whole collection instead of nothing.
+        if limit <= 0:
+            return []
+
+        # Every record is fetched and then ranked on the client side.
+        results = self.collection.get(include=["metadatas", "documents"])
+        metadatas = results.get("metadatas") or []
+        documents = results.get("documents") or []
+
+        items = [
+            self._reconstruct_dto(meta, doc)
+            for meta, doc in zip(metadatas, documents)
+            if meta
+        ]
+        items.sort(key=lambda item: item.relevance_score, reverse=True)
+        return items[:limit]
diff --git a/tests/bot/test_handlers.py b/tests/bot/test_handlers.py
index 39f260b..9f88ecd 100644
--- a/tests/bot/test_handlers.py
+++ b/tests/bot/test_handlers.py
@@ -170,6 +170,37 @@ async def test_command_stats_handler(router, mock_storage, allowed_chat_id):
     args, kwargs = message.answer.call_args
     assert "Database Statistics" in args[0]
 
+@pytest.mark.asyncio
+async def test_command_hottest_handler(router, mock_storage, allowed_chat_id, mock_item):
+    handler = get_handler(router, "command_hottest_handler")
+    message = AsyncMock()
+    message.chat.id = int(allowed_chat_id)
+    message.answer = AsyncMock()
+
+    mock_storage.get_top_ranked.return_value = [mock_item]
+
+    await handler(message=message)
+
+    mock_storage.get_top_ranked.assert_called_once_with(limit=10)
+    message.answer.assert_called_once()
+    args, kwargs = message.answer.call_args
+    assert "Top 10 Hottest Trends:" in args[0]
+    assert "reply_markup" in kwargs
+    assert "🔥" in str(kwargs["reply_markup"])
+
+@pytest.mark.asyncio
+async def test_command_hottest_handler_empty(router, mock_storage, allowed_chat_id):
+    handler = get_handler(router, "command_hottest_handler")
+    message = AsyncMock()
+    message.chat.id = int(allowed_chat_id)
+    message.answer = AsyncMock()
+
+    mock_storage.get_top_ranked.return_value = []
+
+    await handler(message=message)
+
+    message.answer.assert_called_once_with("No hot trends found yet.")
+
 @pytest.mark.asyncio
 async def test_access_middleware_allowed(allowed_chat_id):
     middleware = AccessMiddleware(allowed_chat_id)
diff --git a/tests/bot/test_hottest_command.py b/tests/bot/test_hottest_command.py
new file mode 100644
index 0000000..ca8cbff
--- /dev/null
+++ b/tests/bot/test_hottest_command.py
@@ -0,0 +1,102 @@
+import uuid
+import pytest
+from unittest.mock import AsyncMock, MagicMock
+from aiogram.types import Message, InlineKeyboardMarkup
+from datetime import datetime
+
+from src.bot.handlers import get_router
+from src.processor.dto import EnrichedNewsItemDTO
+
+@pytest.fixture
+def mock_storage():
+    return AsyncMock()
+
+@pytest.fixture
+def mock_processor():
+    processor = MagicMock()
+    processor.get_info.return_value = {"model": "test-model"}
+    return processor
+
+@pytest.fixture
+def allowed_chat_id():
+    return "123456789"
+
+@pytest.fixture
+def router(mock_storage, mock_processor, allowed_chat_id):
+    return get_router(mock_storage, mock_processor, allowed_chat_id)
+
+def get_handler(router, callback_name):
+    for handler in router.message.handlers:
+        if handler.callback.__name__ == callback_name:
+            return handler.callback
+    raise ValueError(f"Handler {callback_name} not found")
+
+@pytest.mark.asyncio
+async def test_command_hottest_handler_success(router, mock_storage, allowed_chat_id):
+    """
+    Test that /hottest command calls get_top_ranked and returns a list of items.
+    """
+    # 1. Arrange
+    handler = get_handler(router, "command_hottest_handler")
+    message = AsyncMock()
+    message.chat = MagicMock()
+    message.chat.id = int(allowed_chat_id)
+    message.answer = AsyncMock()
+
+    mock_items = [
+        EnrichedNewsItemDTO(
+            title=f"Hot News {i}",
+            url=f"https://example.com/{i}",
+            content_text=f"Content {i}",
+            source="Source",
+            timestamp=datetime.now(),
+            relevance_score=10-i,
+            summary_ru=f"Сводка {i}",
+            anomalies_detected=[],
+            category="Tech"
+        ) for i in range(3)
+    ]
+    mock_storage.get_top_ranked.return_value = mock_items
+
+    # 2. Act
+    await handler(message=message)
+
+    # 3. Assert
+    mock_storage.get_top_ranked.assert_called_once_with(limit=10)
+    message.answer.assert_called_once()
+
+    args, kwargs = message.answer.call_args
+    assert "Top 10 Hottest Trends:" in args[0]
+    assert "reply_markup" in kwargs
+    assert isinstance(kwargs["reply_markup"], InlineKeyboardMarkup)
+
+    # Check if all 3 items are in the markup
+    markup = kwargs["reply_markup"]
+    assert len(markup.inline_keyboard) == 3
+
+    # Check if icons and scores are present
+    button_text = markup.inline_keyboard[0][0].text
+    assert "🔥" in button_text
+    assert "[10/10]" in button_text
+    assert "Hot News 0" in button_text
+
+@pytest.mark.asyncio
+async def test_command_hottest_handler_empty(router, mock_storage, allowed_chat_id):
+    """
+    Test that /hottest command handles empty results correctly.
+    """
+    # 1. Arrange
+    handler = get_handler(router, "command_hottest_handler")
+    message = AsyncMock()
+    message.chat = MagicMock()
+    message.chat.id = int(allowed_chat_id)
+    message.answer = AsyncMock()
+
+    mock_storage.get_top_ranked.return_value = []
+
+    # 2. Act
+    await handler(message=message)
+
+    # 3. Assert
+    mock_storage.get_top_ranked.assert_called_once_with(limit=10)
+    message.answer.assert_called_once_with("No hot trends found yet.")
diff --git a/tests/storage/test_chroma_store.py b/tests/storage/test_chroma_store.py
index 47feef6..4305da7 100644
--- a/tests/storage/test_chroma_store.py
+++ b/tests/storage/test_chroma_store.py
@@ -2,6 +2,7 @@ import pytest
 import pytest_asyncio
 import uuid
 from datetime import datetime, timezone
+from unittest.mock import MagicMock
 
 import chromadb
 from chromadb.config import Settings
@@ -259,3 +260,43 @@ async def test_search_sorting(chroma_store: ChromaStore):
     # Should be sorted 5, 4, 3, 2, 1
     scores = [r.relevance_score for r in results]
     assert scores == [5, 4, 3, 2, 1]
+
+@pytest.mark.asyncio
+async def test_get_top_ranked_mock(chroma_store: ChromaStore):
+    # 1. Arrange
+    mock_collection = MagicMock()
+    chroma_store.collection = mock_collection
+
+    # Mock data returned by collection.get
+    mock_collection.get.return_value = {
+        "metadatas": [
+            {"title": "Low", "url": "url1", "relevance_score": 2, "timestamp": "2023-11-01T12:00:00"},
+            {"title": "High", "url": "url2", "relevance_score": 10, "timestamp": "2023-11-01T12:00:00"},
+            {"title": "Mid", "url": "url3", "relevance_score": 7, "timestamp": "2023-11-01T12:00:00"},
+        ],
+        "documents": ["doc1", "doc2", "doc3"]
+    }
+
+    # 2. Act
+    results = await chroma_store.get_top_ranked(limit=2)
+
+    # 3. Assert
+    mock_collection.get.assert_called_once_with(include=["metadatas", "documents"])
+    assert len(results) == 2
+    assert results[0].title == "High"
+    assert results[0].relevance_score == 10
+    assert results[1].title == "Mid"
+    assert results[1].relevance_score == 7
+
+@pytest.mark.asyncio
+async def test_get_top_ranked_empty(chroma_store: ChromaStore):
+    # 1. Arrange
+    mock_collection = MagicMock()
+    chroma_store.collection = mock_collection
+    mock_collection.get.return_value = {"metadatas": [], "documents": []}
+
+    # 2. Act
+    results = await chroma_store.get_top_ranked(limit=10)
+
+    # 3. Assert
+    assert len(results) == 0
diff --git a/tests/storage/test_top_ranked.py b/tests/storage/test_top_ranked.py
new file mode 100644
index 0000000..db673ac
--- /dev/null
+++ b/tests/storage/test_top_ranked.py
@@ -0,0 +1,126 @@
+import pytest
+import pytest_asyncio
+from datetime import datetime, timezone
+import chromadb
+from chromadb.config import Settings
+
+from src.processor.dto import EnrichedNewsItemDTO
+from src.storage.chroma_store import ChromaStore
+
+@pytest_asyncio.fixture
+async def chroma_store():
+    # Use EphemeralClient for in-memory testing
+    client = chromadb.EphemeralClient(Settings(allow_reset=True))
+    client.reset()
+    store = ChromaStore(client=client, collection_name="test_top_ranked_collection")
+    yield store
+    client.reset()
+
+@pytest.mark.asyncio
+async def test_get_top_ranked_sorting(chroma_store: ChromaStore):
+    """
+    Test that get_top_ranked returns items sorted by relevance_score in descending order.
+    """
+    # 1. Arrange - create items with various relevance scores
+    items = [
+        EnrichedNewsItemDTO(
+            title=f"News {score}",
+            url=f"https://example.com/{score}",
+            content_text=f"Content for news with score {score}",
+            source="Source",
+            timestamp=datetime.now(timezone.utc),
+            relevance_score=score,
+            summary_ru=f"Сводка {score}",
+            anomalies_detected=[],
+            category="Tech"
+        ) for score in [5, 10, 2, 8, 1]
+    ]
+
+    for item in items:
+        await chroma_store.store(item)
+
+    # 2. Act
+    results = await chroma_store.get_top_ranked(limit=10)
+
+    # 3. Assert
+    assert len(results) == 5
+    scores = [r.relevance_score for r in results]
+    # Should be [10, 8, 5, 2, 1]
+    assert scores == [10, 8, 5, 2, 1]
+    assert results[0].title == "News 10"
+    assert results[-1].title == "News 1"
+
+@pytest.mark.asyncio
+async def test_get_top_ranked_limit(chroma_store: ChromaStore):
+    """
+    Test that get_top_ranked respects the limit parameter.
+    """
+    # 1. Arrange
+    items = [
+        EnrichedNewsItemDTO(
+            title=f"News {i}",
+            url=f"https://example.com/{i}",
+            content_text=f"Content {i}",
+            source="Source",
+            timestamp=datetime.now(timezone.utc),
+            relevance_score=i,
+            summary_ru=f"Сводка {i}",
+            anomalies_detected=[],
+            category="Tech"
+        ) for i in range(1, 11) # 10 items
+    ]
+
+    for item in items:
+        await chroma_store.store(item)
+
+    # 2. Act
+    limit_5 = await chroma_store.get_top_ranked(limit=5)
+    limit_2 = await chroma_store.get_top_ranked(limit=2)
+
+    # 3. Assert
+    assert len(limit_5) == 5
+    assert len(limit_2) == 2
+    assert limit_5[0].relevance_score == 10
+    assert limit_5[4].relevance_score == 6
+
+@pytest.mark.asyncio
+async def test_get_top_ranked_empty_store(chroma_store: ChromaStore):
+    """
+    Test that get_top_ranked returns an empty list if store is empty.
+    """
+    # 1. Act
+    results = await chroma_store.get_top_ranked(limit=10)
+
+    # 2. Assert
+    assert results == []
+
+@pytest.mark.asyncio
+async def test_get_top_ranked_non_positive_limit(chroma_store: ChromaStore):
+    """
+    Test that get_top_ranked returns an empty list for a zero or negative limit.
+    """
+    # 1. Arrange
+    items = [
+        EnrichedNewsItemDTO(
+            title=f"News {i}",
+            url=f"https://example.com/{i}",
+            content_text=f"Content {i}",
+            source="Source",
+            timestamp=datetime.now(timezone.utc),
+            relevance_score=i,
+            summary_ru=f"Сводка {i}",
+            anomalies_detected=[],
+            category="Tech"
+        ) for i in range(1, 4)
+    ]
+
+    for item in items:
+        await chroma_store.store(item)
+
+    # 2. Act
+    zero = await chroma_store.get_top_ranked(limit=0)
+    negative = await chroma_store.get_top_ranked(limit=-1)
+
+    # 3. Assert
+    assert zero == []
+    assert negative == []