Implement 'Top Ranked' feature and expand Habr sources
This commit is contained in:
parent
019d9161de
commit
9fdb4b35cd
@ -51,6 +51,7 @@ def get_router(storage: IVectorStore, processor: ILLMProvider, allowed_chat_id:
|
||||
"/start - Start the bot\n"
|
||||
"/help - Show this help message\n"
|
||||
"/latest [category] - Show the latest enriched news trends\n"
|
||||
"/hottest - Show top 10 ranked hot trends\n"
|
||||
"/search query - Search for news\n"
|
||||
"/stats - Show database statistics\n"
|
||||
"/params - Show LLM processor parameters\n"
|
||||
@ -93,6 +94,27 @@ def get_router(storage: IVectorStore, processor: ILLMProvider, allowed_chat_id:
|
||||
|
||||
await message.answer("Latest news:", reply_markup=builder.as_markup())
|
||||
|
||||
@router.message(Command("hottest"))
async def command_hottest_handler(message: Message) -> None:
    """
    Reply to the `/hottest` command with an inline keyboard of top-ranked items.

    Requests up to 10 items from the storage via ``get_top_ranked`` and adds
    one button row per item. Each button carries ``detail:<uuid5 of item URL>``
    as callback data. When the storage returns nothing, a plain notice is sent
    instead of a keyboard.
    """
    trends = await storage.get_top_ranked(limit=10)

    if trends:
        keyboard = InlineKeyboardBuilder()
        for trend in trends:
            # The id is derived deterministically from the item's URL.
            detail_id = str(uuid.uuid5(uuid.NAMESPACE_URL, trend.url))
            label = f"🔥 [{trend.relevance_score}/10] {trend.title}"
            keyboard.row(
                InlineKeyboardButton(text=label, callback_data=f"detail:{detail_id}")
            )
        await message.answer("Top 10 Hottest Trends:", reply_markup=keyboard.as_markup())
    else:
        await message.answer("No hot trends found yet.")
|
||||
|
||||
@router.message(Command("search"))
|
||||
async def command_search_handler(message: Message, command: CommandObject) -> None:
|
||||
"""
|
||||
|
||||
@ -8,9 +8,6 @@ crawlers:
|
||||
- type: rss
|
||||
url: "https://news.samsung.com/global/rss"
|
||||
source: "Samsung Newsroom"
|
||||
- type: rss
|
||||
url: "https://www.sony.com/en/SonyInfo/News/Service/rss.xml"
|
||||
source: "Sony Newsroom"
|
||||
- type: playwright
|
||||
url: "https://cvpr.thecvf.com/Conferences/2025"
|
||||
source: "CVPR 2025"
|
||||
@ -61,10 +58,10 @@ crawlers:
|
||||
url: "https://форумтехнопром.рф/"
|
||||
source: "Technoprom-2025"
|
||||
selector: ".news-item"
|
||||
- type: playwright
|
||||
url: "https://www.innoprom.com/en/media/news/"
|
||||
source: "INNOPROM-2025"
|
||||
selector: ".news-list__item"
|
||||
# - type: playwright
|
||||
# url: "https://www.innoprom.com/en/media/news/"
|
||||
# source: "INNOPROM-2025"
|
||||
# selector: ".news-list__item"
|
||||
- type: playwright
|
||||
url: "https://www.hannovermesse.de/en/news/news-articles/"
|
||||
source: "Hannover Messe"
|
||||
@ -91,3 +88,12 @@ crawlers:
|
||||
url: "https://t.me/s/addmeto"
|
||||
source: "Telegram: Addmeto"
|
||||
selector: ".tgme_widget_message_text"
|
||||
- type: rss
|
||||
url: "https://habr.com/ru/rss/hubs/hi/articles/?fl=ru"
|
||||
source: "Habr HighLoad"
|
||||
- type: rss
|
||||
url: "https://habr.com/ru/rss/hubs/complete_code/articles/?fl=ru"
|
||||
source: "Habr Code Quality"
|
||||
- type: rss
|
||||
url: "https://habr.com/ru/rss/articles/rated100/?fl=ru"
|
||||
source: "Habr High Ranked"
|
||||
@ -29,3 +29,8 @@ class IVectorStore(ABC):
|
||||
async def get_stats(self) -> dict[str, int]:
|
||||
"""Get storage statistics including total count and breakdown by category."""
|
||||
pass
|
||||
|
||||
@abstractmethod
async def get_top_ranked(self, limit: int = 10) -> List[EnrichedNewsItemDTO]:
    """Retrieve the top ranked items by relevance score.

    Args:
        limit: Maximum number of items to return (10 by default).

    Returns:
        The top ranked items as a list of ``EnrichedNewsItemDTO``.
    """
    ...
|
||||
|
||||
@ -113,3 +113,20 @@ class ChromaStore(IVectorStore):
|
||||
stats[key] = stats.get(key, 0) + 1
|
||||
|
||||
return stats
|
||||
|
||||
async def get_top_ranked(self, limit: int = 10) -> List[EnrichedNewsItemDTO]:
    """Retrieve the ``limit`` stored items with the highest relevance score.

    Ranking is done client-side: every record's metadata and document is
    fetched from the collection and rebuilt into a DTO, but only the best
    ``limit`` DTOs are retained while scanning.

    Args:
        limit: Maximum number of items to return. Zero or a negative value
            yields an empty list (previously a negative value was applied as
            a slice bound and silently dropped items from the tail).

    Returns:
        Items ordered by ``relevance_score`` descending. Items with equal
        scores keep the order in which the collection returned them.
    """
    # Imported locally so the method does not depend on module-level imports.
    import heapq

    if limit <= 0:
        return []

    results = self.collection.get(include=["metadatas", "documents"])
    metadatas = results.get("metadatas") or []
    documents = results.get("documents") or []

    candidates = (
        self._reconstruct_dto(meta, doc)
        for meta, doc in zip(metadatas, documents)
        if meta  # skip records that carry no metadata
    )

    # nlargest is equivalent to sorted(..., reverse=True)[:limit], including
    # tie order, without sorting or holding the whole collection.
    return heapq.nlargest(limit, candidates, key=lambda dto: dto.relevance_score)
|
||||
|
||||
@ -170,6 +170,37 @@ async def test_command_stats_handler(router, mock_storage, allowed_chat_id):
|
||||
args, kwargs = message.answer.call_args
|
||||
assert "Database Statistics" in args[0]
|
||||
|
||||
@pytest.mark.asyncio
async def test_command_hottest_handler(router, mock_storage, allowed_chat_id, mock_item):
    """`/hottest` with one stored item answers once with a 🔥-labelled keyboard."""
    mock_storage.get_top_ranked.return_value = [mock_item]
    hottest = get_handler(router, "command_hottest_handler")

    incoming = AsyncMock()
    incoming.chat.id = int(allowed_chat_id)
    incoming.answer = AsyncMock()

    await hottest(message=incoming)

    # Storage is asked for exactly ten items and the user gets a single reply.
    mock_storage.get_top_ranked.assert_called_once_with(limit=10)
    incoming.answer.assert_called_once()

    reply = incoming.answer.call_args
    assert "Top 10 Hottest Trends:" in reply.args[0]
    assert "reply_markup" in reply.kwargs
    assert "🔥" in str(reply.kwargs["reply_markup"])
|
||||
|
||||
@pytest.mark.asyncio
async def test_command_hottest_handler_empty(router, mock_storage, allowed_chat_id):
    """`/hottest` with nothing stored answers with the "no trends" notice only."""
    mock_storage.get_top_ranked.return_value = []
    hottest = get_handler(router, "command_hottest_handler")

    incoming = AsyncMock()
    incoming.chat.id = int(allowed_chat_id)
    incoming.answer = AsyncMock()

    await hottest(message=incoming)

    incoming.answer.assert_called_once_with("No hot trends found yet.")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_access_middleware_allowed(allowed_chat_id):
|
||||
middleware = AccessMiddleware(allowed_chat_id)
|
||||
|
||||
102
tests/bot/test_hottest_command.py
Normal file
102
tests/bot/test_hottest_command.py
Normal file
@ -0,0 +1,102 @@
|
||||
import uuid
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
from aiogram.types import Message, InlineKeyboardMarkup
|
||||
from datetime import datetime
|
||||
|
||||
from src.bot.handlers import get_router
|
||||
from src.processor.dto import EnrichedNewsItemDTO
|
||||
|
||||
@pytest.fixture
def mock_storage():
    """Async stand-in for the vector store; every attribute is an awaitable mock."""
    storage_stub = AsyncMock()
    return storage_stub
|
||||
|
||||
@pytest.fixture
def mock_processor():
    """Synchronous processor stub whose ``get_info()`` returns a fixed model name."""
    # Dotted keys in the constructor kwargs configure child mocks in one step.
    return MagicMock(**{"get_info.return_value": {"model": "test-model"}})
|
||||
|
||||
@pytest.fixture
def allowed_chat_id():
    """Chat id (as a string) that is passed to ``get_router`` as the allowed chat."""
    chat_id = "123456789"
    return chat_id
|
||||
|
||||
@pytest.fixture
def router(mock_storage, mock_processor, allowed_chat_id):
    """Router under test, built by ``get_router`` from the mocked dependencies."""
    return get_router(
        storage=mock_storage,
        processor=mock_processor,
        allowed_chat_id=allowed_chat_id,
    )
|
||||
|
||||
def get_handler(router, callback_name):
    """Return the message-handler callback registered on *router* under a name.

    Scans ``router.message.handlers`` in order and returns the first callback
    whose ``__name__`` equals *callback_name*.

    Raises:
        ValueError: if no registered message handler has that callback name.
    """
    matching = (
        entry.callback
        for entry in router.message.handlers
        if entry.callback.__name__ == callback_name
    )
    found = next(matching, None)
    if found is None:
        raise ValueError(f"Handler {callback_name} not found")
    return found
|
||||
|
||||
@pytest.mark.asyncio
async def test_command_hottest_handler_success(router, mock_storage, allowed_chat_id):
    """
    `/hottest` must query get_top_ranked and answer with one button row per item.
    """
    # Arrange: three items whose scores descend 10, 9, 8.
    hot_items = []
    for idx in range(3):
        hot_items.append(
            EnrichedNewsItemDTO(
                title=f"Hot News {idx}",
                url=f"https://example.com/{idx}",
                content_text=f"Content {idx}",
                source="Source",
                timestamp=datetime.now(),
                relevance_score=10 - idx,
                summary_ru=f"Сводка {idx}",
                anomalies_detected=[],
                category="Tech",
            )
        )
    mock_storage.get_top_ranked.return_value = hot_items

    hottest = get_handler(router, "command_hottest_handler")
    incoming = AsyncMock()
    incoming.chat = MagicMock()
    incoming.chat.id = int(allowed_chat_id)
    incoming.answer = AsyncMock()

    # Act
    await hottest(message=incoming)

    # Assert: one storage query, one reply.
    mock_storage.get_top_ranked.assert_called_once_with(limit=10)
    incoming.answer.assert_called_once()

    reply = incoming.answer.call_args
    assert "Top 10 Hottest Trends:" in reply.args[0]
    assert "reply_markup" in reply.kwargs

    keyboard = reply.kwargs["reply_markup"]
    assert isinstance(keyboard, InlineKeyboardMarkup)

    # Every item gets its own keyboard row.
    assert len(keyboard.inline_keyboard) == 3

    # The first button carries the icon, the score and the title.
    first_label = keyboard.inline_keyboard[0][0].text
    for fragment in ("🔥", "[10/10]", "Hot News 0"):
        assert fragment in first_label
|
||||
|
||||
@pytest.mark.asyncio
async def test_command_hottest_handler_empty(router, mock_storage, allowed_chat_id):
    """
    `/hottest` must answer with the "no trends" notice when storage is empty.
    """
    # Arrange
    mock_storage.get_top_ranked.return_value = []
    hottest = get_handler(router, "command_hottest_handler")

    incoming = AsyncMock()
    incoming.chat = MagicMock()
    incoming.chat.id = int(allowed_chat_id)
    incoming.answer = AsyncMock()

    # Act
    await hottest(message=incoming)

    # Assert
    mock_storage.get_top_ranked.assert_called_once_with(limit=10)
    incoming.answer.assert_called_once_with("No hot trends found yet.")
|
||||
@ -2,6 +2,7 @@ import pytest
|
||||
import pytest_asyncio
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import MagicMock
|
||||
import chromadb
|
||||
from chromadb.config import Settings
|
||||
|
||||
@ -259,3 +260,43 @@ async def test_search_sorting(chroma_store: ChromaStore):
|
||||
# Should be sorted 5, 4, 3, 2, 1
|
||||
scores = [r.relevance_score for r in results]
|
||||
assert scores == [5, 4, 3, 2, 1]
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_top_ranked_mock(chroma_store: ChromaStore):
    """get_top_ranked sorts what ``collection.get`` returns and truncates to the limit."""
    # Arrange: replace the real collection with a mock returning three records
    # in a deliberately unsorted score order.
    fake_collection = MagicMock()
    fake_collection.get.return_value = {
        "metadatas": [
            {"title": title, "url": url, "relevance_score": score, "timestamp": "2023-11-01T12:00:00"}
            for title, url, score in (("Low", "url1", 2), ("High", "url2", 10), ("Mid", "url3", 7))
        ],
        "documents": ["doc1", "doc2", "doc3"],
    }
    chroma_store.collection = fake_collection

    # Act
    top_two = await chroma_store.get_top_ranked(limit=2)

    # Assert
    fake_collection.get.assert_called_once_with(include=["metadatas", "documents"])
    assert len(top_two) == 2

    best, runner_up = top_two
    assert best.title == "High"
    assert best.relevance_score == 10
    assert runner_up.title == "Mid"
    assert runner_up.relevance_score == 7
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_top_ranked_empty(chroma_store: ChromaStore):
    """get_top_ranked yields nothing when the collection returns no records."""
    # Arrange
    fake_collection = MagicMock()
    fake_collection.get.return_value = {"metadatas": [], "documents": []}
    chroma_store.collection = fake_collection

    # Act
    ranked = await chroma_store.get_top_ranked(limit=10)

    # Assert
    assert len(ranked) == 0
|
||||
|
||||
95
tests/storage/test_top_ranked.py
Normal file
95
tests/storage/test_top_ranked.py
Normal file
@ -0,0 +1,95 @@
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from datetime import datetime, timezone
|
||||
import chromadb
|
||||
from chromadb.config import Settings
|
||||
|
||||
from src.processor.dto import EnrichedNewsItemDTO
|
||||
from src.storage.chroma_store import ChromaStore
|
||||
|
||||
@pytest_asyncio.fixture
async def chroma_store():
    """Yield a ChromaStore backed by an in-memory client, reset before and after use."""
    # EphemeralClient keeps everything in memory; allow_reset permits client.reset().
    memory_client = chromadb.EphemeralClient(Settings(allow_reset=True))
    memory_client.reset()

    yield ChromaStore(client=memory_client, collection_name="test_top_ranked_collection")

    memory_client.reset()
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_top_ranked_sorting(chroma_store: ChromaStore):
    """
    get_top_ranked must return stored items ordered by relevance_score, highest first.
    """
    # Arrange: build five items in a deliberately unsorted score order.
    unsorted_items = []
    for score in (5, 10, 2, 8, 1):
        unsorted_items.append(
            EnrichedNewsItemDTO(
                title=f"News {score}",
                url=f"https://example.com/{score}",
                content_text=f"Content for news with score {score}",
                source="Source",
                timestamp=datetime.now(timezone.utc),
                relevance_score=score,
                summary_ru=f"Сводка {score}",
                anomalies_detected=[],
                category="Tech",
            )
        )
    for entry in unsorted_items:
        await chroma_store.store(entry)

    # Act
    ranked = await chroma_store.get_top_ranked(limit=10)

    # Assert: all five come back, in descending score order.
    assert len(ranked) == 5
    assert [entry.relevance_score for entry in ranked] == [10, 8, 5, 2, 1]
    assert ranked[0].title == "News 10"
    assert ranked[-1].title == "News 1"
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_top_ranked_limit(chroma_store: ChromaStore):
    """
    get_top_ranked must never return more items than the requested limit.
    """
    # Arrange: ten items with scores 1 through 10.
    scored_items = []
    for score in range(1, 11):
        scored_items.append(
            EnrichedNewsItemDTO(
                title=f"News {score}",
                url=f"https://example.com/{score}",
                content_text=f"Content {score}",
                source="Source",
                timestamp=datetime.now(timezone.utc),
                relevance_score=score,
                summary_ru=f"Сводка {score}",
                anomalies_detected=[],
                category="Tech",
            )
        )
    for entry in scored_items:
        await chroma_store.store(entry)

    # Act
    top_five = await chroma_store.get_top_ranked(limit=5)
    top_two = await chroma_store.get_top_ranked(limit=2)

    # Assert: sizes follow the limit, and the top five span scores 10 down to 6.
    assert len(top_five) == 5
    assert len(top_two) == 2
    assert top_five[0].relevance_score == 10
    assert top_five[4].relevance_score == 6
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_top_ranked_empty_store(chroma_store: ChromaStore):
    """
    get_top_ranked must return an empty list when nothing has been stored.
    """
    # Act: query the freshly reset store without inserting anything.
    ranked = await chroma_store.get_top_ranked(limit=10)

    # Assert
    assert ranked == []
|
||||
Loading…
x
Reference in New Issue
Block a user