Implement 'Top Ranked' feature and expand Habr sources
This commit is contained in:
parent
019d9161de
commit
9fdb4b35cd
@ -51,6 +51,7 @@ def get_router(storage: IVectorStore, processor: ILLMProvider, allowed_chat_id:
|
|||||||
"/start - Start the bot\n"
|
"/start - Start the bot\n"
|
||||||
"/help - Show this help message\n"
|
"/help - Show this help message\n"
|
||||||
"/latest [category] - Show the latest enriched news trends\n"
|
"/latest [category] - Show the latest enriched news trends\n"
|
||||||
|
"/hottest - Show top 10 ranked hot trends\n"
|
||||||
"/search query - Search for news\n"
|
"/search query - Search for news\n"
|
||||||
"/stats - Show database statistics\n"
|
"/stats - Show database statistics\n"
|
||||||
"/params - Show LLM processor parameters\n"
|
"/params - Show LLM processor parameters\n"
|
||||||
@ -93,6 +94,27 @@ def get_router(storage: IVectorStore, processor: ILLMProvider, allowed_chat_id:
|
|||||||
|
|
||||||
await message.answer("Latest news:", reply_markup=builder.as_markup())
|
await message.answer("Latest news:", reply_markup=builder.as_markup())
|
||||||
|
|
||||||
|
@router.message(Command("hottest"))
async def command_hottest_handler(message: Message) -> None:
    """
    Handle the `/hottest` command.

    Asks the storage for its 10 top-ranked items and answers with an inline
    keyboard holding one button per item. When the storage returns nothing,
    a short notice is sent instead.
    """
    top_items = await storage.get_top_ranked(limit=10)

    if top_items:
        keyboard = InlineKeyboardBuilder()
        for entry in top_items:
            # The callback id is derived deterministically from the item URL.
            entry_id = str(uuid.uuid5(uuid.NAMESPACE_URL, entry.url))
            button = InlineKeyboardButton(
                text=f"🔥 [{entry.relevance_score}/10] {entry.title}",
                callback_data=f"detail:{entry_id}"
            )
            keyboard.row(button)

        await message.answer("Top 10 Hottest Trends:", reply_markup=keyboard.as_markup())
    else:
        await message.answer("No hot trends found yet.")
|
||||||
|
|
||||||
@router.message(Command("search"))
|
@router.message(Command("search"))
|
||||||
async def command_search_handler(message: Message, command: CommandObject) -> None:
|
async def command_search_handler(message: Message, command: CommandObject) -> None:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -8,9 +8,6 @@ crawlers:
|
|||||||
- type: rss
|
- type: rss
|
||||||
url: "https://news.samsung.com/global/rss"
|
url: "https://news.samsung.com/global/rss"
|
||||||
source: "Samsung Newsroom"
|
source: "Samsung Newsroom"
|
||||||
- type: rss
|
|
||||||
url: "https://www.sony.com/en/SonyInfo/News/Service/rss.xml"
|
|
||||||
source: "Sony Newsroom"
|
|
||||||
- type: playwright
|
- type: playwright
|
||||||
url: "https://cvpr.thecvf.com/Conferences/2025"
|
url: "https://cvpr.thecvf.com/Conferences/2025"
|
||||||
source: "CVPR 2025"
|
source: "CVPR 2025"
|
||||||
@ -61,10 +58,10 @@ crawlers:
|
|||||||
url: "https://форумтехнопром.рф/"
|
url: "https://форумтехнопром.рф/"
|
||||||
source: "Technoprom-2025"
|
source: "Technoprom-2025"
|
||||||
selector: ".news-item"
|
selector: ".news-item"
|
||||||
- type: playwright
|
# - type: playwright
|
||||||
url: "https://www.innoprom.com/en/media/news/"
|
# url: "https://www.innoprom.com/en/media/news/"
|
||||||
source: "INNOPROM-2025"
|
# source: "INNOPROM-2025"
|
||||||
selector: ".news-list__item"
|
# selector: ".news-list__item"
|
||||||
- type: playwright
|
- type: playwright
|
||||||
url: "https://www.hannovermesse.de/en/news/news-articles/"
|
url: "https://www.hannovermesse.de/en/news/news-articles/"
|
||||||
source: "Hannover Messe"
|
source: "Hannover Messe"
|
||||||
@ -91,3 +88,12 @@ crawlers:
|
|||||||
url: "https://t.me/s/addmeto"
|
url: "https://t.me/s/addmeto"
|
||||||
source: "Telegram: Addmeto"
|
source: "Telegram: Addmeto"
|
||||||
selector: ".tgme_widget_message_text"
|
selector: ".tgme_widget_message_text"
|
||||||
|
- type: rss
|
||||||
|
url: "https://habr.com/ru/rss/hubs/hi/articles/?fl=ru"
|
||||||
|
source: "Habr HighLoad"
|
||||||
|
- type: rss
|
||||||
|
url: "https://habr.com/ru/rss/hubs/complete_code/articles/?fl=ru"
|
||||||
|
source: "Habr Code Quality"
|
||||||
|
- type: rss
|
||||||
|
url: "https://habr.com/ru/rss/articles/rated100/?fl=ru"
|
||||||
|
source: "Habr High Ranked"
|
||||||
@ -29,3 +29,8 @@ class IVectorStore(ABC):
|
|||||||
async def get_stats(self) -> dict[str, int]:
|
async def get_stats(self) -> dict[str, int]:
|
||||||
"""Get storage statistics including total count and breakdown by category."""
|
"""Get storage statistics including total count and breakdown by category."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
async def get_top_ranked(self, limit: int = 10) -> List[EnrichedNewsItemDTO]:
    """Return the top ranked items, ordered by relevance score.

    `limit` is the maximum number of items requested (10 by default).
    Concrete stores must override this method.
    """
    ...
|
||||||
|
|||||||
@ -113,3 +113,20 @@ class ChromaStore(IVectorStore):
|
|||||||
stats[key] = stats.get(key, 0) + 1
|
stats[key] = stats.get(key, 0) + 1
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
async def get_top_ranked(self, limit: int = 10) -> List[EnrichedNewsItemDTO]:
    """Retrieve the top ranked items by relevance score.

    Reads every record's metadata and document from the collection,
    rebuilds a DTO for each record that has metadata, and returns the
    `limit` items with the highest `relevance_score`, highest first.
    Items with equal scores keep the order in which the collection
    returned them.

    Args:
        limit: Maximum number of items to return. Zero or a negative value
            yields an empty list (a negative value previously sliced
            `items[:limit]` and returned all but the last items).

    Returns:
        At most `limit` items sorted by `relevance_score` descending.
    """
    # Local import: keeps this method self-contained without touching the
    # module's import block.
    import heapq

    if limit <= 0:
        return []

    # The whole collection is fetched; ranking happens on the client side.
    results = self.collection.get(include=["metadatas", "documents"])
    metadatas = results.get("metadatas") or []
    documents = results.get("documents") or []

    candidates = (
        self._reconstruct_dto(meta, doc)
        for meta, doc in zip(metadatas, documents)
        if meta  # records without metadata cannot be rebuilt
    )

    # nlargest is documented as equivalent to
    # sorted(iterable, key=key, reverse=True)[:n], but only keeps `limit`
    # items in memory while scanning.
    return heapq.nlargest(limit, candidates, key=lambda item: item.relevance_score)
|
||||||
|
|||||||
@ -170,6 +170,37 @@ async def test_command_stats_handler(router, mock_storage, allowed_chat_id):
|
|||||||
args, kwargs = message.answer.call_args
|
args, kwargs = message.answer.call_args
|
||||||
assert "Database Statistics" in args[0]
|
assert "Database Statistics" in args[0]
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_command_hottest_handler(router, mock_storage, allowed_chat_id, mock_item):
    """/hottest answers with a keyboard built from the top-ranked items."""
    # Arrange: storage yields a single item, message comes from the allowed chat
    mock_storage.get_top_ranked.return_value = [mock_item]
    incoming = AsyncMock()
    incoming.chat.id = int(allowed_chat_id)
    incoming.answer = AsyncMock()
    hottest = get_handler(router, "command_hottest_handler")

    # Act
    await hottest(message=incoming)

    # Assert
    mock_storage.get_top_ranked.assert_called_once_with(limit=10)
    incoming.answer.assert_called_once()
    positional, keyword = incoming.answer.call_args
    assert "Top 10 Hottest Trends:" in positional[0]
    assert "reply_markup" in keyword
    assert "🔥" in str(keyword["reply_markup"])
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_command_hottest_handler_empty(router, mock_storage, allowed_chat_id):
    """/hottest sends the 'nothing found' notice when storage returns no items."""
    # Arrange
    mock_storage.get_top_ranked.return_value = []
    incoming = AsyncMock()
    incoming.chat.id = int(allowed_chat_id)
    incoming.answer = AsyncMock()
    hottest = get_handler(router, "command_hottest_handler")

    # Act
    await hottest(message=incoming)

    # Assert
    incoming.answer.assert_called_once_with("No hot trends found yet.")
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_access_middleware_allowed(allowed_chat_id):
|
async def test_access_middleware_allowed(allowed_chat_id):
|
||||||
middleware = AccessMiddleware(allowed_chat_id)
|
middleware = AccessMiddleware(allowed_chat_id)
|
||||||
|
|||||||
102
tests/bot/test_hottest_command.py
Normal file
102
tests/bot/test_hottest_command.py
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
import uuid
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
from aiogram.types import Message, InlineKeyboardMarkup
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from src.bot.handlers import get_router
|
||||||
|
from src.processor.dto import EnrichedNewsItemDTO
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_storage():
    """Provide an AsyncMock standing in for the storage passed to get_router."""
    storage_double = AsyncMock()
    return storage_double
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_processor():
    """Provide a MagicMock processor whose get_info() reports a test model."""
    processor_double = MagicMock()
    info = {"model": "test-model"}
    processor_double.get_info.return_value = info
    return processor_double
|
||||||
|
|
||||||
|
@pytest.fixture
def allowed_chat_id():
    """Provide the chat id (as a string) that the router is built with."""
    chat_id = "123456789"
    return chat_id
|
||||||
|
|
||||||
|
@pytest.fixture
def router(mock_storage, mock_processor, allowed_chat_id):
    """Build the router under test from the mocked storage and processor."""
    built_router = get_router(mock_storage, mock_processor, allowed_chat_id)
    return built_router
|
||||||
|
|
||||||
|
def get_handler(router, callback_name):
    """Return the first message-handler callback named `callback_name`.

    Scans `router.message.handlers` in order and compares each callback's
    `__name__`. Raises ValueError when no callback matches.
    """
    matching = (
        registered.callback
        for registered in router.message.handlers
        if registered.callback.__name__ == callback_name
    )
    found = next(matching, None)
    if found is None:
        raise ValueError(f"Handler {callback_name} not found")
    return found
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_command_hottest_handler_success(router, mock_storage, allowed_chat_id):
    """
    /hottest must call get_top_ranked and answer with one button per returned item.
    """
    # 1. Arrange
    def build_item(index):
        # Scores descend from 10 so the first button is expected to show [10/10].
        return EnrichedNewsItemDTO(
            title=f"Hot News {index}",
            url=f"https://example.com/{index}",
            content_text=f"Content {index}",
            source="Source",
            timestamp=datetime.now(),
            relevance_score=10-index,
            summary_ru=f"Сводка {index}",
            anomalies_detected=[],
            category="Tech"
        )

    mock_storage.get_top_ranked.return_value = [build_item(index) for index in range(3)]

    incoming = AsyncMock()
    incoming.chat = MagicMock()
    incoming.chat.id = int(allowed_chat_id)
    incoming.answer = AsyncMock()
    hottest = get_handler(router, "command_hottest_handler")

    # 2. Act
    await hottest(message=incoming)

    # 3. Assert
    mock_storage.get_top_ranked.assert_called_once_with(limit=10)
    incoming.answer.assert_called_once()

    positional, keyword = incoming.answer.call_args
    assert "Top 10 Hottest Trends:" in positional[0]
    assert "reply_markup" in keyword
    keyboard = keyword["reply_markup"]
    assert isinstance(keyboard, InlineKeyboardMarkup)

    # One keyboard row per stored item
    assert len(keyboard.inline_keyboard) == 3

    # The first button carries the icon, the score and the title
    first_label = keyboard.inline_keyboard[0][0].text
    assert "🔥" in first_label
    assert "[10/10]" in first_label
    assert "Hot News 0" in first_label
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_command_hottest_handler_empty(router, mock_storage, allowed_chat_id):
    """
    /hottest must send the 'nothing found' notice when storage returns no items.
    """
    # 1. Arrange
    mock_storage.get_top_ranked.return_value = []

    incoming = AsyncMock()
    incoming.chat = MagicMock()
    incoming.chat.id = int(allowed_chat_id)
    incoming.answer = AsyncMock()
    hottest = get_handler(router, "command_hottest_handler")

    # 2. Act
    await hottest(message=incoming)

    # 3. Assert
    mock_storage.get_top_ranked.assert_called_once_with(limit=10)
    incoming.answer.assert_called_once_with("No hot trends found yet.")
|
||||||
@ -2,6 +2,7 @@ import pytest
|
|||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
from unittest.mock import MagicMock
|
||||||
import chromadb
|
import chromadb
|
||||||
from chromadb.config import Settings
|
from chromadb.config import Settings
|
||||||
|
|
||||||
@ -259,3 +260,43 @@ async def test_search_sorting(chroma_store: ChromaStore):
|
|||||||
# Should be sorted 5, 4, 3, 2, 1
|
# Should be sorted 5, 4, 3, 2, 1
|
||||||
scores = [r.relevance_score for r in results]
|
scores = [r.relevance_score for r in results]
|
||||||
assert scores == [5, 4, 3, 2, 1]
|
assert scores == [5, 4, 3, 2, 1]
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_top_ranked_mock(chroma_store: ChromaStore):
    """get_top_ranked sorts what collection.get returns and applies the limit."""
    # 1. Arrange: swap the real collection for a mock with unsorted scores
    stamp = "2023-11-01T12:00:00"
    rows = [("Low", "url1", 2), ("High", "url2", 10), ("Mid", "url3", 7)]
    fake_collection = MagicMock()
    fake_collection.get.return_value = {
        "metadatas": [
            {"title": title, "url": url, "relevance_score": score, "timestamp": stamp}
            for title, url, score in rows
        ],
        "documents": ["doc1", "doc2", "doc3"]
    }
    chroma_store.collection = fake_collection

    # 2. Act
    top_two = await chroma_store.get_top_ranked(limit=2)

    # 3. Assert
    fake_collection.get.assert_called_once_with(include=["metadatas", "documents"])
    assert len(top_two) == 2
    best, runner_up = top_two
    assert best.title == "High"
    assert best.relevance_score == 10
    assert runner_up.title == "Mid"
    assert runner_up.relevance_score == 7
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_top_ranked_empty(chroma_store: ChromaStore):
    """get_top_ranked yields nothing when collection.get returns empty lists."""
    # 1. Arrange
    fake_collection = MagicMock()
    fake_collection.get.return_value = {"metadatas": [], "documents": []}
    chroma_store.collection = fake_collection

    # 2. Act
    ranked = await chroma_store.get_top_ranked(limit=10)

    # 3. Assert
    assert len(ranked) == 0
|
||||||
|
|||||||
95
tests/storage/test_top_ranked.py
Normal file
95
tests/storage/test_top_ranked.py
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
import pytest
|
||||||
|
import pytest_asyncio
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
import chromadb
|
||||||
|
from chromadb.config import Settings
|
||||||
|
|
||||||
|
from src.processor.dto import EnrichedNewsItemDTO
|
||||||
|
from src.storage.chroma_store import ChromaStore
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture
async def chroma_store():
    """Yield a ChromaStore on an in-memory client, reset before and after use."""
    # EphemeralClient keeps everything in memory; allow_reset enables reset()
    memory_client = chromadb.EphemeralClient(Settings(allow_reset=True))
    memory_client.reset()
    yield ChromaStore(client=memory_client, collection_name="test_top_ranked_collection")
    memory_client.reset()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_top_ranked_sorting(chroma_store: ChromaStore):
    """
    get_top_ranked must return stored items ordered by relevance_score, highest first.
    """
    # 1. Arrange - store items whose scores are deliberately out of order
    def build_item(score):
        return EnrichedNewsItemDTO(
            title=f"News {score}",
            url=f"https://example.com/{score}",
            content_text=f"Content for news with score {score}",
            source="Source",
            timestamp=datetime.now(timezone.utc),
            relevance_score=score,
            summary_ru=f"Сводка {score}",
            anomalies_detected=[],
            category="Tech"
        )

    stored = [build_item(score) for score in [5, 10, 2, 8, 1]]
    for entry in stored:
        await chroma_store.store(entry)

    # 2. Act
    ranked = await chroma_store.get_top_ranked(limit=10)

    # 3. Assert
    assert len(ranked) == 5
    ranked_scores = [entry.relevance_score for entry in ranked]
    # Expected order: 10, 8, 5, 2, 1
    assert ranked_scores == [10, 8, 5, 2, 1]
    assert ranked[0].title == "News 10"
    assert ranked[-1].title == "News 1"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_top_ranked_limit(chroma_store: ChromaStore):
    """
    get_top_ranked must return no more than `limit` items.
    """
    # 1. Arrange - ten items scored 1 through 10
    def build_item(number):
        return EnrichedNewsItemDTO(
            title=f"News {number}",
            url=f"https://example.com/{number}",
            content_text=f"Content {number}",
            source="Source",
            timestamp=datetime.now(timezone.utc),
            relevance_score=number,
            summary_ru=f"Сводка {number}",
            anomalies_detected=[],
            category="Tech"
        )

    for number in range(1, 11):
        await chroma_store.store(build_item(number))

    # 2. Act
    top_five = await chroma_store.get_top_ranked(limit=5)
    top_two = await chroma_store.get_top_ranked(limit=2)

    # 3. Assert
    assert len(top_five) == 5
    assert len(top_two) == 2
    assert top_five[0].relevance_score == 10
    assert top_five[4].relevance_score == 6
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_top_ranked_empty_store(chroma_store: ChromaStore):
    """
    get_top_ranked must return an empty list when nothing has been stored.
    """
    # 1. Act
    ranked = await chroma_store.get_top_ranked(limit=10)

    # 2. Assert
    assert ranked == []
|
||||||
Loading…
x
Reference in New Issue
Block a user