[perf] stabilization of previous release
This commit is contained in:
parent
9c8e4c7345
commit
4bf7cb4331
@ -2,12 +2,13 @@ from aiogram import Bot, Dispatcher
|
|||||||
from aiogram.client.default import DefaultBotProperties
|
from aiogram.client.default import DefaultBotProperties
|
||||||
from src.bot.handlers import get_router
|
from src.bot.handlers import get_router
|
||||||
from src.storage.base import IVectorStore
|
from src.storage.base import IVectorStore
|
||||||
|
from src.processor.base import ILLMProvider
|
||||||
|
|
||||||
def setup_bot(token: str, storage: IVectorStore, allowed_chat_id: str) -> tuple[Bot, Dispatcher]:
|
def setup_bot(token: str, storage: IVectorStore, processor: ILLMProvider, allowed_chat_id: str) -> tuple[Bot, Dispatcher]:
|
||||||
"""
|
"""
|
||||||
Setup the aiogram Bot and Dispatcher with handlers.
|
Setup the aiogram Bot and Dispatcher with handlers.
|
||||||
"""
|
"""
|
||||||
bot = Bot(token=token, default=DefaultBotProperties(parse_mode="HTML"))
|
bot = Bot(token=token, default=DefaultBotProperties(parse_mode="HTML"))
|
||||||
dp = Dispatcher()
|
dp = Dispatcher()
|
||||||
dp.include_router(get_router(storage, allowed_chat_id))
|
dp.include_router(get_router(storage, processor, allowed_chat_id))
|
||||||
return bot, dp
|
return bot, dp
|
||||||
|
|||||||
@ -10,6 +10,7 @@ from aiogram.utils.keyboard import InlineKeyboardBuilder
|
|||||||
from aiogram.utils.formatting import as_list, as_marked_section, Bold, TextLink
|
from aiogram.utils.formatting import as_list, as_marked_section, Bold, TextLink
|
||||||
|
|
||||||
from src.processor.dto import EnrichedNewsItemDTO
|
from src.processor.dto import EnrichedNewsItemDTO
|
||||||
|
from src.processor.base import ILLMProvider
|
||||||
from src.storage.base import IVectorStore
|
from src.storage.base import IVectorStore
|
||||||
|
|
||||||
class AccessMiddleware(BaseMiddleware):
|
class AccessMiddleware(BaseMiddleware):
|
||||||
@ -28,7 +29,7 @@ class AccessMiddleware(BaseMiddleware):
|
|||||||
return
|
return
|
||||||
return await handler(event, data)
|
return await handler(event, data)
|
||||||
|
|
||||||
def get_router(storage: IVectorStore, allowed_chat_id: str) -> Router:
|
def get_router(storage: IVectorStore, processor: ILLMProvider, allowed_chat_id: str) -> Router:
|
||||||
router = Router(name="main_router")
|
router = Router(name="main_router")
|
||||||
router.message.middleware(AccessMiddleware(allowed_chat_id))
|
router.message.middleware(AccessMiddleware(allowed_chat_id))
|
||||||
|
|
||||||
@ -52,9 +53,24 @@ def get_router(storage: IVectorStore, allowed_chat_id: str) -> Router:
|
|||||||
"/latest [category] - Show the latest enriched news trends\n"
|
"/latest [category] - Show the latest enriched news trends\n"
|
||||||
"/search query - Search for news\n"
|
"/search query - Search for news\n"
|
||||||
"/stats - Show database statistics\n"
|
"/stats - Show database statistics\n"
|
||||||
|
"/params - Show LLM processor parameters\n"
|
||||||
)
|
)
|
||||||
await message.answer(help_text)
|
await message.answer(help_text)
|
||||||
|
|
||||||
|
@router.message(Command("params"))
|
||||||
|
async def command_params_handler(message: Message) -> None:
|
||||||
|
"""
|
||||||
|
This handler receives messages with `/params` command
|
||||||
|
"""
|
||||||
|
info = processor.get_info()
|
||||||
|
|
||||||
|
response = "🤖 <b>LLM Processor Parameters</b>\n\n"
|
||||||
|
response += f"<b>Model:</b> {html.escape(info.get('model', 'Unknown'))}\n"
|
||||||
|
response += f"<b>Base URL:</b> {html.escape(info.get('base_url', 'Unknown'))}\n"
|
||||||
|
response += f"<b>Prompt Summary:</b> {html.escape(info.get('prompt_summary', 'Unknown'))}"
|
||||||
|
|
||||||
|
await message.answer(response, parse_mode="HTML")
|
||||||
|
|
||||||
@router.message(Command("latest"))
|
@router.message(Command("latest"))
|
||||||
async def command_latest_handler(message: Message, command: CommandObject) -> None:
|
async def command_latest_handler(message: Message, command: CommandObject) -> None:
|
||||||
"""
|
"""
|
||||||
@ -146,12 +162,13 @@ def get_router(storage: IVectorStore, allowed_chat_id: str) -> Router:
|
|||||||
This handler receives messages with `/stats` command
|
This handler receives messages with `/stats` command
|
||||||
"""
|
"""
|
||||||
stats = await storage.get_stats()
|
stats = await storage.get_stats()
|
||||||
total = stats.get("total", 0)
|
total = stats.get("total_count", 0)
|
||||||
|
|
||||||
breakdown = []
|
breakdown = []
|
||||||
for cat, count in stats.items():
|
for key, count in stats.items():
|
||||||
if cat != "total":
|
if key.startswith("category_"):
|
||||||
breakdown.append(f"- {cat}: {count}")
|
cat_name = key.replace("category_", "")
|
||||||
|
breakdown.append(f"- {cat_name}: {count}")
|
||||||
|
|
||||||
response = f"📊 <b>Database Statistics</b>\n\nTotal items: {total}\n"
|
response = f"📊 <b>Database Statistics</b>\n\nTotal items: {total}\n"
|
||||||
if breakdown:
|
if breakdown:
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from email.utils import parsedate_to_datetime
|
from email.utils import parsedate_to_datetime
|
||||||
from typing import List
|
from typing import List
|
||||||
@ -7,17 +8,26 @@ from typing import List
|
|||||||
from src.crawlers.base import ICrawler
|
from src.crawlers.base import ICrawler
|
||||||
from src.crawlers.dto import NewsItemDTO
|
from src.crawlers.dto import NewsItemDTO
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class RSSCrawler(ICrawler):
|
class RSSCrawler(ICrawler):
|
||||||
|
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||||
|
|
||||||
def __init__(self, url: str, source: str):
|
def __init__(self, url: str, source: str):
|
||||||
self.url = url
|
self.url = url
|
||||||
self.source = source
|
self.source = source
|
||||||
|
|
||||||
async def fetch_latest(self) -> List[NewsItemDTO]:
|
async def fetch_latest(self) -> List[NewsItemDTO]:
|
||||||
async with aiohttp.ClientSession() as session:
|
headers = {"User-Agent": self.USER_AGENT}
|
||||||
async with session.get(self.url) as response:
|
try:
|
||||||
response.raise_for_status()
|
async with aiohttp.ClientSession() as session:
|
||||||
xml_data = await response.text()
|
async with session.get(self.url, headers=headers) as response:
|
||||||
return self._parse_xml(xml_data)
|
response.raise_for_status()
|
||||||
|
xml_data = await response.text()
|
||||||
|
return self._parse_xml(xml_data)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error fetching RSS from {self.url}: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
def _parse_xml(self, xml_data: str) -> List[NewsItemDTO]:
|
def _parse_xml(self, xml_data: str) -> List[NewsItemDTO]:
|
||||||
root = ET.fromstring(xml_data)
|
root = ET.fromstring(xml_data)
|
||||||
|
|||||||
18
src/main.py
18
src/main.py
@ -9,6 +9,7 @@ from aiogram import Bot, Dispatcher
|
|||||||
|
|
||||||
from src.crawlers.base import ICrawler
|
from src.crawlers.base import ICrawler
|
||||||
from src.crawlers.rss_crawler import RSSCrawler
|
from src.crawlers.rss_crawler import RSSCrawler
|
||||||
|
from src.crawlers.playwright_crawler import PlaywrightCrawler
|
||||||
from src.processor.ollama_provider import OllamaProvider
|
from src.processor.ollama_provider import OllamaProvider
|
||||||
from src.storage.chroma_store import ChromaStore
|
from src.storage.chroma_store import ChromaStore
|
||||||
from src.notifications.telegram import TelegramNotifier
|
from src.notifications.telegram import TelegramNotifier
|
||||||
@ -49,7 +50,20 @@ async def main():
|
|||||||
|
|
||||||
# 1. Initialize Components that do not depend on Bot
|
# 1. Initialize Components that do not depend on Bot
|
||||||
crawlers: List[ICrawler] = [
|
crawlers: List[ICrawler] = [
|
||||||
RSSCrawler("https://habr.com/ru/rss/hubs/artificial_intelligence/articles/?fl=ru", source="Habr AI")
|
RSSCrawler("https://habr.com/ru/rss/hubs/artificial_intelligence/articles/?fl=ru", source="Habr AI"),
|
||||||
|
RSSCrawler("https://www.nature.com/nature.rss", source="Nature"),
|
||||||
|
RSSCrawler("https://news.google.com/rss/search?q=WebOS+Chromium+Edge+AI+LGE+SmartTV&hl=en-US&gl=US&ceid=US:en", source="Google News R&D"),
|
||||||
|
RSSCrawler("https://news.samsung.com/global/rss", source="Samsung Newsroom"),
|
||||||
|
RSSCrawler("https://www.sony.com/en/SonyInfo/News/Service/rss.xml", source="Sony Newsroom"),
|
||||||
|
PlaywrightCrawler("https://cvpr.thecvf.com/Conferences/2025", source="CVPR 2025", selector=".conference-news-item"),
|
||||||
|
PlaywrightCrawler("https://www.ces.tech/news/press-releases.aspx", source="CES 2025", selector=".press-release-item"),
|
||||||
|
RSSCrawler("https://vc.ru/rss/tech", source="VC.ru Tech"),
|
||||||
|
RSSCrawler("https://rb.ru/rss/", source="RB.ru"),
|
||||||
|
RSSCrawler("https://www.science.org/rss/news_current.xml", source="Science News"),
|
||||||
|
RSSCrawler("https://ufn.ru/en/rss/", source="УФН"),
|
||||||
|
RSSCrawler("https://www.tadviser.ru/xml/tadviser.xml", source="TAdviser"),
|
||||||
|
RSSCrawler("https://habr.com/ru/rss/company/yandex/blog/", source="Yandex Tech"),
|
||||||
|
RSSCrawler("https://blog.google/technology/ai/rss/", source="Google AI Blog"),
|
||||||
]
|
]
|
||||||
|
|
||||||
processor = OllamaProvider()
|
processor = OllamaProvider()
|
||||||
@ -62,7 +76,7 @@ async def main():
|
|||||||
storage = ChromaStore(client=chroma_client)
|
storage = ChromaStore(client=chroma_client)
|
||||||
|
|
||||||
# 2. Initialize Bot & Dispatcher
|
# 2. Initialize Bot & Dispatcher
|
||||||
bot, dp = setup_bot(bot_token, storage, chat_id)
|
bot, dp = setup_bot(bot_token, storage, processor, chat_id)
|
||||||
|
|
||||||
# 3. Initialize Notifier and Orchestrator
|
# 3. Initialize Notifier and Orchestrator
|
||||||
notifier = TelegramNotifier(bot, chat_id)
|
notifier = TelegramNotifier(bot, chat_id)
|
||||||
|
|||||||
@ -1,9 +1,12 @@
|
|||||||
|
import logging
|
||||||
from typing import List
|
from typing import List
|
||||||
from src.crawlers.base import ICrawler
|
from src.crawlers.base import ICrawler
|
||||||
from src.processor.base import ILLMProvider
|
from src.processor.base import ILLMProvider
|
||||||
from src.storage.base import IVectorStore
|
from src.storage.base import IVectorStore
|
||||||
from src.notifications.base import INotificationService
|
from src.notifications.base import INotificationService
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class TrendScoutService:
|
class TrendScoutService:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -19,18 +22,24 @@ class TrendScoutService:
|
|||||||
|
|
||||||
async def run_iteration(self) -> None:
|
async def run_iteration(self) -> None:
|
||||||
for crawler in self.crawlers:
|
for crawler in self.crawlers:
|
||||||
items = await crawler.fetch_latest()
|
try:
|
||||||
for item in items:
|
items = await crawler.fetch_latest()
|
||||||
if await self.storage.exists(item.url):
|
for item in items:
|
||||||
continue
|
try:
|
||||||
|
if await self.storage.exists(item.url):
|
||||||
|
continue
|
||||||
|
|
||||||
enriched_item = await self.processor.analyze(item)
|
enriched_item = await self.processor.analyze(item)
|
||||||
|
|
||||||
if enriched_item.relevance_score < 5:
|
if enriched_item.relevance_score < 5:
|
||||||
enriched_item.content_text = ""
|
enriched_item.content_text = ""
|
||||||
|
|
||||||
await self.storage.store(enriched_item)
|
await self.storage.store(enriched_item)
|
||||||
|
|
||||||
# Proactive alerts are currently disabled per user request
|
# Proactive alerts are currently disabled per user request
|
||||||
# if enriched_item.relevance_score >= 8 or bool(enriched_item.anomalies_detected):
|
# if enriched_item.relevance_score >= 8 or bool(enriched_item.anomalies_detected):
|
||||||
# await self.notifier.send_alert(enriched_item)
|
# await self.notifier.send_alert(enriched_item)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing item {item.url}: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error running crawler {crawler}: {e}")
|
||||||
|
|||||||
@ -6,3 +6,7 @@ class ILLMProvider(ABC):
|
|||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def analyze(self, news_item: NewsItemDTO) -> EnrichedNewsItemDTO:
|
async def analyze(self, news_item: NewsItemDTO) -> EnrichedNewsItemDTO:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def get_info(self) -> dict[str, str]:
|
||||||
|
pass
|
||||||
|
|||||||
@ -7,6 +7,15 @@ from src.processor.base import ILLMProvider
|
|||||||
from src.processor.dto import EnrichedNewsItemDTO
|
from src.processor.dto import EnrichedNewsItemDTO
|
||||||
|
|
||||||
class OllamaProvider(ILLMProvider):
|
class OllamaProvider(ILLMProvider):
|
||||||
|
def get_info(self) -> dict[str, str]:
|
||||||
|
base_url = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434')
|
||||||
|
model = os.environ.get('OLLAMA_MODEL', 'gpt-oss:120b-cloud')
|
||||||
|
return {
|
||||||
|
"model": model,
|
||||||
|
"base_url": base_url,
|
||||||
|
"prompt_summary": "Russian summary, 2 sentences, R&D focus"
|
||||||
|
}
|
||||||
|
|
||||||
async def analyze(self, news_item: NewsItemDTO) -> EnrichedNewsItemDTO:
|
async def analyze(self, news_item: NewsItemDTO) -> EnrichedNewsItemDTO:
|
||||||
base_url = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434')
|
base_url = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434')
|
||||||
url = base_url if base_url.endswith('/api/generate') else f"{base_url.rstrip('/')}/api/generate"
|
url = base_url if base_url.endswith('/api/generate') else f"{base_url.rstrip('/')}/api/generate"
|
||||||
@ -14,6 +23,7 @@ class OllamaProvider(ILLMProvider):
|
|||||||
f"Analyze the following article.\nTitle: {news_item.title}\n"
|
f"Analyze the following article.\nTitle: {news_item.title}\n"
|
||||||
f"Content: {news_item.content_text}\n"
|
f"Content: {news_item.content_text}\n"
|
||||||
"Return JSON with 'relevance_score' (0-10), 'summary_ru' (string), 'anomalies_detected' (list of strings), and 'category' (string).\n"
|
"Return JSON with 'relevance_score' (0-10), 'summary_ru' (string), 'anomalies_detected' (list of strings), and 'category' (string).\n"
|
||||||
|
"The 'summary_ru' MUST be in Russian and strictly NO MORE than 2 sentences.\n"
|
||||||
"The 'category' must be exactly one of: 'Browsers', 'Edge AI', 'SmartTV', 'Samsung New Technologies', 'Middleware new trends', 'Competitors', 'Other'.\n"
|
"The 'category' must be exactly one of: 'Browsers', 'Edge AI', 'SmartTV', 'Samsung New Technologies', 'Middleware new trends', 'Competitors', 'Other'.\n"
|
||||||
"For 'relevance_score', prioritize and give higher scores to articles related to R&D, Chromium, NPU, and Smart TV operating systems.\n"
|
"For 'relevance_score', prioritize and give higher scores to articles related to R&D, Chromium, NPU, and Smart TV operating systems.\n"
|
||||||
"Regarding 'anomalies_detected': only detect factual, conceptual, or industry-related anomalies (e.g., sudden technological shifts, unexpected competitor moves). "
|
"Regarding 'anomalies_detected': only detect factual, conceptual, or industry-related anomalies (e.g., sudden technological shifts, unexpected competitor moves). "
|
||||||
|
|||||||
@ -68,6 +68,9 @@ class ChromaStore(IVectorStore):
|
|||||||
document = documents[0][idx] if documents and documents[0] else ""
|
document = documents[0][idx] if documents and documents[0] else ""
|
||||||
items.append(self._reconstruct_dto(metadata, document))
|
items.append(self._reconstruct_dto(metadata, document))
|
||||||
|
|
||||||
|
# Sort items by relevance_score in descending order
|
||||||
|
items.sort(key=lambda x: x.relevance_score, reverse=True)
|
||||||
|
|
||||||
return items
|
return items
|
||||||
|
|
||||||
def _reconstruct_dto(self, metadata: Mapping[str, Any], document: str) -> EnrichedNewsItemDTO:
|
def _reconstruct_dto(self, metadata: Mapping[str, Any], document: str) -> EnrichedNewsItemDTO:
|
||||||
|
|||||||
@ -35,8 +35,18 @@ def allowed_chat_id():
|
|||||||
return "123456789"
|
return "123456789"
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def router(mock_storage, allowed_chat_id):
|
def mock_processor():
|
||||||
return get_router(mock_storage, allowed_chat_id)
|
processor = MagicMock()
|
||||||
|
processor.get_info.return_value = {
|
||||||
|
"model": "test-model",
|
||||||
|
"base_url": "http://test-url",
|
||||||
|
"prompt_summary": "Test summary"
|
||||||
|
}
|
||||||
|
return processor
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def router(mock_storage, mock_processor, allowed_chat_id):
|
||||||
|
return get_router(mock_storage, mock_processor, allowed_chat_id)
|
||||||
|
|
||||||
def get_handler(router, callback_name):
|
def get_handler(router, callback_name):
|
||||||
for handler in router.message.handlers:
|
for handler in router.message.handlers:
|
||||||
@ -79,6 +89,22 @@ async def test_command_help_handler(router, allowed_chat_id):
|
|||||||
assert "/start" in args[0]
|
assert "/start" in args[0]
|
||||||
assert "/latest" in args[0]
|
assert "/latest" in args[0]
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_command_params_handler(router, mock_processor, allowed_chat_id):
|
||||||
|
handler = get_handler(router, "command_params_handler")
|
||||||
|
message = AsyncMock()
|
||||||
|
message.chat.id = int(allowed_chat_id)
|
||||||
|
message.answer = AsyncMock()
|
||||||
|
|
||||||
|
await handler(message)
|
||||||
|
|
||||||
|
mock_processor.get_info.assert_called_once()
|
||||||
|
message.answer.assert_called_once()
|
||||||
|
args, kwargs = message.answer.call_args
|
||||||
|
assert "LLM Processor Parameters" in args[0]
|
||||||
|
assert "test-model" in args[0]
|
||||||
|
assert "HTML" in kwargs.get("parse_mode", "")
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_command_latest_handler(router, mock_storage, allowed_chat_id):
|
async def test_command_latest_handler(router, mock_storage, allowed_chat_id):
|
||||||
handler = get_handler(router, "command_latest_handler")
|
handler = get_handler(router, "command_latest_handler")
|
||||||
|
|||||||
@ -45,8 +45,8 @@ async def test_rss_crawler_fetch_latest():
|
|||||||
# Call the method
|
# Call the method
|
||||||
items = await crawler.fetch_latest()
|
items = await crawler.fetch_latest()
|
||||||
|
|
||||||
# Verify the mock was called with the correct URL
|
# Verify the mock was called with the correct URL and headers
|
||||||
mock_get.assert_called_once_with(url)
|
mock_get.assert_called_once_with(url, headers={"User-Agent": RSSCrawler.USER_AGENT})
|
||||||
|
|
||||||
# Verify the parsing results
|
# Verify the parsing results
|
||||||
assert len(items) == 2
|
assert len(items) == 2
|
||||||
@ -66,3 +66,17 @@ async def test_rss_crawler_fetch_latest():
|
|||||||
assert items[1].content_text == "Test Content 2"
|
assert items[1].content_text == "Test Content 2"
|
||||||
assert items[1].source == source
|
assert items[1].source == source
|
||||||
assert items[1].timestamp == datetime(2002, 10, 3, 10, 0, tzinfo=timezone.utc)
|
assert items[1].timestamp == datetime(2002, 10, 3, 10, 0, tzinfo=timezone.utc)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_rss_crawler_fetch_latest_error():
|
||||||
|
url = "http://error.source.com/rss"
|
||||||
|
source = "Error Source"
|
||||||
|
crawler = RSSCrawler(url, source)
|
||||||
|
|
||||||
|
with patch("aiohttp.ClientSession.get") as mock_get:
|
||||||
|
# Simulate an exception during the request
|
||||||
|
mock_get.side_effect = Exception("Connection error")
|
||||||
|
|
||||||
|
items = await crawler.fetch_latest()
|
||||||
|
|
||||||
|
assert items == []
|
||||||
|
|||||||
@ -100,3 +100,67 @@ async def test_run_iteration():
|
|||||||
|
|
||||||
# Should not alert proactively anymore as per updated requirements
|
# Should not alert proactively anymore as per updated requirements
|
||||||
assert notifier_mock.send_alert.call_count == 0
|
assert notifier_mock.send_alert.call_count == 0
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_run_iteration_crawler_failure():
|
||||||
|
# Arrange
|
||||||
|
crawler1 = AsyncMock()
|
||||||
|
crawler2 = AsyncMock()
|
||||||
|
processor = AsyncMock()
|
||||||
|
storage = AsyncMock()
|
||||||
|
notifier = AsyncMock()
|
||||||
|
|
||||||
|
crawler1.fetch_latest.side_effect = Exception("Crawler 1 failed")
|
||||||
|
crawler2.fetch_latest.return_value = []
|
||||||
|
|
||||||
|
service = TrendScoutService(
|
||||||
|
crawlers=[crawler1, crawler2],
|
||||||
|
processor=processor,
|
||||||
|
storage=storage,
|
||||||
|
notifier=notifier
|
||||||
|
)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
await service.run_iteration()
|
||||||
|
|
||||||
|
# Assert - crawler 1 failed, but it shouldn't stop crawler 2
|
||||||
|
crawler1.fetch_latest.assert_called_once()
|
||||||
|
crawler2.fetch_latest.assert_called_once()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_run_iteration_item_failure():
|
||||||
|
# Arrange
|
||||||
|
crawler = AsyncMock()
|
||||||
|
processor = AsyncMock()
|
||||||
|
storage = AsyncMock()
|
||||||
|
notifier = AsyncMock()
|
||||||
|
|
||||||
|
item1 = NewsItemDTO(title="T1", url="U1", content_text="C1", source="S1", timestamp=datetime.now())
|
||||||
|
item2 = NewsItemDTO(title="T2", url="U2", content_text="C2", source="S2", timestamp=datetime.now())
|
||||||
|
|
||||||
|
crawler.fetch_latest.return_value = [item1, item2]
|
||||||
|
storage.exists.return_value = False
|
||||||
|
|
||||||
|
# processor.analyze fails for item1
|
||||||
|
enriched_item2 = EnrichedNewsItemDTO(
|
||||||
|
**item2.model_dump(),
|
||||||
|
relevance_score=6,
|
||||||
|
summary_ru="Summary",
|
||||||
|
anomalies_detected=[]
|
||||||
|
)
|
||||||
|
processor.analyze.side_effect = [Exception("Analyze failed"), enriched_item2]
|
||||||
|
|
||||||
|
service = TrendScoutService(
|
||||||
|
crawlers=[crawler],
|
||||||
|
processor=processor,
|
||||||
|
storage=storage,
|
||||||
|
notifier=notifier
|
||||||
|
)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
await service.run_iteration()
|
||||||
|
|
||||||
|
# Assert - item 1 failed, but it shouldn't stop item 2
|
||||||
|
assert processor.analyze.call_count == 2
|
||||||
|
# Only item 2 should be stored
|
||||||
|
assert storage.store.call_count == 1
|
||||||
|
|||||||
@ -106,3 +106,13 @@ async def test_ollama_provider_analyze_markdown_json(sample_news_item):
|
|||||||
assert result.anomalies_detected == []
|
assert result.anomalies_detected == []
|
||||||
assert result.category == "Browsers"
|
assert result.category == "Browsers"
|
||||||
|
|
||||||
|
def test_ollama_provider_get_info():
|
||||||
|
os.environ['OLLAMA_API_URL'] = 'http://test-url:11434'
|
||||||
|
os.environ['OLLAMA_MODEL'] = 'test-model'
|
||||||
|
provider = OllamaProvider()
|
||||||
|
info = provider.get_info()
|
||||||
|
|
||||||
|
assert info["model"] == "test-model"
|
||||||
|
assert info["base_url"] == "http://test-url:11434"
|
||||||
|
assert info["prompt_summary"] == "Russian summary, 2 sentences, R&D focus"
|
||||||
|
|
||||||
|
|||||||
@ -230,3 +230,32 @@ async def test_get_stats(chroma_store: ChromaStore):
|
|||||||
assert stats["total_count"] == 3
|
assert stats["total_count"] == 3
|
||||||
assert stats["category_Tech"] == 2
|
assert stats["category_Tech"] == 2
|
||||||
assert stats["category_Science"] == 1
|
assert stats["category_Science"] == 1
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_search_sorting(chroma_store: ChromaStore):
|
||||||
|
# Arrange
|
||||||
|
items = [
|
||||||
|
EnrichedNewsItemDTO(
|
||||||
|
title=f"Title {i}",
|
||||||
|
url=f"https://example.com/{i}",
|
||||||
|
content_text=f"Content {i}",
|
||||||
|
source="Source",
|
||||||
|
timestamp=datetime.now(timezone.utc),
|
||||||
|
relevance_score=i,
|
||||||
|
summary_ru=f"Сводка {i}",
|
||||||
|
anomalies_detected=[],
|
||||||
|
category="Tech"
|
||||||
|
) for i in range(1, 6) # Scores 1 to 5
|
||||||
|
]
|
||||||
|
|
||||||
|
for item in items:
|
||||||
|
await chroma_store.store(item)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
results = await chroma_store.search("Content", limit=10)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert len(results) == 5
|
||||||
|
# Should be sorted 5, 4, 3, 2, 1
|
||||||
|
scores = [r.relevance_score for r in results]
|
||||||
|
assert scores == [5, 4, 3, 2, 1]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user