from datetime import datetime
from unittest.mock import AsyncMock

import pytest

from src.crawlers.dto import NewsItemDTO
from src.processor.dto import EnrichedNewsItemDTO
from src.orchestrator.service import TrendScoutService


@pytest.mark.asyncio
async def test_run_iteration():
    """Run one iteration over new, duplicate and already-stored items.

    The crawler yields four items: two distinct new ones, one that storage
    reports as existing, and a repeat of the first new one. The test expects
    three analyze/store calls, four existence checks, cleared content on the
    low-relevance result, and no alerts.
    """
    # Arrange
    crawler = AsyncMock()
    processor = AsyncMock()
    storage = AsyncMock()
    notifier = AsyncMock()

    existing_url = "http://example.com/existing"
    shared_fields = {
        "content_text": "Sample text",
        "source": "Source",
        "timestamp": datetime.now(),
    }

    first_new = NewsItemDTO(
        title="Test Title",
        url="http://example.com/new1",
        **shared_fields,
    )
    already_stored = NewsItemDTO(
        title="Test Title Existing",
        url=existing_url,
        **shared_fields,
    )
    second_new = NewsItemDTO(
        title="Test Title 3",
        url="http://example.com/new2",
        **shared_fields,
    )

    high_relevance = EnrichedNewsItemDTO(
        **first_new.model_dump(),
        relevance_score=8,
        summary_ru="Summary",
        anomalies_detected=[],
    )
    with_anomaly = EnrichedNewsItemDTO(
        **second_new.model_dump(),
        relevance_score=5,
        summary_ru="Summary",
        anomalies_detected=["Anomaly"],
    )
    low_relevance = EnrichedNewsItemDTO(
        **first_new.model_dump(),
        relevance_score=3,
        summary_ru="Summary",
        anomalies_detected=[],
    )

    crawler.fetch_latest.return_value = [
        first_new,
        already_stored,
        second_new,
        first_new,
    ]

    # Storage reports only the pre-existing item's URL as known.
    def is_known(url):
        return url == existing_url

    storage.exists.side_effect = is_known

    # Each analyze call yields the next prepared result, in this order.
    processor.analyze.side_effect = [high_relevance, with_anomaly, low_relevance]

    service = TrendScoutService(
        crawlers=[crawler],
        processor=processor,
        storage=storage,
        notifier=notifier,
    )

    # Act
    await service.run_iteration()

    # Assert
    crawler.fetch_latest.assert_called_once()
    assert processor.analyze.call_count == 3
    assert storage.store.call_count == 3
    assert storage.exists.call_count == 4

    # The low relevance item (stored third) must have its content cleared;
    # the other two keep their original text.
    expected = [(8, "Sample text"), (5, "Sample text"), (3, "")]
    stored = [recorded.args[0] for recorded in storage.store.call_args_list]
    for item, (score, text) in zip(stored, expected):
        assert item.relevance_score == score
        assert item.content_text == text

    # Should not alert proactively anymore as per updated requirements
    notifier.send_alert.assert_not_called()


@pytest.mark.asyncio
async def test_run_iteration_crawler_failure():
    """A crawler that raises must not prevent the next crawler from running."""
    # Arrange
    failing_crawler = AsyncMock()
    failing_crawler.fetch_latest.side_effect = Exception("Crawler 1 failed")

    healthy_crawler = AsyncMock()
    healthy_crawler.fetch_latest.return_value = []

    service = TrendScoutService(
        crawlers=[failing_crawler, healthy_crawler],
        processor=AsyncMock(),
        storage=AsyncMock(),
        notifier=AsyncMock(),
    )

    # Act
    await service.run_iteration()

    # Assert - crawler 1 failed, but it shouldn't stop crawler 2
    for crawler in (failing_crawler, healthy_crawler):
        crawler.fetch_latest.assert_called_once()


@pytest.mark.asyncio
async def test_run_iteration_item_failure():
    """A failure while analyzing one item must not block the following item."""
    # Arrange
    crawler = AsyncMock()
    processor = AsyncMock()
    storage = AsyncMock()
    notifier = AsyncMock()

    broken_item = NewsItemDTO(
        title="T1",
        url="U1",
        content_text="C1",
        source="S1",
        timestamp=datetime.now(),
    )
    good_item = NewsItemDTO(
        title="T2",
        url="U2",
        content_text="C2",
        source="S2",
        timestamp=datetime.now(),
    )
    crawler.fetch_latest.return_value = [broken_item, good_item]
    storage.exists.return_value = False

    # The first analyze call raises; the second returns an enriched item.
    enriched_good_item = EnrichedNewsItemDTO(
        **good_item.model_dump(),
        relevance_score=6,
        summary_ru="Summary",
        anomalies_detected=[],
    )
    processor.analyze.side_effect = [
        Exception("Analyze failed"),
        enriched_good_item,
    ]

    service = TrendScoutService(
        crawlers=[crawler],
        processor=processor,
        storage=storage,
        notifier=notifier,
    )

    # Act
    await service.run_iteration()

    # Assert - item 1 failed, but it shouldn't stop item 2
    assert processor.analyze.call_count == 2
    # Only item 2 should be stored
    assert storage.store.call_count == 1