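"""Tests for TrendScoutService.run_iteration.

Covers deduplication against storage, relevance-based content trimming,
and error isolation across crawlers and individual items.
"""
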
import pytest

from unittest.mock import AsyncMock
from datetime import datetime

from src.crawlers.dto import NewsItemDTO
from src.processor.dto import EnrichedNewsItemDTO
from src.orchestrator.service import TrendScoutService


@pytest.mark.asyncio
async def test_run_iteration():
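    """New items are analyzed and stored; low-relevance content is cleared."""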
    # Arrange
    crawler_mock = AsyncMock()
    processor_mock = AsyncMock()
    storage_mock = AsyncMock()
    notifier_mock = AsyncMock()

    timestamp = datetime.now()

    news_item = NewsItemDTO(
        title="Test Title",
        url="http://example.com/new1",
        content_text="Sample text",
        source="Source",
        timestamp=timestamp
    )

    existing_item = NewsItemDTO(
        title="Test Title Existing",
        url="http://example.com/existing",
        content_text="Sample text",
        source="Source",
        timestamp=timestamp
    )

    another_new_item = NewsItemDTO(
        title="Test Title 3",
        url="http://example.com/new2",
        content_text="Sample text",
        source="Source",
        timestamp=timestamp
    )

    high_relevance_item = EnrichedNewsItemDTO(
        **news_item.model_dump(),
        relevance_score=8,
        summary_ru="Summary",
        anomalies_detected=[]
    )

    anomaly_item = EnrichedNewsItemDTO(
        **another_new_item.model_dump(),
        relevance_score=5,
        summary_ru="Summary",
        anomalies_detected=["Anomaly"]
    )

    low_relevance_item = EnrichedNewsItemDTO(
        **news_item.model_dump(),
        relevance_score=3,
        summary_ru="Summary",
        anomalies_detected=[]
    )

    crawler_mock.fetch_latest.return_value = [news_item, existing_item, another_new_item, news_item]
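    # Note: news_item appears twice in the batch; the mocked exists() only
    # flags the already-stored URL, so the duplicate passes the check and is
    # analyzed and stored again (hence 4 exists checks but 3 analyze calls).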

    # Mock exists to return True only for existing_item
    storage_mock.exists.side_effect = lambda url: url == "http://example.com/existing"

    # Return a different enriched item for each analyze call
    processor_mock.analyze.side_effect = [
        high_relevance_item,
        anomaly_item,
        low_relevance_item,
    ]
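    # side_effect order mirrors fetch order for the non-existing items:
    # new1, new2, then the duplicate new1.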

    service = TrendScoutService(
        crawlers=[crawler_mock],
        processor=processor_mock,
        storage=storage_mock,
        notifier=notifier_mock
    )

    # Act
    await service.run_iteration()

    # Assert
    crawler_mock.fetch_latest.assert_called_once()
    assert processor_mock.analyze.call_count == 3
    assert storage_mock.store.call_count == 3
    assert storage_mock.exists.call_count == 4

    # Verify the low-relevance item (stored third) had its content cleared
    stored_items = [call.args[0] for call in storage_mock.store.call_args_list]
    assert stored_items[0].relevance_score == 8
    assert stored_items[0].content_text == "Sample text"
    assert stored_items[1].relevance_score == 5
    assert stored_items[1].content_text == "Sample text"
    assert stored_items[2].relevance_score == 3
    assert stored_items[2].content_text == ""

    # Per updated requirements, the service no longer alerts proactively
    assert notifier_mock.send_alert.call_count == 0


@pytest.mark.asyncio
async def test_run_iteration_crawler_failure():
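    """A failing crawler must not prevent the remaining crawlers from running."""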
    # Arrange
    crawler1 = AsyncMock()
    crawler2 = AsyncMock()
    processor = AsyncMock()
    storage = AsyncMock()
    notifier = AsyncMock()

    crawler1.fetch_latest.side_effect = Exception("Crawler 1 failed")
    crawler2.fetch_latest.return_value = []

    service = TrendScoutService(
        crawlers=[crawler1, crawler2],
        processor=processor,
        storage=storage,
        notifier=notifier
    )

    # Act
    await service.run_iteration()

    # Assert - crawler 1 failed, but it shouldn't stop crawler 2
    crawler1.fetch_latest.assert_called_once()
    crawler2.fetch_latest.assert_called_once()


@pytest.mark.asyncio
async def test_run_iteration_item_failure():
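    """A failing analyze() call must not stop later items from being processed."""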
    # Arrange
    crawler = AsyncMock()
    processor = AsyncMock()
    storage = AsyncMock()
    notifier = AsyncMock()

    item1 = NewsItemDTO(title="T1", url="U1", content_text="C1", source="S1", timestamp=datetime.now())
    item2 = NewsItemDTO(title="T2", url="U2", content_text="C2", source="S2", timestamp=datetime.now())

    crawler.fetch_latest.return_value = [item1, item2]
    storage.exists.return_value = False

    enriched_item2 = EnrichedNewsItemDTO(
        **item2.model_dump(),
        relevance_score=6,
        summary_ru="Summary",
        anomalies_detected=[]
    )
    # processor.analyze fails for item1 but succeeds for item2
    processor.analyze.side_effect = [Exception("Analyze failed"), enriched_item2]

    service = TrendScoutService(
        crawlers=[crawler],
        processor=processor,
        storage=storage,
        notifier=notifier
    )

    # Act
    await service.run_iteration()

    # Assert - item 1 failed, but it shouldn't stop item 2
    assert processor.analyze.call_count == 2
    # Only item 2 should be stored
    assert storage.store.call_count == 1