"""Unit test for ``RSSCrawler.fetch_latest`` using a mocked aiohttp response."""

from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from src.crawlers.dto import NewsItemDTO
from src.crawlers.rss_crawler import RSSCrawler

# Minimal RSS 2.0 feed with two items. The document starts immediately after
# the opening quotes and carries no XML declaration, so it is accepted both by
# strict XML parsers (which reject leading whitespace before a declaration)
# and by parsers that refuse ``str`` input with an encoding declaration.
# NOTE(review): assumes the crawler maps <description> to ``content_text`` and
# <pubDate> to ``timestamp`` -- confirm against RSSCrawler.
MOCK_RSS = """<rss version="2.0">
  <channel>
    <title>Mock Source</title>
    <link>http://mock.source.com</link>
    <description>Mock Source Description</description>
    <item>
      <title>Test Title 1</title>
      <link>http://mock.source.com/1</link>
      <description>Test Content 1</description>
      <pubDate>Wed, 02 Oct 2002 08:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Test Title 2</title>
      <link>http://mock.source.com/2</link>
      <description>Test Content 2</description>
      <pubDate>Thu, 03 Oct 2002 10:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>
"""


@pytest.mark.asyncio
async def test_rss_crawler_fetch_latest():
    """Check that ``fetch_latest`` requests the feed URL and parses both items.

    ``aiohttp.ClientSession.get`` is patched so no network traffic occurs; the
    fake response serves ``MOCK_RSS`` as its body text.
    """
    url = "http://mock.source.com/rss"
    source = "Mock Source"
    crawler = RSSCrawler(url, source)

    with patch("aiohttp.ClientSession.get") as mock_get:
        # ``text()`` is awaited by the caller, so the response is an AsyncMock.
        mock_response = AsyncMock()
        mock_response.text.return_value = MOCK_RSS
        # ``raise_for_status`` is a plain synchronous call; a MagicMock keeps
        # it from returning a never-awaited coroutine.
        mock_response.raise_for_status = MagicMock()

        # Wire up the ``async with session.get(...) as response`` protocol.
        mock_get.return_value.__aenter__.return_value = mock_response

        items = await crawler.fetch_latest()

        # The feed must have been requested exactly once, with the feed URL.
        mock_get.assert_called_once_with(url)

    expected = [
        (
            "Test Title 1",
            "http://mock.source.com/1",
            "Test Content 1",
            datetime(2002, 10, 2, 8, 0, tzinfo=timezone.utc),
        ),
        (
            "Test Title 2",
            "http://mock.source.com/2",
            "Test Content 2",
            datetime(2002, 10, 3, 10, 0, tzinfo=timezone.utc),
        ),
    ]

    # Length is checked first so the zip below cannot hide a missing item.
    assert len(items) == 2

    for item, (title, link, content, timestamp) in zip(items, expected):
        assert isinstance(item, NewsItemDTO)
        assert item.title == title
        assert item.url == link
        assert item.content_text == content
        assert item.source == source
        assert item.timestamp == timestamp