# AI-Trend-Scout/tests/crawlers/test_rss_crawler.py
#
# 69 lines
# 2.3 KiB
# Python

import pytest
from unittest.mock import AsyncMock, patch, MagicMock
from datetime import datetime, timezone
from src.crawlers.rss_crawler import RSSCrawler
from src.crawlers.dto import NewsItemDTO
# Canned RSS 2.0 document served as the mocked HTTP response body in
# test_rss_crawler_fetch_latest. It holds exactly two <item> entries; each
# <pubDate> is expressed in GMT, and the test compares the parsed timestamps
# against UTC-aware datetimes.
MOCK_RSS = """<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>Mock Source</title>
<link>http://mock.source.com</link>
<description>Mock Source Description</description>
<item>
<title>Test Title 1</title>
<link>http://mock.source.com/1</link>
<description>Test Content 1</description>
<pubDate>Wed, 02 Oct 2002 08:00:00 GMT</pubDate>
</item>
<item>
<title>Test Title 2</title>
<link>http://mock.source.com/2</link>
<description>Test Content 2</description>
<pubDate>Thu, 03 Oct 2002 10:00:00 GMT</pubDate>
</item>
</channel>
</rss>
"""
@pytest.mark.asyncio
async def test_rss_crawler_fetch_latest():
    """Check that ``RSSCrawler.fetch_latest`` turns the mocked feed into DTOs.

    ``aiohttp.ClientSession.get`` is patched, and the patched call is wired to
    yield a fake response whose ``text()`` resolves to ``MOCK_RSS``. The test
    then verifies the requested URL and every asserted field of both items.
    """
    feed_url = "http://mock.source.com/rss"
    source_name = "Mock Source"

    # (title, url, content_text, timestamp) expected for each item, in feed order.
    expected_items = [
        (
            "Test Title 1",
            "http://mock.source.com/1",
            "Test Content 1",
            datetime(2002, 10, 2, 8, 0, tzinfo=timezone.utc),
        ),
        (
            "Test Title 2",
            "http://mock.source.com/2",
            "Test Content 2",
            datetime(2002, 10, 3, 10, 0, tzinfo=timezone.utc),
        ),
    ]

    crawler = RSSCrawler(feed_url, source_name)

    # Fake response: ``text()`` is awaitable (AsyncMock child), while
    # ``raise_for_status()`` is replaced with a plain synchronous mock.
    fake_response = AsyncMock()
    fake_response.raise_for_status = MagicMock()
    fake_response.text.return_value = MOCK_RSS

    with patch("aiohttp.ClientSession.get") as patched_get:
        # Entering ``async with session.get(...)`` hands back the fake response.
        patched_get.return_value.__aenter__.return_value = fake_response

        items = await crawler.fetch_latest()

        # The feed must have been requested exactly once, with the crawler's URL.
        patched_get.assert_called_once_with(feed_url)

    # Both feed entries must be parsed, in order, with all fields mapped.
    assert len(items) == 2
    for item, (title, link, body, published) in zip(items, expected_items):
        assert isinstance(item, NewsItemDTO)
        assert item.title == title
        assert item.url == link
        assert item.content_text == body
        assert item.source == source_name
        assert item.timestamp == published