:Release Notes: - :Detailed Notes: - :Testing Performed: - :QA Notes: as always AI generated :Issues Addressed: -
100 lines
3.5 KiB
Python
100 lines
3.5 KiB
Python
import pytest
|
|
from unittest.mock import AsyncMock, patch, MagicMock
|
|
from src.crawlers.playwright_crawler import PlaywrightCrawler
|
|
from src.crawlers.dto import NewsItemDTO
|
|
|
|
@pytest.mark.asyncio
async def test_playwright_crawler_fetch_latest_with_selector():
    """Check ``fetch_latest`` when the crawler is given a CSS selector.

    Playwright is fully mocked. The page yields one matched element whose
    nested link has the text "Test News Title" and the relative href
    "/news/1". The test expects a single result carrying that title, the
    href as an absolute URL on example.com, and the configured source. It
    also verifies that navigation and browser shutdown were really awaited.
    """
    url = "https://example.com/news"
    source = "ExampleSource"
    selector = ".news-item"

    crawler = PlaywrightCrawler(url, source, selector)

    with patch("src.crawlers.playwright_crawler.async_playwright") as mock_playwright:
        # Mocking the async context manager chain: entering the object
        # returned by async_playwright() yields mock_p.
        mock_p = AsyncMock()
        mock_playwright.return_value.__aenter__.return_value = mock_p

        mock_browser = AsyncMock()
        mock_p.chromium.launch.return_value = mock_browser

        mock_page = AsyncMock()
        mock_browser.new_page.return_value = mock_page

        # Setup mock elements
        mock_element = AsyncMock()
        mock_element.evaluate.return_value = False  # Assume it's not an 'a' tag itself

        mock_link = AsyncMock()
        mock_link.inner_text.return_value = "Test News Title"
        mock_link.get_attribute.return_value = "/news/1"

        mock_element.query_selector.return_value = mock_link
        mock_page.query_selector_all.return_value = [mock_element]

        results = await crawler.fetch_latest()

        assert len(results) == 1
        assert results[0].title == "Test News Title"
        assert results[0].url == "https://example.com/news/1"
        assert results[0].source == source

        mock_page.goto.assert_called_once_with(url, wait_until="networkidle", timeout=60000)
        # An AsyncMock records a call even when the resulting coroutine is
        # never awaited, so also require that the call was awaited.
        mock_page.goto.assert_awaited_once_with(url, wait_until="networkidle", timeout=60000)
        mock_browser.close.assert_called_once()
        mock_browser.close.assert_awaited_once()
|
|
|
|
@pytest.mark.asyncio
async def test_playwright_crawler_fetch_latest_no_selector():
    """Check ``fetch_latest`` when the crawler is built without a selector.

    Playwright is fully mocked and the page returns a single element whose
    text is "Headline Title". The test expects exactly one result with that
    title, the crawled page URL as its URL, and the configured source.
    """
    url = "https://example.com/blog"
    source = "ExampleBlog"

    crawler = PlaywrightCrawler(url, source)

    # Build the fake Playwright object graph bottom-up: element -> page ->
    # browser -> playwright handle.
    # The element stands in for the fallback (h2) match.
    heading = AsyncMock()
    heading.inner_text.return_value = "Headline Title"

    fake_page = AsyncMock()
    fake_page.query_selector_all.return_value = [heading]

    fake_browser = AsyncMock()
    fake_browser.new_page.return_value = fake_page

    fake_playwright = AsyncMock()
    fake_playwright.chromium.launch.return_value = fake_browser

    with patch("src.crawlers.playwright_crawler.async_playwright") as playwright_factory:
        # Entering the object returned by async_playwright() yields the fake handle.
        playwright_factory.return_value.__aenter__.return_value = fake_playwright

        results = await crawler.fetch_latest()

    assert len(results) == 1
    only_item = results[0]
    assert only_item.title == "Headline Title"
    assert only_item.url == url
    assert only_item.source == source
|
|
|
|
@pytest.mark.asyncio
async def test_playwright_crawler_fetch_latest_error():
    """Check ``fetch_latest`` when navigation fails.

    Playwright is fully mocked and ``page.goto`` raises. The test expects an
    empty result list rather than a propagated exception, and expects the
    browser to have been closed (and that close actually awaited).
    """
    url = "https://example.com/error"
    source = "ErrorSource"

    crawler = PlaywrightCrawler(url, source)

    with patch("src.crawlers.playwright_crawler.async_playwright") as mock_playwright:
        # Entering the object returned by async_playwright() yields mock_p.
        mock_p = AsyncMock()
        mock_playwright.return_value.__aenter__.return_value = mock_p

        mock_browser = AsyncMock()
        mock_p.chromium.launch.return_value = mock_browser

        mock_page = AsyncMock()
        mock_browser.new_page.return_value = mock_page

        # Simulate an error in page.goto
        mock_page.goto.side_effect = Exception("Crawl failed")

        results = await crawler.fetch_latest()

        assert results == []
        mock_browser.close.assert_called_once()
        # An AsyncMock records a call even when the resulting coroutine is
        # never awaited, so also require that the cleanup was awaited.
        mock_browser.close.assert_awaited_once()
|