"""Async tests for ``PlaywrightCrawler.fetch_latest`` using a mocked Playwright."""

from unittest.mock import AsyncMock, patch, MagicMock

import pytest

from src.crawlers.playwright_crawler import PlaywrightCrawler
from src.crawlers.dto import NewsItemDTO

# Patch the name where the crawler module looks it up, not where it is defined.
_ASYNC_PLAYWRIGHT_TARGET = "src.crawlers.playwright_crawler.async_playwright"


def _install_browser_mocks(mock_playwright):
    """Wire the mocked ``async_playwright()`` -> browser -> page chain.

    Args:
        mock_playwright: The mock that ``patch`` substituted for
            ``async_playwright`` in the crawler module.

    Returns:
        A ``(mock_browser, mock_page)`` tuple of ``AsyncMock`` objects so a
        test can program page behaviour and assert on browser cleanup.
    """
    # Object produced by ``async with async_playwright() as p``.
    mock_p = AsyncMock()
    mock_playwright.return_value.__aenter__.return_value = mock_p

    # ``await p.chromium.launch()`` resolves to the browser mock.
    mock_browser = AsyncMock()
    mock_p.chromium.launch.return_value = mock_browser

    # ``await browser.new_page()`` resolves to the page mock.
    mock_page = AsyncMock()
    mock_browser.new_page.return_value = mock_page

    return mock_browser, mock_page


@pytest.mark.asyncio
async def test_playwright_crawler_fetch_latest_with_selector():
    """With a selector, one matched element yields one item with an absolute URL."""
    url = "https://example.com/news"
    source = "ExampleSource"
    selector = ".news-item"
    crawler = PlaywrightCrawler(url, source, selector)

    with patch(_ASYNC_PLAYWRIGHT_TARGET) as mock_playwright:
        # Mocking the async context manager chain
        mock_browser, mock_page = _install_browser_mocks(mock_playwright)

        # Setup mock elements
        mock_element = AsyncMock()
        mock_element.evaluate.return_value = False  # Assume it's not an 'a' tag itself

        # Nested link carrying the title text and a site-relative href.
        mock_link = AsyncMock()
        mock_link.inner_text.return_value = "Test News Title"
        mock_link.get_attribute.return_value = "/news/1"
        mock_element.query_selector.return_value = mock_link

        mock_page.query_selector_all.return_value = [mock_element]

        results = await crawler.fetch_latest()

        assert len(results) == 1
        assert results[0].title == "Test News Title"
        # The relative href is expected to be resolved against the page URL.
        assert results[0].url == "https://example.com/news/1"
        assert results[0].source == source

        mock_page.goto.assert_called_once_with(
            url, wait_until="networkidle", timeout=60000
        )
        mock_browser.close.assert_called_once()


@pytest.mark.asyncio
async def test_playwright_crawler_fetch_latest_no_selector():
    """Without a selector, a heading element yields an item that reuses the page URL."""
    url = "https://example.com/blog"
    source = "ExampleBlog"
    crawler = PlaywrightCrawler(url, source)

    with patch(_ASYNC_PLAYWRIGHT_TARGET) as mock_playwright:
        _, mock_page = _install_browser_mocks(mock_playwright)

        # Setup mock elements for fallback (h2)
        mock_h2 = AsyncMock()
        mock_h2.inner_text.return_value = "Headline Title"
        mock_page.query_selector_all.return_value = [mock_h2]

        results = await crawler.fetch_latest()

        assert len(results) == 1
        assert results[0].title == "Headline Title"
        assert results[0].url == url
        assert results[0].source == source


@pytest.mark.asyncio
async def test_playwright_crawler_fetch_latest_error():
    """A failure in ``page.goto`` yields an empty list and the browser is still closed."""
    url = "https://example.com/error"
    source = "ErrorSource"
    crawler = PlaywrightCrawler(url, source)

    with patch(_ASYNC_PLAYWRIGHT_TARGET) as mock_playwright:
        mock_browser, mock_page = _install_browser_mocks(mock_playwright)

        # Simulate an error in page.goto
        mock_page.goto.side_effect = Exception("Crawl failed")

        results = await crawler.fetch_latest()

        assert results == []
        mock_browser.close.assert_called_once()