"""Tests for SciRateCrawler.fetch_latest with the aiohttp session patched out."""

from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from src.crawlers.scirate_crawler import SciRateCrawler
from src.crawlers.dto import NewsItemDTO

# Sample listing text; none of the tests visible in this module read it.
MOCK_SCIRATE_HTML = """
  • Attention is Really All You Need
    Vaswani et al.
    This paper presents a new architecture...
  • Another Paper
    Doe and Smith
    Abstract of another paper.
    """

# Dotted path of the HTTP call that every test replaces with a mock.
_SESSION_GET = "aiohttp.ClientSession.get"


def _fake_response(status, body=None):
    """Build an AsyncMock standing in for an aiohttp response.

    Args:
        status: Value exposed as the ``status`` attribute.
        body: Value the awaited ``text()`` call returns. When ``None`` the
            ``text`` mock keeps its default (unconfigured) return value.

    Returns:
        The configured ``AsyncMock``.
    """
    response = AsyncMock()
    response.status = status
    if body is not None:
        response.text.return_value = body
    return response


@pytest.mark.asyncio
async def test_scirate_crawler_fetch_latest():
    """fetch_latest yields two items whose URLs are rooted at scirate.com."""
    scirate = SciRateCrawler("https://scirate.com/", "SciRate")

    # Page with several entries: one lacks a title, one has a list-like link.
    page = """
  • Paper 1
  • No link here
  • Paper 3
  • """

    # BeautifulSoup does not normally hand back a list for an href, so the
    # 'isinstance(link, list)' branch is reached by patching Tag.get itself:
    # the first patched call returns a list, the second a plain string.
    hrefs = [["/arxiv/list"], "/arxiv/3"]

    session_patch = patch(_SESSION_GET)
    href_patch = patch("bs4.element.Tag.get", side_effect=hrefs)
    with session_patch as get_mock, href_patch:
        get_mock.return_value.__aenter__.return_value = _fake_response(200, page)
        results = await scirate.fetch_latest()

    assert [entry.url for entry in results] == [
        "https://scirate.com/arxiv/list",
        "https://scirate.com/arxiv/3",
    ]


@pytest.mark.asyncio
async def test_scirate_crawler_exception():
    """fetch_latest returns an empty list when parse_html raises."""
    scirate = SciRateCrawler()

    session_patch = patch(_SESSION_GET)
    # Make parsing blow up regardless of the (empty) response body.
    parse_patch = patch.object(
        SciRateCrawler, "parse_html", side_effect=Exception("Parsing failed")
    )
    with session_patch as get_mock, parse_patch:
        get_mock.return_value.__aenter__.return_value = _fake_response(200, "")
        results = await scirate.fetch_latest()

    assert results == []


@pytest.mark.asyncio
async def test_scirate_crawler_error():
    """fetch_latest returns an empty list when the response status is 500."""
    scirate = SciRateCrawler()

    with patch(_SESSION_GET) as get_mock:
        get_mock.return_value.__aenter__.return_value = _fake_response(500)
        results = await scirate.fetch_latest()

    assert results == []