"""Tests for the CppConf Next.js talks parser."""

import pytest
from datetime import datetime

from src.crawlers.cppconf_crawler import CppConfNextJsParser
from src.crawlers.dto import NewsItemDTO


@pytest.fixture
def cppconf_html():
    """Return the text of the saved CppConf talks page fixture.

    The file is read as UTF-8 from a path relative to the current working
    directory.
    """
    fixture_file = open("tests/fixtures/cppconf/talks.html", "r", encoding="utf-8")
    with fixture_file:
        return fixture_file.read()


def test_cppconf_parser(cppconf_html):
    """Check that ``parse_talks`` yields well-formed items from the fixture.

    At least one item must be returned. The first item must be a
    ``NewsItemDTO`` with a non-empty title and content text, a URL under
    the CppConf English talks path, the ``"cppconf"`` source tag and a
    ``datetime`` timestamp.
    """
    extracted = CppConfNextJsParser().parse_talks(cppconf_html)
    assert len(extracted) > 0, "Should extract at least one talk"

    # Only the leading item is inspected; the rest are not validated here.
    head = extracted[0]
    assert isinstance(head, NewsItemDTO)

    assert len(head.title) > 0
    assert head.url.startswith("https://cppconf.ru/en/talks/")
    assert len(head.content_text) > 0
    assert head.source == "cppconf"
    assert isinstance(head.timestamp, datetime)