"""End-to-end test of the CppConf pipeline: crawl -> LLM analysis -> vector store.

HTTP traffic is replaced by patching ``aiohttp.ClientSession.get`` and
``aiohttp.ClientSession.post``; storage goes through a
``chromadb.EphemeralClient`` so nothing is reused between runs.
"""

import json
from unittest.mock import AsyncMock, patch

import chromadb
import pytest
from chromadb.config import Settings

from src.crawlers.cppconf_crawler import CppConfCrawler
from src.processor.ollama_provider import OllamaProvider
from src.storage.chroma_store import ChromaStore


@pytest.fixture
def cppconf_html():
    """Return the contents of the saved CppConf talks page as a string."""
    # NOTE: the path is relative to the current working directory, so the
    # test is expected to be launched from the repository root.
    with open("tests/fixtures/cppconf/talks.html", "r", encoding="utf-8") as f:
        return f.read()


@pytest.mark.asyncio
async def test_cppconf_e2e_pipeline(cppconf_html):
    """Run one talk through crawler, LLM provider and Chroma store.

    Steps:
      1. The crawler fetches talks from a mocked HTTP GET that returns the
         ``cppconf_html`` fixture.
      2. The first talk is analysed by ``OllamaProvider`` against a mocked
         HTTP POST that returns a canned JSON payload.
      3. The enriched talk is stored in an ephemeral Chroma collection and
         then looked up via ``exists`` and ``search``.
    """
    # 1. Mock Crawler fetch
    crawler = CppConfCrawler(url="https://cppconf.ru/en/talks/", source="C++ Russia")

    with patch("aiohttp.ClientSession.get") as mock_get:
        mock_response = AsyncMock()
        mock_response.status = 200
        mock_response.text.return_value = cppconf_html
        # ``session.get(...)`` is used as an async context manager, so the
        # response object is what ``__aenter__`` hands back.
        mock_get.return_value.__aenter__.return_value = mock_response

        talks = await crawler.fetch_latest()

    assert len(talks) > 0
    talk = talks[0]
    assert talk.source == "C++ Russia"
    assert "https://cppconf.ru/en/talks/" in talk.url

    # 2. Mock AI Processor
    provider = OllamaProvider()
    mock_llm_response = {
        "relevance_score": 9,
        "summary_ru": (
            "Этот доклад обсуждает новые фичи C++26 и их влияние на "
            "производительность. Показаны примеры использования "
            "концептов и корутин."
        ),
        "anomalies_detected": ["Сравнение производительности с Rust"],
        "category": "C++ Trends",
    }

    with patch("aiohttp.ClientSession.post") as mock_post:
        mock_llm_post_response = AsyncMock()
        # NOTE(review): aiohttp's ``ClientResponse.raise_for_status()`` is a
        # plain (non-async) method. If the provider calls it without
        # ``await``, this AsyncMock yields a never-awaited coroutine warning;
        # consider ``MagicMock`` once the provider's call style is confirmed.
        mock_llm_post_response.raise_for_status = AsyncMock()
        # The LLM answer is delivered as a JSON string under the "response"
        # key of the HTTP JSON body.
        mock_llm_post_response.json.return_value = {
            "response": json.dumps(mock_llm_response)
        }
        mock_post.return_value.__aenter__.return_value = mock_llm_post_response

        enriched_talk = await provider.analyze(talk)

    assert enriched_talk.relevance_score == 9
    assert "Rust" in enriched_talk.anomalies_detected[0]
    assert enriched_talk.category == "C++ Trends"

    # 3. Vector DB Store
    client = chromadb.EphemeralClient(Settings(allow_reset=True))
    store = ChromaStore(client=client, collection_name="test_cppconf_collection")

    await store.store(enriched_talk)

    # Verify it exists
    exists = await store.exists(enriched_talk.url)
    assert exists is True

    # Search
    results = await store.search("C++26 features", limit=1)
    assert len(results) == 1
    assert results[0].relevance_score == 9
    assert results[0].url == enriched_talk.url