AI-Trend-Scout/tests/test_cppconf_pipeline.py
Artur Mukhamadiev a363ca41cf feat(crawlers): implement specialized CppConf crawler and AI analysis
- Added CppConfCrawler using aiohttp and regex to parse Next.js JSON data, skipping the Playwright bottleneck.
- Added C++ specific prompts to OllamaProvider for trend analysis (identifying C++26, memory safety, coroutines).
- Created offline pytest fixtures and TDD unit tests for the parser.
- Created end-to-end pipeline test mapping Crawler -> AI Processor -> Vector DB.
2026-03-15 20:34:39 +03:00

68 lines
2.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pytest
import chromadb
from unittest.mock import AsyncMock, patch
from src.crawlers.cppconf_crawler import CppConfCrawler
from src.processor.ollama_provider import OllamaProvider
from src.storage.chroma_store import ChromaStore
@pytest.fixture
def cppconf_html():
with open("tests/fixtures/cppconf/talks.html", "r", encoding="utf-8") as f:
return f.read()
@pytest.mark.asyncio
async def test_cppconf_e2e_pipeline(cppconf_html):
# 1. Mock Crawler fetch
crawler = CppConfCrawler(url="https://cppconf.ru/en/talks/", source="C++ Russia")
with patch("aiohttp.ClientSession.get") as mock_get:
mock_response = AsyncMock()
mock_response.status = 200
mock_response.text.return_value = cppconf_html
mock_get.return_value.__aenter__.return_value = mock_response
talks = await crawler.fetch_latest()
assert len(talks) > 0
talk = talks[0]
assert talk.source == "C++ Russia"
assert "https://cppconf.ru/en/talks/" in talk.url
# 2. Mock AI Processor
provider = OllamaProvider()
mock_llm_response = {
"relevance_score": 9,
"summary_ru": "Этот доклад обсуждает новые фичи C++26 и их влияние на производительность. Показаны примеры использования концептов и корутин.",
"anomalies_detected": ["Сравнение производительности с Rust"],
"category": "C++ Trends"
}
with patch("aiohttp.ClientSession.post") as mock_post:
mock_llm_post_response = AsyncMock()
mock_llm_post_response.raise_for_status = AsyncMock()
import json
mock_llm_post_response.json.return_value = {"response": json.dumps(mock_llm_response)}
mock_post.return_value.__aenter__.return_value = mock_llm_post_response
enriched_talk = await provider.analyze(talk)
assert enriched_talk.relevance_score == 9
assert "Rust" in enriched_talk.anomalies_detected[0]
assert enriched_talk.category == "C++ Trends"
# 3. Vector DB Store
client = chromadb.Client()
store = ChromaStore(client=client, collection_name="test_cppconf_collection")
await store.store(enriched_talk)
# Verify it exists
exists = await store.exists(enriched_talk.url)
assert exists is True
# Search
results = await store.search("C++26 features", limit=1)
assert len(results) == 1
assert results[0].relevance_score == 9
assert results[0].url == enriched_talk.url