from datetime import datetime, timezone

import chromadb
import pytest
import pytest_asyncio
from chromadb.config import Settings

from src.processor.dto import EnrichedNewsItemDTO
from src.storage.chroma_store import ChromaStore


@pytest_asyncio.fixture
async def chroma_store():
    """Yield a ChromaStore backed by an ephemeral Chroma client.

    The client is reset once before the store is built and once more after
    the test body has finished, so each test starts from a clean state.
    """
    # Ephemeral client keeps everything in memory; allow_reset is switched
    # on because this fixture calls client.reset() around every test.
    settings = Settings(allow_reset=True)
    client = chromadb.EphemeralClient(settings)
    client.reset()

    yield ChromaStore(client=client, collection_name="test_collection")

    client.reset()


@pytest.mark.asyncio
async def test_store_and_search(chroma_store: ChromaStore):
    """Store three items and expect an AI/GPU query to return the two tech ones."""
    # --- Arrange: two hardware/AI stories and one unrelated story ---
    apple_news = EnrichedNewsItemDTO(
        title="Apple announces new M4 chip",
        url="https://example.com/apple-m4",
        content_text=(
            "Apple has announced its newest M4 chip for next generation Macs. "
            "This processor brings massive AI improvements."
        ),
        source="TechNews",
        timestamp=datetime(2023, 11, 1, 12, 0, tzinfo=timezone.utc),
        relevance_score=9,
        summary_ru="Apple анонсировала новый чип M4.",
        anomalies_detected=["NPU acceleration"],
    )
    bakery_news = EnrichedNewsItemDTO(
        title="Local bakery makes giant bread",
        url="https://example.com/giant-bread",
        content_text=(
            "A bakery in town just baked the world's largest loaf of bread, "
            "weighing over 1000 pounds."
        ),
        source="LocalNews",
        timestamp=datetime(2023, 11, 2, 10, 0, tzinfo=timezone.utc),
        relevance_score=2,
        summary_ru="Местная пекарня испекла гигантский хлеб.",
        anomalies_detected=[],
    )
    nvidia_news = EnrichedNewsItemDTO(
        title="NVIDIA reveals RTX 5090 with WebGPU support",
        url="https://example.com/nvidia-rtx-5090",
        content_text=(
            "NVIDIA's new RTX 5090 GPU fully accelerates WebGPU workloads "
            "for advanced edge AI applications."
        ),
        source="GPUWeekly",
        timestamp=datetime(2023, 11, 3, 14, 0, tzinfo=timezone.utc),
        relevance_score=10,
        summary_ru="NVIDIA представила RTX 5090 с поддержкой WebGPU.",
        anomalies_detected=["WebGPU", "Edge AI"],
    )

    # --- Act: persist the items one after another, then query ---
    for news in (apple_news, bakery_news, nvidia_news):
        await chroma_store.store(news)

    hits = await chroma_store.search("AI processor and GPU", limit=2)

    # --- Assert: only the two AI/GPU-related stories come back ---
    assert len(hits) == 2

    returned_titles = {hit.title for hit in hits}
    assert "NVIDIA reveals RTX 5090 with WebGPU support" in returned_titles
    assert "Apple announces new M4 chip" in returned_titles
    assert "Local bakery makes giant bread" not in returned_titles

    # Verify that the stored fields are restored intact on the NVIDIA hit.
    for hit in hits:
        if "NVIDIA" not in hit.title:
            continue
        assert hit.relevance_score == 10
        assert "WebGPU" in hit.anomalies_detected
        assert "Edge AI" in hit.anomalies_detected
        assert "NVIDIA's new RTX 5090" in hit.content_text
        assert hit.source == "GPUWeekly"


@pytest.mark.asyncio
async def test_search_empty_store(chroma_store: ChromaStore):
    """Searching a store that holds nothing yields no results."""
    found = await chroma_store.search("test query", limit=5)
    assert len(found) == 0


@pytest.mark.asyncio
async def test_store_upsert(chroma_store: ChromaStore):
    """Storing an item twice under the same URL keeps one entry with the new values."""
    original = EnrichedNewsItemDTO(
        title="Apple announces new M4 chip",
        url="https://example.com/apple-m4",
        content_text="Apple has announced its newest M4 chip for next generation Macs.",
        source="TechNews",
        timestamp=datetime(2023, 11, 1, 12, 0, tzinfo=timezone.utc),
        relevance_score=9,
        summary_ru="Apple анонсировала новый чип M4.",
        anomalies_detected=["NPU acceleration"],
    )

    # First write: exactly one entry with the initial score.
    await chroma_store.store(original)
    first_pass = await chroma_store.search("Apple", limit=5)
    assert len(first_pass) == 1
    assert first_pass[0].relevance_score == 9

    # Second write: a copy with the same URL but changed fields; the test
    # expects the store to overwrite the entry rather than add a new one.
    revised = original.model_copy()
    revised.relevance_score = 10
    revised.summary_ru = "Apple анонсировала чип M4. Обновлено."
    await chroma_store.store(revised)

    second_pass = await chroma_store.search("Apple", limit=5)

    # Still a single entry, now carrying the updated values.
    assert len(second_pass) == 1
    assert second_pass[0].relevance_score == 10
    assert second_pass[0].summary_ru == "Apple анонсировала чип M4. Обновлено."