AI-Trend-Scout/tests/storage/test_chroma_store.py

117 lines
4.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pytest
import pytest_asyncio
from datetime import datetime, timezone
import chromadb
from chromadb.config import Settings
from src.processor.dto import EnrichedNewsItemDTO
from src.storage.chroma_store import ChromaStore
@pytest_asyncio.fixture
async def chroma_store():
    """Yield a ChromaStore bound to a freshly reset in-memory Chroma client.

    The client is reset once before the store is built and once more after
    the consuming test has finished with it.
    """
    # EphemeralClient is used for in-memory testing; allow_reset=True is
    # passed so that the reset() calls below are permitted.
    settings = Settings(allow_reset=True)
    in_memory_client = chromadb.EphemeralClient(settings)
    in_memory_client.reset()

    yield ChromaStore(client=in_memory_client, collection_name="test_collection")

    # Teardown: wipe whatever the test stored.
    in_memory_client.reset()
@pytest.mark.asyncio
async def test_store_and_search(chroma_store: ChromaStore):
    """Store three items and check a tech query returns the two tech stories.

    Two hardware/AI stories and one unrelated bakery story are stored. A
    search limited to two results must return exactly the two hardware/AI
    stories, and the NVIDIA result must carry the fields it was stored with.
    """
    apple_title = "Apple announces new M4 chip"
    bakery_title = "Local bakery makes giant bread"
    nvidia_title = "NVIDIA reveals RTX 5090 with WebGPU support"

    # 1. Arrange: build every item up front, in the order they will be stored.
    news_items = [
        EnrichedNewsItemDTO(
            title=apple_title,
            url="https://example.com/apple-m4",
            content_text="Apple has announced its newest M4 chip for next generation Macs. This processor brings massive AI improvements.",
            source="TechNews",
            timestamp=datetime(2023, 11, 1, 12, 0, tzinfo=timezone.utc),
            relevance_score=9,
            summary_ru="Apple анонсировала новый чип M4.",
            anomalies_detected=["NPU acceleration"],
        ),
        EnrichedNewsItemDTO(
            title=bakery_title,
            url="https://example.com/giant-bread",
            content_text="A bakery in town just baked the world's largest loaf of bread, weighing over 1000 pounds.",
            source="LocalNews",
            timestamp=datetime(2023, 11, 2, 10, 0, tzinfo=timezone.utc),
            relevance_score=2,
            summary_ru="Местная пекарня испекла гигантский хлеб.",
            anomalies_detected=[],
        ),
        EnrichedNewsItemDTO(
            title=nvidia_title,
            url="https://example.com/nvidia-rtx-5090",
            content_text="NVIDIA's new RTX 5090 GPU fully accelerates WebGPU workloads for advanced edge AI applications.",
            source="GPUWeekly",
            timestamp=datetime(2023, 11, 3, 14, 0, tzinfo=timezone.utc),
            relevance_score=10,
            summary_ru="NVIDIA представила RTX 5090 с поддержкой WebGPU.",
            anomalies_detected=["WebGPU", "Edge AI"],
        ),
    ]

    # 2. Act: persist sequentially, then query for AI / chip related news.
    for news_item in news_items:
        await chroma_store.store(news_item)
    hits = await chroma_store.search("AI processor and GPU", limit=2)

    # 3. Assert: only the two hardware/AI stories come back.
    assert len(hits) == 2
    returned_titles = {hit.title for hit in hits}
    assert nvidia_title in returned_titles
    assert apple_title in returned_titles
    assert bakery_title not in returned_titles

    # The NVIDIA hit must come back with the properties it was stored with.
    for hit in hits:
        if "NVIDIA" not in hit.title:
            continue
        assert hit.relevance_score == 10
        assert "WebGPU" in hit.anomalies_detected
        assert "Edge AI" in hit.anomalies_detected
        assert "NVIDIA's new RTX 5090" in hit.content_text
        assert hit.source == "GPUWeekly"
@pytest.mark.asyncio
async def test_search_empty_store(chroma_store: ChromaStore):
    """Searching a store into which nothing was stored yields zero results."""
    hits = await chroma_store.search("test query", limit=5)
    hit_count = len(hits)
    assert hit_count == 0
@pytest.mark.asyncio
async def test_store_upsert(chroma_store: ChromaStore):
    """Storing an item twice under the same URL should leave one updated entry.

    The item is stored, copied, modified (score and Russian summary) and
    stored again; a search must then return a single result carrying the
    modified values.
    """
    revised_summary = "Apple анонсировала чип M4. Обновлено."
    original_item = EnrichedNewsItemDTO(
        title="Apple announces new M4 chip",
        url="https://example.com/apple-m4",
        content_text="Apple has announced its newest M4 chip for next generation Macs.",
        source="TechNews",
        timestamp=datetime(2023, 11, 1, 12, 0, tzinfo=timezone.utc),
        relevance_score=9,
        summary_ru="Apple анонсировала новый чип M4.",
        anomalies_detected=["NPU acceleration"],
    )

    # First write: exactly one entry, with the original score.
    await chroma_store.store(original_item)
    hits_before = await chroma_store.search("Apple", limit=5)
    assert len(hits_before) == 1
    assert hits_before[0].relevance_score == 9

    # Second write: same URL with changed fields, expected to upsert.
    revised_item = original_item.model_copy()
    revised_item.relevance_score = 10
    revised_item.summary_ru = revised_summary
    await chroma_store.store(revised_item)

    # Still a single entry, now reflecting the revised values.
    hits_after = await chroma_store.search("Apple", limit=5)
    assert len(hits_after) == 1
    sole_hit = hits_after[0]
    assert sole_hit.relevance_score == 10
    assert sole_hit.summary_ru == revised_summary