117 lines
4.6 KiB
Python
117 lines
4.6 KiB
Python
import pytest
|
||
import pytest_asyncio
|
||
from datetime import datetime, timezone
|
||
import chromadb
|
||
from chromadb.config import Settings
|
||
|
||
from src.processor.dto import EnrichedNewsItemDTO
|
||
from src.storage.chroma_store import ChromaStore
|
||
|
||
@pytest_asyncio.fixture
async def chroma_store():
    """Yield a ChromaStore bound to a freshly reset in-memory Chroma client.

    The client is reset once before the store is constructed and once
    more after the consuming test has finished with the fixture.
    """
    # In-memory client configured with allow_reset=True, since reset() is
    # called on it both before and after the test.
    settings = Settings(allow_reset=True)
    in_memory_client = chromadb.EphemeralClient(settings)
    in_memory_client.reset()

    yield ChromaStore(client=in_memory_client, collection_name="test_collection")

    # Teardown: wipe the client again after the test.
    in_memory_client.reset()
|
||
|
||
@pytest.mark.asyncio
async def test_store_and_search(chroma_store: ChromaStore):
    """Store three news items and check a two-result search on an AI/GPU query.

    Two of the stored items are about chips/GPUs and one is about a
    bakery; the search with ``limit=2`` is expected to return the two
    hardware items, and the NVIDIA item is checked field by field.
    """
    # --- Arrange: two hardware stories and one unrelated story ---
    apple_item = EnrichedNewsItemDTO(
        title="Apple announces new M4 chip",
        url="https://example.com/apple-m4",
        content_text="Apple has announced its newest M4 chip for next generation Macs. This processor brings massive AI improvements.",
        source="TechNews",
        timestamp=datetime(2023, 11, 1, 12, 0, tzinfo=timezone.utc),
        relevance_score=9,
        summary_ru="Apple анонсировала новый чип M4.",
        anomalies_detected=["NPU acceleration"],
    )
    bakery_item = EnrichedNewsItemDTO(
        title="Local bakery makes giant bread",
        url="https://example.com/giant-bread",
        content_text="A bakery in town just baked the world's largest loaf of bread, weighing over 1000 pounds.",
        source="LocalNews",
        timestamp=datetime(2023, 11, 2, 10, 0, tzinfo=timezone.utc),
        relevance_score=2,
        summary_ru="Местная пекарня испекла гигантский хлеб.",
        anomalies_detected=[],
    )
    nvidia_item = EnrichedNewsItemDTO(
        title="NVIDIA reveals RTX 5090 with WebGPU support",
        url="https://example.com/nvidia-rtx-5090",
        content_text="NVIDIA's new RTX 5090 GPU fully accelerates WebGPU workloads for advanced edge AI applications.",
        source="GPUWeekly",
        timestamp=datetime(2023, 11, 3, 14, 0, tzinfo=timezone.utc),
        relevance_score=10,
        summary_ru="NVIDIA представила RTX 5090 с поддержкой WebGPU.",
        anomalies_detected=["WebGPU", "Edge AI"],
    )

    # --- Act: persist the items one after another, then query ---
    for news_item in (apple_item, bakery_item, nvidia_item):
        await chroma_store.store(news_item)

    hits = await chroma_store.search("AI processor and GPU", limit=2)

    # --- Assert: exactly the two hardware stories come back ---
    assert len(hits) == 2

    found_titles = [hit.title for hit in hits]
    assert "NVIDIA reveals RTX 5090 with WebGPU support" in found_titles
    assert "Apple announces new M4 chip" in found_titles
    assert "Local bakery makes giant bread" not in found_titles

    # The NVIDIA result must carry the same field values that were stored.
    for hit in hits:
        if "NVIDIA" not in hit.title:
            continue
        assert hit.relevance_score == 10
        assert "WebGPU" in hit.anomalies_detected
        assert "Edge AI" in hit.anomalies_detected
        assert "NVIDIA's new RTX 5090" in hit.content_text
        assert hit.source == "GPUWeekly"
|
||
|
||
@pytest.mark.asyncio
async def test_search_empty_store(chroma_store: ChromaStore):
    """Searching a store that has had nothing stored returns no results."""
    hits = await chroma_store.search("test query", limit=5)

    hit_count = len(hits)
    assert hit_count == 0
|
||
|
||
@pytest.mark.asyncio
async def test_store_upsert(chroma_store: ChromaStore):
    """Storing an item twice under the same URL must leave one updated entry.

    The item is stored, copied, modified (score and Russian summary) and
    stored again; a search is then expected to return a single result
    carrying the modified values.
    """
    original = EnrichedNewsItemDTO(
        title="Apple announces new M4 chip",
        url="https://example.com/apple-m4",
        content_text="Apple has announced its newest M4 chip for next generation Macs.",
        source="TechNews",
        timestamp=datetime(2023, 11, 1, 12, 0, tzinfo=timezone.utc),
        relevance_score=9,
        summary_ru="Apple анонсировала новый чип M4.",
        anomalies_detected=["NPU acceleration"],
    )

    # First write: a single entry with the initial score.
    await chroma_store.store(original)
    first_hits = await chroma_store.search("Apple", limit=5)
    assert len(first_hits) == 1
    assert first_hits[0].relevance_score == 9

    # Second write: a copy with the same URL but changed score and summary.
    revised = original.model_copy()
    revised.relevance_score = 10
    revised.summary_ru = "Apple анонсировала чип M4. Обновлено."
    await chroma_store.store(revised)

    second_hits = await chroma_store.search("Apple", limit=5)

    # Still exactly one entry, now reflecting the revised values.
    assert len(second_hits) == 1
    refreshed = second_hits[0]
    assert refreshed.relevance_score == 10
    assert refreshed.summary_ru == "Apple анонсировала чип M4. Обновлено."
|