262 lines
8.7 KiB
Python
262 lines
8.7 KiB
Python
import pytest
|
||
import pytest_asyncio
|
||
import uuid
|
||
from datetime import datetime, timezone
|
||
import chromadb
|
||
from chromadb.config import Settings
|
||
|
||
from src.processor.dto import EnrichedNewsItemDTO
|
||
from src.storage.chroma_store import ChromaStore
|
||
|
||
@pytest_asyncio.fixture
async def chroma_store():
    """Yield a ``ChromaStore`` backed by an in-memory Chroma client.

    The client is reset once before the store is created and once more
    after the consuming test has finished.
    """
    # Resetting the client is only permitted when allow_reset is enabled.
    settings = Settings(allow_reset=True)
    in_memory_client = chromadb.EphemeralClient(settings)
    in_memory_client.reset()

    yield ChromaStore(client=in_memory_client, collection_name="test_collection")

    # Teardown: wipe the client again once the test is done with the store.
    in_memory_client.reset()
|
||
|
||
@pytest.mark.asyncio
async def test_store_and_search(chroma_store: ChromaStore):
    """Store three news items and check a two-result search.

    The query is about AI processors and GPUs, so the Apple and NVIDIA
    items are expected back and the bakery item is not. The NVIDIA hit
    is additionally checked field by field.
    """
    # Arrange: two AI/hardware stories and one unrelated local story.
    apple_item = EnrichedNewsItemDTO(
        title="Apple announces new M4 chip",
        url="https://example.com/apple-m4",
        content_text="Apple has announced its newest M4 chip for next generation Macs. This processor brings massive AI improvements.",
        source="TechNews",
        timestamp=datetime(2023, 11, 1, 12, 0, tzinfo=timezone.utc),
        relevance_score=9,
        summary_ru="Apple анонсировала новый чип M4.",
        anomalies_detected=["NPU acceleration"],
        category="Competitors",
    )
    bakery_item = EnrichedNewsItemDTO(
        title="Local bakery makes giant bread",
        url="https://example.com/giant-bread",
        content_text="A bakery in town just baked the world's largest loaf of bread, weighing over 1000 pounds.",
        source="LocalNews",
        timestamp=datetime(2023, 11, 2, 10, 0, tzinfo=timezone.utc),
        relevance_score=2,
        summary_ru="Местная пекарня испекла гигантский хлеб.",
        anomalies_detected=[],
        category="Other",
    )
    nvidia_item = EnrichedNewsItemDTO(
        title="NVIDIA reveals RTX 5090 with WebGPU support",
        url="https://example.com/nvidia-rtx-5090",
        content_text="NVIDIA's new RTX 5090 GPU fully accelerates WebGPU workloads for advanced edge AI applications.",
        source="GPUWeekly",
        timestamp=datetime(2023, 11, 3, 14, 0, tzinfo=timezone.utc),
        relevance_score=10,
        summary_ru="NVIDIA представила RTX 5090 с поддержкой WebGPU.",
        anomalies_detected=["WebGPU", "Edge AI"],
        category="Edge AI",
    )

    # Act: persist in the same order as declared, then query with limit 2.
    for news_item in (apple_item, bakery_item, nvidia_item):
        await chroma_store.store(news_item)

    search_results = await chroma_store.search("AI processor and GPU", limit=2)

    # Assert: exactly the two hardware stories come back.
    assert len(search_results) == 2

    returned_titles = {hit.title for hit in search_results}
    assert "NVIDIA reveals RTX 5090 with WebGPU support" in returned_titles
    assert "Apple announces new M4 chip" in returned_titles
    assert "Local bakery makes giant bread" not in returned_titles

    # Every stored field of the NVIDIA item must be restored on the hit.
    nvidia_hits = [hit for hit in search_results if "NVIDIA" in hit.title]
    for hit in nvidia_hits:
        assert hit.relevance_score == 10
        assert "WebGPU" in hit.anomalies_detected
        assert "Edge AI" in hit.anomalies_detected
        assert "NVIDIA's new RTX 5090" in hit.content_text
        assert hit.source == "GPUWeekly"
        assert hit.category == "Edge AI"
|
||
|
||
@pytest.mark.asyncio
async def test_search_empty_store(chroma_store: ChromaStore):
    """Searching a store that holds no items yields zero results."""
    hits = await chroma_store.search("test query", limit=5)

    assert len(hits) == 0
|
||
|
||
@pytest.mark.asyncio
async def test_store_upsert(chroma_store: ChromaStore):
    """Storing an item twice under the same URL replaces it, not duplicates it."""
    original = EnrichedNewsItemDTO(
        title="Apple announces new M4 chip",
        url="https://example.com/apple-m4",
        content_text="Apple has announced its newest M4 chip for next generation Macs.",
        source="TechNews",
        timestamp=datetime(2023, 11, 1, 12, 0, tzinfo=timezone.utc),
        relevance_score=9,
        summary_ru="Apple анонсировала новый чип M4.",
        anomalies_detected=["NPU acceleration"],
        category="Competitors",
    )

    # First write: a single record with the initial score is expected.
    await chroma_store.store(original)
    first_pass = await chroma_store.search("Apple", limit=5)
    assert len(first_pass) == 1
    assert first_pass[0].relevance_score == 9

    # Second write: same URL, different score and summary.
    new_summary = "Apple анонсировала чип M4. Обновлено."
    revised = original.model_copy()
    revised.relevance_score = 10
    revised.summary_ru = new_summary

    await chroma_store.store(revised)
    second_pass = await chroma_store.search("Apple", limit=5)

    # Still exactly one record, now carrying the revised values.
    assert len(second_pass) == 1
    assert second_pass[0].relevance_score == 10
    assert second_pass[0].summary_ru == new_summary
|
||
|
||
@pytest.mark.asyncio
async def test_exists(chroma_store: ChromaStore):
    """``exists(url)`` is falsy before the item is stored and truthy after."""
    url = "https://example.com/unique-news-123"

    # Nothing has been stored yet, so the URL must be unknown.
    present_before = await chroma_store.exists(url)
    assert not present_before

    news_item = EnrichedNewsItemDTO(
        title="Test Title",
        url=url,
        content_text="Test content",
        source="TestSource",
        timestamp=datetime(2023, 11, 1, 12, 0, tzinfo=timezone.utc),
        relevance_score=5,
        summary_ru="Тест",
        anomalies_detected=[],
        category="Other",
    )
    await chroma_store.store(news_item)

    # After storing, the same URL must be reported as present.
    present_after = await chroma_store.exists(url)
    assert present_after
|
||
|
||
@pytest.mark.asyncio
async def test_get_by_id(chroma_store: ChromaStore):
    """``get_by_id`` returns the stored item for a known ID and ``None`` otherwise."""
    # Arrange
    url = "https://example.com/get-by-id-test"
    # The lookup ID is the UUIDv5 of the URL in the URL namespace;
    # presumably this mirrors how ChromaStore derives document IDs (confirm
    # against the store implementation).
    expected_id = str(uuid.uuid5(uuid.NAMESPACE_URL, url))

    news_item = EnrichedNewsItemDTO(
        title="ID Test Title",
        url=url,
        content_text="ID Test Content",
        source="IDTestSource",
        timestamp=datetime(2023, 11, 1, 12, 0, tzinfo=timezone.utc),
        relevance_score=7,
        summary_ru="Тест по ID",
        anomalies_detected=["TestAnomaly"],
        category="Testing",
    )

    # Act: store, then look up one real and one bogus ID.
    await chroma_store.store(news_item)
    found = await chroma_store.get_by_id(expected_id)
    missing = await chroma_store.get_by_id("non-existent-id")

    # Assert
    assert found is not None
    assert found.title == "ID Test Title"
    assert found.url == url
    assert found.relevance_score == 7
    assert "TestAnomaly" in found.anomalies_detected
    assert found.category == "Testing"

    assert missing is None
|
||
|
||
@pytest.mark.asyncio
async def test_get_stats(chroma_store: ChromaStore):
    """``get_stats`` reports the total item count and a count per category."""
    # Arrange: items 1 and 2 are "Tech", item 3 is "Science".
    news_items = [
        EnrichedNewsItemDTO(
            title=f"Title {number}",
            url=f"https://example.com/{number}",
            content_text=f"Content {number}",
            source=f"Source {number}",
            timestamp=datetime.now(timezone.utc),
            relevance_score=5,
            summary_ru=f"Сводка {number}",
            anomalies_detected=[],
            category=category,
        )
        for number, category in enumerate(("Tech", "Tech", "Science"), start=1)
    ]

    # Act
    for news_item in news_items:
        await chroma_store.store(news_item)

    stats = await chroma_store.get_stats()

    # Assert
    assert stats["total_count"] == 3
    assert stats["category_Tech"] == 2
    assert stats["category_Science"] == 1
|
||
|
||
@pytest.mark.asyncio
async def test_search_sorting(chroma_store: ChromaStore):
    """Search results are expected in descending ``relevance_score`` order."""
    # Arrange: five items whose relevance score equals their index (1..5).
    news_items = []
    for score in range(1, 6):
        news_items.append(
            EnrichedNewsItemDTO(
                title=f"Title {score}",
                url=f"https://example.com/{score}",
                content_text=f"Content {score}",
                source="Source",
                timestamp=datetime.now(timezone.utc),
                relevance_score=score,
                summary_ru=f"Сводка {score}",
                anomalies_detected=[],
                category="Tech",
            )
        )

    for news_item in news_items:
        await chroma_store.store(news_item)

    # Act: the limit exceeds the number of stored items, so all five return.
    hits = await chroma_store.search("Content", limit=10)

    # Assert: all items present, highest score first.
    assert len(hits) == 5
    returned_scores = [hit.relevance_score for hit in hits]
    assert returned_scores == [5, 4, 3, 2, 1]
|