# Tests for OllamaProvider (Python, ~141 lines).
# Change notes:
# - Add specialized prompt branch for research papers and SOTA detection
# - Improve Russian summarization quality for technical abstracts
# - Update relevance scoring to prioritize NPU/Edge AI breakthroughs
# - Add README.md with project overview
import os
|
|
import pytest
|
|
from datetime import datetime
|
|
from unittest.mock import AsyncMock, patch
|
|
from src.crawlers.dto import NewsItemDTO
|
|
from src.processor.dto import EnrichedNewsItemDTO
|
|
from src.processor.ollama_provider import OllamaProvider
|
|
|
|
@pytest.fixture
def sample_news_item():
    """Provide a minimal, fully-populated NewsItemDTO for provider tests."""
    fields = {
        "title": "Test News",
        "url": "http://example.com",
        "content_text": "This is a test article about AI and NPU acceleration.",
        "source": "Test Source",
        "timestamp": datetime.now(),
    }
    return NewsItemDTO(**fields)
|
|
|
|
def create_mock_session(mock_response_json):
    """Build a stand-in for ``aiohttp.ClientSession``.

    Every ``post()`` call yields a response object whose ``json()``
    coroutine returns *mock_response_json* and whose
    ``raise_for_status()`` never raises (simulates an HTTP 200).
    """

    class _StubResponse:
        # Async context manager mirroring aiohttp's response interface.
        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc_val, exc_tb):
            return None

        async def json(self):
            # Canned payload supplied by the caller.
            return mock_response_json

        def raise_for_status(self):
            # Always a "successful" response in these tests.
            return None

    class _StubSession:
        # Async context manager mirroring aiohttp's session interface.
        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc_val, exc_tb):
            return None

        def post(self, url, **kwargs):
            # URL and keyword arguments are intentionally ignored;
            # every request "succeeds" with the canned response.
            return _StubResponse()

    return _StubSession()
|
|
|
|
@pytest.mark.asyncio
async def test_ollama_provider_analyze_success(sample_news_item, monkeypatch):
    """analyze() maps a well-formed JSON LLM payload onto EnrichedNewsItemDTO fields."""
    # monkeypatch.setenv restores the variable after the test; the previous
    # direct os.environ assignment leaked state into subsequent tests.
    monkeypatch.setenv('OLLAMA_API_URL', 'http://localhost:11434/api/generate')
    mock_response_json = {
        "response": '{"relevance_score": 8, "summary_ru": "Тестовая статья про ИИ.", "anomalies_detected": ["NPU acceleration"], "category": "Edge AI"}'
    }

    provider = OllamaProvider()
    # Patch the session class so no real HTTP request is made.
    with patch('aiohttp.ClientSession', return_value=create_mock_session(mock_response_json)):
        result = await provider.analyze(sample_news_item)

    assert isinstance(result, EnrichedNewsItemDTO)
    assert result.title == "Test News"
    assert result.relevance_score == 8
    assert result.summary_ru == "Тестовая статья про ИИ."
    assert result.anomalies_detected == ["NPU acceleration"]
    assert result.category == "Edge AI"
|
|
|
|
@pytest.mark.asyncio
async def test_ollama_provider_analyze_empty_response(sample_news_item, monkeypatch):
    """An empty LLM response degrades to neutral defaults, not an exception."""
    # monkeypatch.setenv restores the variable after the test; the previous
    # direct os.environ assignment leaked state into subsequent tests.
    monkeypatch.setenv('OLLAMA_API_URL', 'http://localhost:11434/api/generate')
    mock_response_json = {
        "response": ""
    }

    provider = OllamaProvider()
    with patch('aiohttp.ClientSession', return_value=create_mock_session(mock_response_json)):
        result = await provider.analyze(sample_news_item)

    # Defaults expected when the model returns nothing usable.
    assert isinstance(result, EnrichedNewsItemDTO)
    assert result.relevance_score == 0
    assert result.summary_ru == ""
    assert result.anomalies_detected == []
    assert result.category == "Other"
|
|
|
|
@pytest.mark.asyncio
async def test_ollama_provider_analyze_malformed_json(sample_news_item, monkeypatch):
    """Malformed JSON from the LLM yields a parse-error marker, not a crash."""
    # monkeypatch.setenv restores the variable after the test; the previous
    # direct os.environ assignment leaked state into subsequent tests.
    monkeypatch.setenv('OLLAMA_API_URL', 'http://localhost:11434/api/generate')
    mock_response_json = {
        "response": "{ invalid json"
    }

    provider = OllamaProvider()
    with patch('aiohttp.ClientSession', return_value=create_mock_session(mock_response_json)):
        result = await provider.analyze(sample_news_item)

    assert isinstance(result, EnrichedNewsItemDTO)
    assert result.relevance_score == 0
    # The provider is expected to surface the failure in the summary field.
    assert "Error parsing LLM response" in result.summary_ru
    assert result.anomalies_detected == []
    assert result.category == "Other"
|
|
|
|
@pytest.mark.asyncio
async def test_ollama_provider_analyze_markdown_json(sample_news_item, monkeypatch):
    """JSON wrapped in a ```json fenced code block is stripped and parsed."""
    # monkeypatch.setenv restores the variable after the test; the previous
    # direct os.environ assignment leaked state into subsequent tests.
    monkeypatch.setenv('OLLAMA_API_URL', 'http://localhost:11434/api/generate')
    mock_response_json = {
        "response": "```json\n{\"relevance_score\": 5, \"summary_ru\": \"Markdown test\", \"anomalies_detected\": [], \"category\": \"Browsers\"}\n```"
    }

    provider = OllamaProvider()
    with patch('aiohttp.ClientSession', return_value=create_mock_session(mock_response_json)):
        result = await provider.analyze(sample_news_item)

    assert isinstance(result, EnrichedNewsItemDTO)
    assert result.relevance_score == 5
    assert result.summary_ru == "Markdown test"
    assert result.anomalies_detected == []
    assert result.category == "Browsers"
|
|
|
|
@pytest.mark.asyncio
async def test_ollama_provider_academic_content(monkeypatch):
    """Academic/ArXiv items are scored and categorized from the LLM payload."""
    # monkeypatch.setenv restores the variable after the test; the previous
    # direct os.environ assignment leaked state into subsequent tests.
    monkeypatch.setenv('OLLAMA_API_URL', 'http://localhost:11434/api/generate')
    academic_item = NewsItemDTO(
        title="Attention Is All You Need",
        url="https://arxiv.org/abs/1706.03762",
        content_text="The dominant sequence transduction models...",
        source="ArXiv",
        timestamp=datetime.now()
    )
    mock_response_json = {
        "response": '{"relevance_score": 10, "summary_ru": "Революционная архитектура Transformer.", "anomalies_detected": ["SOTA"], "category": "Academic/SOTA"}'
    }

    provider = OllamaProvider()
    with patch('aiohttp.ClientSession', return_value=create_mock_session(mock_response_json)):
        result = await provider.analyze(academic_item)

    assert result.relevance_score == 10
    assert result.category == "Academic/SOTA"
    assert "Transformer" in result.summary_ru
|
|
|
|
def test_ollama_provider_get_info(monkeypatch):
    """get_info() reports the model, base URL, and prompt summary from config."""
    # monkeypatch.setenv restores both variables after the test; the previous
    # direct os.environ assignments leaked state into subsequent tests.
    monkeypatch.setenv('OLLAMA_API_URL', 'http://test-url:11434')
    monkeypatch.setenv('OLLAMA_MODEL', 'test-model')
    provider = OllamaProvider()
    info = provider.get_info()

    assert info["model"] == "test-model"
    assert info["base_url"] == "http://test-url:11434"
    assert info["prompt_summary"] == "Russian summary, 2 sentences, R&D focus"
|
|
|