From 9daf07b72d653acd3a4217558ef4c5f9135fa877 Mon Sep 17 00:00:00 2001 From: Artur Mukhamadiev Date: Mon, 16 Mar 2026 13:40:46 +0300 Subject: [PATCH] Update Ollama prompt and crawler sources - crawlers.yml: added more Google Scholar queries and removed the Habr AI RSS feed - LLM prompt: removed the 'C++ Trends' category and the C++ scoring reference, and changed "web rendering engines" to "web engines" --- src/crawlers.yml | 23 ++++++++++++++++------- src/processor/ollama_provider.py | 4 ++-- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/crawlers.yml b/src/crawlers.yml index 6bcadbc..c0d8ee0 100644 --- a/src/crawlers.yml +++ b/src/crawlers.yml @@ -1,7 +1,4 @@ crawlers: - - type: rss - url: "https://habr.com/ru/rss/hubs/artificial_intelligence/articles/?fl=ru" - source: "Habr AI" - type: rss url: "https://www.nature.com/nature.rss" source: "Nature" @@ -102,9 +99,21 @@ crawlers: source: "SciRate" - type: scholar url: "https://scholar.google.com/" - source: "Google Scholar" - query: "WebGPU machine learning" + source: "Google Scholar WebGPU" + query: "WebGPU" - type: scholar url: "https://scholar.google.com/" - source: "Google Scholar" - query: "NPU acceleration" \ No newline at end of file + source: "Google Scholar NPU" + query: "NPU acceleration" + - type: scholar + url: "https://scholar.google.com/" + source: "Google Scholar Browsers" + query: "Browsers | Lightweight Web Engine" + - type: scholar + url: "https://scholar.google.com/" + source: "Google Scholar Performance" + query: "Software Optimization" + - type: scholar + url: "https://scholar.google.com/" + source: "Google Scholar BMI" + query: "Brain-machine interface (IoT|Webengine|Linux)" \ No newline at end of file diff --git a/src/processor/ollama_provider.py b/src/processor/ollama_provider.py index 7ffbda4..08a2427 100644 --- a/src/processor/ollama_provider.py +++ b/src/processor/ollama_provider.py @@ -32,11 +32,11 @@ class OllamaProvider(ILLMProvider): "1. 'relevance_score' (integer 0-10): Score the potential impact on our R&D targets.\n" "2. 
'summary_ru' (string): A concise technical summary in Russian (2-3 sentences). Explain methodology, core innovation, and practical relevance.\n" "3. 'anomalies_detected' (list of strings): Identify state-of-the-art (SOTA) breakthroughs, strategic disruptions, new standards, or unexpected results. Return [] if none.\n" - "4. 'category' (string): Must be exactly one of: 'WebEngines/Browsers', 'System Tools (SWE)', 'Middleware Platforms', 'Cross-Platform', 'SmartTV/IoT', 'Samsung New Technologies', 'C++ Trends', 'Competitors', 'Academic/SOTA', 'Other'.\n\n" + "4. 'category' (string): Must be exactly one of: 'WebEngines/Browsers', 'System Tools (SWE)', 'Middleware Platforms', 'Cross-Platform', 'SmartTV/IoT', 'Samsung New Technologies', 'Competitors', 'Academic/SOTA', 'Other'.\n\n" "SCORING GUIDELINES ('relevance_score'):\n" "Start with a base score:\n" - "- 9-10 (Core R&D): Breakthroughs in web rendering engines, cross-platform frameworks, modern C++ paradigms relevant to system tools, or SOTA research in web/middleware.\n" + "- 9-10 (Core R&D): Breakthroughs in web engines, cross-platform frameworks, system tools, or SOTA research in web engines/middleware.\n" "- 7-8 (Ecosystem): Solid improvements applicable to Automotive Content Platforms, IoT ecosystems, SmartTV OS, or major SWE tool improvements.\n" "- 4-6 (Peripheral): Theoretical work, general programming news, or technologies with distant industrial application.\n" "- 0-3 (Out of Scope): Pure medicine, social sciences, consumer electronics reviews, pure audio/acoustics.\n\n"