diff --git a/src/processor/ollama_provider.py b/src/processor/ollama_provider.py index 95ac536..ed44b28 100644 --- a/src/processor/ollama_provider.py +++ b/src/processor/ollama_provider.py @@ -21,62 +21,35 @@ class OllamaProvider(ILLMProvider): base_url = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434') url = base_url if base_url.endswith( '/api/generate') else f"{base_url.rstrip('/')}/api/generate" - if news_item.source in ["C++ Russia", "cppconf"]: - prompt = ( - "Analyze this C++ conference talk abstract. Extract the primary C++ trends discussed " - "(e.g., C++20/26 concepts, memory safety, coroutines, heterogeneous computing).\n\n" - f"Title: {news_item.title}\nContent: {news_item.content_text}\n\n" - "Return a JSON object strictly with these keys:\n" - "1. 'relevance_score' (integer 0-10): Indicate its importance to the modern C++ ecosystem.\n" - "2. 'summary_ru' (string): A concise 2-sentence summary in Russian.\n" - "3. 'anomalies_detected' (list of strings): Any bleeding-edge tech, controversial topics, or Rust comparisons.\n" - "4. 'category' (string): Must be exactly 'C++ Trends'.\n" - ) - elif "Scholar" in news_item.source or news_item.source == "SciRate" or "arxiv" in news_item.url.lower(): - prompt = ( - "Act as a Senior Research Scientist and Strategic Tech Scout. Analyze this academic research abstract.\n\n" - f"Title: {news_item.title}\nContent: {news_item.content_text}\n\n" - "Return a JSON object strictly with these keys:\n" - "1. 'relevance_score' (integer 0-10): Score the potential impact on industrial R&D (Edge AI, NPU acceleration, WebEngines).\n" - "2. 'summary_ru' (string): A technical summary in Russian (2-3 sentences). Explain the methodology, core innovation, and practical relevance.\n" - "3. 'anomalies_detected' (list of strings): Identify if this is State-of-the-art (SOTA) improvement, a paradigm shift, or unexpected results.\n" - "4. 'category' (string): Must be exactly 'Academic/SOTA'.\n\n" - "SCORING GUIDELINES:\n" - "- 9-10: SOTA breakthrough in NPU/AI efficiency, new web standards, or major SWE tool improvements.\n" - "- 7-8: Solid research with clear application in SmartTV, IoT, or Browsers.\n" - "- 4-6: Theoretical work with distant industrial application.\n" - "- 0-3: Out of scope (e.g., pure medicine, social sciences, or consumer electronics reviews).\n" - ) - else: - prompt = ( - "Act as a Strategic Tech Scout for an R&D department specializing in WebEngine (Chromium) extensions, " - "cross-platform porting, Middleware platform solutions, and System Tools (SWE) for developers.\n\n" - f"Analyze the following article or research abstract.\nTitle: {news_item.title}\nContent: {news_item.content_text}\n\n" - - "Return a JSON object with: 'relevance_score' (integer 0-10), 'summary_ru' (string), " - "'anomalies_detected' (list of strings), and 'category' (string).\n\n" - - "OUTPUT RULES:\n" - "1. 'summary_ru': MUST be in Russian and strictly NO MORE than 2-3 sentences. " - "For academic/research papers, provide a technical summary of the methodology, key findings, and potential R&D application. " - "Focus on the technological or business value for an R&D team.\n" - "2. 'category': Must be exactly one of: 'Browsers', 'Edge AI', 'SmartTV', 'Samsung New Technologies', 'Middleware new trends', 'Competitors', 'Academic/SOTA', 'Other'.\n\n" - - "SCORING LOGIC ('relevance_score'):\n" - "- Score 9-10 (Core R&D): Breakthroughs in web rendering engines, new cross-platform porting frameworks, " - "Edge AI/NPU integration at the middleware level, State-of-the-art (SOTA) research in AI/ML/NPU acceleration, " - "or disruptive software developer tools (SWE).\n" - "- Score 7-8 (Ecosystem): Technologies highly applicable to Automotive Content Platforms, IoT ecosystems, " - "or major SmartTV OS updates. Peer-reviewed research with clear industrial application or architectural improvements.\n" - "- Score 3-6 (Peripheral): General news in Robotics, Medical Displays, or HVAC.\n" - "- Score 0 (Excluded): Pure Audio/Acoustic technologies, or consumer-level updates about standalone laptops.\n\n" - - "ANOMALY DETECTION ('anomalies_detected'):\n" - "Do not just summarize. Look for strategic or architectural disruptions. Examples: " - "a competitor abandoning a proprietary OS for Chromium, sudden new industry standards in IoT/Middleware, " - "unexpected convergence of WebTech with hardware, or research that significantly outperforms current SOTA. " - "Ignore technical text formatting issues. Return an empty list [] if no strategic anomalies are found." - ) + prompt = ( + "Act as a Strategic Tech Scout for an R&D department specializing in WebEngine (Chromium) extensions, " + "cross-platform porting, Middleware platform solutions, and System Tools (SWE) for developers. " + "Evaluate ALL articles, including C++ conference talks and academic research, based on their value to these specific targets.\n\n" + + f"Analyze the following article or research abstract.\nTitle: {news_item.title}\nSource: {news_item.source}\nContent: {news_item.content_text}\n\n" + + "Return a JSON object strictly with these keys:\n" + "1. 'relevance_score' (integer 0-10): Score the potential impact on our R&D targets.\n" + "2. 'summary_ru' (string): A concise technical summary in Russian (2-3 sentences). Explain methodology, core innovation, and practical relevance.\n" + "3. 'anomalies_detected' (list of strings): Identify state-of-the-art (SOTA) breakthroughs, strategic disruptions, new standards, or unexpected results. Return [] if none.\n" + "4. 'category' (string): Must be exactly one of: 'Browsers', 'SmartTV', 'Samsung New Technologies', 'Middleware new trends', 'Competitors', 'Academic/SOTA', 'C++ Trends', 'Edge AI', 'Other'.\n\n" + + "SCORING GUIDELINES ('relevance_score'):\n" + "Start with a base score:\n" + "- 9-10 (Core R&D): Breakthroughs in web rendering engines, cross-platform frameworks, modern C++ paradigms relevant to system tools, or SOTA research in web/middleware.\n" + "- 7-8 (Ecosystem): Solid improvements applicable to Automotive Content Platforms, IoT ecosystems, SmartTV OS, or major SWE tool improvements.\n" + "- 4-6 (Peripheral): Theoretical work, general programming news, or technologies with distant industrial application.\n" + "- 0-3 (Out of Scope): Pure medicine, social sciences, consumer electronics reviews, pure audio/acoustics.\n\n" + + "AI PENALTY (CRITICAL):\n" + "AI mentions are unwanted. Penalize the 'relevance_score' if the article is about AI:\n" + "- Subtract 2 points for a minor or peripheral AI/ML mention.\n" + "- Subtract 5 points if it is primarily an AI/ML/LLM article.\n" + "Ensure the final score remains between 0 and 10.\n\n" + + "ANOMALY DETECTION ('anomalies_detected'):\n" + "Do not just summarize. Look for strategic/architectural disruptions (e.g., a competitor abandoning a proprietary OS for Chromium, sudden new industry standards, convergence of WebTech with hardware, or research significantly outperforming current SOTA). Ignore technical text formatting issues." + ) payload = { "model": os.environ.get('OLLAMA_MODEL', 'gpt-oss:120b-cloud'), "prompt": prompt,