Complete digest system

2026-01-06 10:01:24 +01:00
parent 3f2d58b257
commit 252ba9f5cc
6 changed files with 751 additions and 224 deletions
--- a/publish_digest.rb
+++ b/publish_digest.rb
@@ -0,0 +1,542 @@
+#!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
+#
+# Zprávobot.news - AI Daily Digest Publisher
+# Version: 1.0.1 (Cloudron - Direct HTTP)
+# 
+# Generates and publishes daily digest posts to Mastodon bots:
+# - @zpravobot (7:30) - neutral overview
+# - @pozitivni (12:00) - positive news
+# - @sarkasticky (19:00) - sarcastic commentary
+
+require 'csv'
+require 'json'
+require 'time'
+require 'net/http'
+require 'uri'
+require 'optparse'
+
+# ==========================================
+# CONFIGURATION
+# ==========================================
+
+# Base URL of the Mastodon instance the bots post to.
+MASTODON_URL = 'https://zpravobot.news'.freeze
+# CSV export that load_posts_from_csv reads.
+CSV_PATH = '/app/data/posts-latest.csv'.freeze
+# Endpoint used by the Claude analysis step.
+ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages'.freeze
+
+# Per-bot settings keyed by the value accepted for --bot.
+#   :token     - access token read from the environment (nil when unset)
+#   :style     - selects topic filtering and toot wording
+#   :time_slot - label of the slot the bot is meant for
+#   :hashtags  - hashtag line appended to the summary toot
+# Inner hashes and the outer hash are frozen so the configuration cannot be
+# modified by accident at runtime.
+BOTS = {
+  'zpravobot' => {
+    token: ENV['ZPRAVOBOT_TOKEN'],
+    style: 'neutral',
+    time_slot: 'morning',
+    hashtags: '#zpravobot #trendydne'
+  },
+  'pozitivni' => {
+    token: ENV['POZITIVNI_TOKEN'],
+    style: 'positive',
+    time_slot: 'noon',
+    hashtags: '#dobréZprávy #zpravobot'
+  },
+  'sarkasticky' => {
+    token: ENV['SARKASTICKY_TOKEN'],
+    style: 'sarcastic',
+    time_slot: 'evening',
+    hashtags: '#realita #zpravobot'
+  }
+}.each_value(&:freeze).freeze
+
+# ==========================================
+# COMMAND LINE PARSING
+# ==========================================
+
+# Parsed switches: :bot (String), :dry_run (true when given), :date (String).
+options = {}
+parser = OptionParser.new do |opts|
+  opts.banner = "Usage: publish_digest.rb [options]"
+
+  opts.on("--bot BOT", String, "Bot name (zpravobot, pozitivni, sarkasticky)") do |b|
+    options[:bot] = b
+  end
+
+  opts.on("--dry-run", "Test mode - don't actually publish") do
+    options[:dry_run] = true
+  end
+
+  opts.on("--date DATE", String, "Process specific date (YYYY-MM-DD)") do |d|
+    options[:date] = d
+  end
+
+  opts.on("-h", "--help", "Show this help") do
+    puts opts
+    exit
+  end
+end
+
+# An unknown switch or a missing argument should produce a usage message,
+# not an uncaught exception with a backtrace.
+begin
+  parser.parse!
+rescue OptionParser::ParseError => e
+  puts "❌ ERROR: #{e.message}"
+  puts parser
+  exit 1
+end
+
+bot_name = options[:bot]
+
+unless bot_name && BOTS.key?(bot_name)
+  puts "❌ ERROR: Invalid bot name. Use: zpravobot, pozitivni, or sarkasticky"
+  exit 1
+end
+
+# Posts are matched against the date as a 'YYYY-MM-DD' string, so anything
+# else would only ever end in a confusing "no posts found".
+if options[:date]
+  parts = /\A(\d{4})-(\d{2})-(\d{2})\z/.match(options[:date])
+  unless parts && Date.valid_date?(parts[1].to_i, parts[2].to_i, parts[3].to_i)
+    puts "❌ ERROR: Invalid --date '#{options[:date]}'. Expected YYYY-MM-DD"
+    exit 1
+  end
+end
+
+config = BOTS[bot_name]
+
+# Validate environment. An empty string counts as missing: a variable that is
+# exported but blank would otherwise pass and fail later at the API call.
+if config[:token].to_s.strip.empty?
+  puts "❌ ERROR: Missing token for @#{bot_name}"
+  puts "   Set environment variable: #{bot_name.upcase}_TOKEN"
+  exit 1
+end
+
+if ENV['ANTHROPIC_API_KEY'].to_s.strip.empty?
+  puts "❌ ERROR: Missing ANTHROPIC_API_KEY"
+  exit 1
+end
+
+# ==========================================
+# UTILITIES
+# ==========================================
+
+# Writes one line to standard output: the current local time in square
+# brackets, a space, then the message.
+def log(message)
+  stamp = Time.now.strftime('%Y-%m-%d %H:%M:%S')
+  puts format('[%s] %s', stamp, message)
+end
+
+# Returns the first http(s) URL found in text, or nil when there is none
+# (nil input included).
+#
+# Sentence punctuation that directly follows a link ("... https://a.cz/x.")
+# and a closing parenthesis with no opener inside the URL are not treated as
+# part of it. Balanced parentheses, as in wiki-style paths, are kept.
+def extract_url(text)
+  url = text.to_s[%r{https?://[^\s<>"]+}]
+  return nil unless url
+
+  # Repeat until stable so mixed tails such as ")." are removed completely.
+  loop do
+    trimmed = url.sub(/[.,;:!?]+\z/, '')
+    if trimmed.end_with?(')') && trimmed.count(')') > trimmed.count('(')
+      trimmed = trimmed[0...-1]
+    end
+    break if trimmed == url
+
+    url = trimmed
+  end
+
+  # A bare scheme left after trimming is not a usable link.
+  url.match?(%r{\Ahttps?://\z}) ? nil : url
+end
+
+# ==========================================
+# DATA LOADING
+# ==========================================
+
+# Reads CSV_PATH and returns the posts created on one calendar day.
+#
+# date - 'YYYY-MM-DD' string; defaults to yesterday (local calendar date).
+#
+# Returns an Array of Hashes with the String keys 'text', 'url' and
+# 'created_at'. 'text' is never nil or blank and 'url' is never nil, so later
+# steps can call String methods on them safely.
+#
+# Exits the process with status 1 when the file is missing or no post matches.
+def load_posts_from_csv(date = nil)
+  # Calendar arithmetic rather than "now minus 86400 seconds", which can land
+  # on the wrong day around a daylight-saving change.
+  target_date = date || (Date.today - 1).strftime('%Y-%m-%d')
+
+  unless File.exist?(CSV_PATH)
+    log "❌ CSV file not found: #{CSV_PATH}"
+    exit 1
+  end
+
+  posts = []
+  skipped = 0
+
+  CSV.foreach(CSV_PATH, headers: true, encoding: 'utf-8') do |row|
+    begin
+      created = Time.parse(row['created_at'].to_s)
+    rescue ArgumentError, RangeError
+      # Missing or unparseable timestamp: the row cannot be assigned to a day.
+      skipped += 1
+      next
+    end
+
+    next unless created.strftime('%Y-%m-%d') == target_date
+
+    text = row['text'].to_s
+    if text.strip.empty?
+      # A post without text cannot be categorised or titled.
+      skipped += 1
+      next
+    end
+
+    posts << {
+      'text' => text,
+      'url' => row['url'] || '',
+      'created_at' => row['created_at']
+    }
+  end
+
+  log "📊 Loaded #{posts.size} posts from #{target_date}"
+  log "⚠️  Skipped #{skipped} unusable rows" if skipped.positive?
+
+  if posts.empty?
+    log "⚠️  No posts found for #{target_date}"
+    exit 1
+  end
+
+  posts
+end
+
+# ==========================================
+# TOPIC EXTRACTION
+# ==========================================
+
+# Topic labels with the keyword pattern that assigns a post to them.
+# Order matters: a post goes to the FIRST label whose pattern matches its
+# lower-cased text.
+TOPIC_PATTERNS = [
+  ['🌍 Zahraniční politika', /trump|venezuela|maduro|grónsko|greenland|usa|bílý dům/],
+  ['🏒 Hokej', /hokej|extraliga|nhl|ms u20/],
+  ['⚽ Fotbal', /fotbal|chelsea|liga|gól|penalty/],
+  ['🎬 Kultura', /film|seriál|stranger things|hudba|koncert|festival|netflix/],
+  ['❄️ Počasí', /počasí|teplota|mráz|sníh|déšť/],
+  ['🏛️ Politika', /politika|parlament|vláda|ministr/],
+  ['💼 Ekonomika', /ekonomika|koruna|inflace|mzdy|ceny/]
+].freeze
+
+# Groups posts by topic using keyword matching.
+#
+# posts - Array of post Hashes with 'text' and 'url'. Each Hash is MUTATED:
+#         'extracted_url' is set to the first URL found in the text, falling
+#         back to the post's 'url' value.
+#
+# Returns a Hash of topic label => Array of posts, largest topic first.
+# Topics with the same number of posts keep the order in which they were
+# first seen, so the result is deterministic. Posts matching no pattern are
+# left out. A nil 'text' is treated as empty instead of raising.
+def extract_topics(posts)
+  topics = Hash.new { |h, k| h[k] = [] }
+
+  posts.each do |post|
+    raw_text = post['text'].to_s
+    text = raw_text.downcase
+
+    # Add URL to post if not present
+    post['extracted_url'] = extract_url(raw_text) || post['url']
+
+    label, = TOPIC_PATTERNS.find { |_, pattern| text.match?(pattern) }
+    topics[label] << post if label
+  end
+
+  # sort_by alone is not stable; the index breaks ties by first appearance.
+  ranked = topics.sort_by.with_index { |(_, topic_posts), idx| [-topic_posts.size, idx] }.to_h
+
+  log "🔍 Found #{ranked.size} topics:"
+  ranked.each { |topic, topic_posts| log "   #{topic}: #{topic_posts.size} posts" }
+
+  ranked
+end
+
+# ==========================================
+# CONTENT FILTERING BY STYLE
+# ==========================================
+
+# Narrows the topic map for a bot's style.
+#
+# topics - Hash of topic label => Array of post Hashes (with 'text').
+# style  - 'positive' and 'sarcastic' filter; any other value returns topics
+#          itself, unchanged.
+#
+# 'positive':  drops topics whose label contains 'Politika' or 'Zahraniční'
+#              and keeps only posts with an upbeat keyword and no grim
+#              keyword; topics left without posts are omitted.
+# 'sarcastic': starts from the topics whose label contains 'Zahraniční' or
+#              'Politika'; when fewer than three were found, the remaining
+#              topics are appended in their existing order up to five total.
+def filter_topics_by_style(topics, style)
+  return topics unless %w[positive sarcastic].include?(style)
+
+  if style == 'positive'
+    upbeat = /úspěch|vítěz|rekord|festival|koncert|ocenění|talent/
+    grim = /nehoda|smrt|tragédie|havárie|konflikt|krize/
+
+    kept = topics.each_with_object({}) do |(label, items), acc|
+      next if label.include?('Politika') || label.include?('Zahraniční')
+
+      cheerful = items.select do |item|
+        lowered = item['text'].downcase
+        lowered.match?(upbeat) && !lowered.match?(grim)
+      end
+
+      acc[label] = cheerful unless cheerful.empty?
+    end
+
+    log "💚 Filtered to #{kept.size} positive topics"
+    return kept
+  end
+
+  chosen = topics.select do |label, _items|
+    label.include?('Zahraniční') || label.include?('Politika')
+  end
+
+  if chosen.size < 3
+    topics.each do |label, items|
+      break if chosen.size >= 5
+
+      chosen[label] = items unless chosen.key?(label)
+    end
+  end
+
+  log "😏 Selected #{chosen.size} topics for sarcasm"
+  chosen
+end
+
+# ==========================================
+# CLAUDE API ANALYSIS
+# ==========================================
+
+# Asks the Claude Messages API for a short JSON analysis of the day's posts.
+#
+# posts  - Array of post Hashes; the first 10 texts (up to 151 characters
+#          each) are sent as samples.
+# topics - Hash of topic label => Array of posts, used for the topic summary
+#          and for the fallback result.
+#
+# Returns a Hash parsed from the model's reply. The prompt asks for the keys
+# 'main_topics', 'sentiment' and 'notable_events', but the reply is not
+# validated beyond being a JSON object.
+# This step is best-effort: on a non-200 response, a network error, a timeout,
+# or a reply that is not a JSON object, default_analysis(topics) is returned.
+def analyze_with_claude(posts, topics)
+  log "🤖 Analyzing with Claude API..."
+
+  topic_summary = topics.map { |topic, topic_posts| "#{topic}: #{topic_posts.size}" }.join(', ')
+  # Only ten samples go into the prompt, so only ten are prepared.
+  sample_texts = posts.first(10).map { |post| post['text'].to_s[0..150] }
+
+  prompt = <<~PROMPT
+    Analyzuj #{posts.size} českých/slovenských zpráv z Mastodon instance Zprávobot.news.
+
+    Témata: #{topic_summary}
+
+    Ukázka textů:
+    #{sample_texts.join("\n---\n")}
+
+    Vrať POUZE JSON (žádný markdown):
+    {
+      "main_topics": ["téma1", "téma2", "téma3"],
+      "sentiment": "neutral|positive|negative",
+      "notable_events": ["událost1", "událost2"]
+    }
+  PROMPT
+
+  uri = URI(ANTHROPIC_API_URL)
+  request = Net::HTTP::Post.new(uri)
+  request['anthropic-version'] = '2023-06-01'
+  request['content-type'] = 'application/json'
+  request['x-api-key'] = ENV['ANTHROPIC_API_KEY']
+
+  request.body = {
+    model: 'claude-sonnet-4-20250514',
+    max_tokens: 1000,
+    messages: [
+      { role: 'user', content: prompt }
+    ]
+  }.to_json
+
+  # Explicit timeouts so a stalled connection cannot hang the scheduled run.
+  response = Net::HTTP.start(uri.hostname, uri.port,
+                             use_ssl: true, open_timeout: 10, read_timeout: 60) do |http|
+    http.request(request)
+  end
+
+  if response.code != '200'
+    log "⚠️  Claude API error: #{response.code}"
+    return default_analysis(topics)
+  end
+
+  reply = JSON.parse(response.body).dig('content', 0, 'text').to_s
+  # The model sometimes wraps its JSON in a markdown fence despite the prompt.
+  analysis = JSON.parse(reply.gsub(/```json|```/, '').strip)
+
+  unless analysis.is_a?(Hash)
+    log "⚠️  Claude API error: unexpected response shape"
+    return default_analysis(topics)
+  end
+
+  log "✅ Claude analysis complete"
+  analysis
+rescue StandardError => e
+  log "⚠️  Claude API error: #{e.message}"
+  default_analysis(topics)
+end
+
+# Fallback analysis used when the Claude call cannot provide one: the first
+# three topic labels, a neutral sentiment and no notable events.
+def default_analysis(topics)
+  leading_labels = topics.keys.first(3)
+
+  { 'main_topics' => leading_labels }.merge(
+    'sentiment' => 'neutral',
+    'notable_events' => []
+  )
+end
+
+# ==========================================
+# TOOT GENERATION
+# ==========================================
+
+# Builds the first (summary) toot of the digest thread.
+#
+# posts_count - number of posts processed, shown in the intro line.
+# topics      - Hash of topic label => Array of posts, already ranked.
+# style       - 'positive' or 'sarcastic'; any other value (including
+#               'neutral') uses the neutral layout instead of raising.
+# hashtags    - hashtag line placed above the closing pointer.
+# date:       - optional date label shown in the title (dd.mm.yyyy).
+#               Defaults to yesterday; pass it when the digest covers a
+#               different day.
+#
+# Returns the toot text without surrounding whitespace, at most 500
+# characters long (longer text is cut and ends with "...").
+def generate_summary_toot(posts_count, topics, style, hashtags, date: nil)
+  date ||= (Time.now - 86400).strftime('%d.%m.%Y')
+
+  unit = style == 'sarcastic' ? '×' : ' postů'
+  topic_lines = topics.keys.first(5).map do |topic|
+    "#{topic} (#{topics[topic].size}#{unit})"
+  end
+
+  title, intro, shown, pointer =
+    case style
+    when 'positive'
+      ["☀️ DOBRÉ ZPRÁVY DNE (#{date})",
+       "Z dnešních #{posts_count} zpráv vybrané momenty:",
+       topic_lines.first(4),
+       '👇 Inspirace na čtení']
+    when 'sarcastic'
+      ["😏 DNEŠNÍ REALITA (#{date})",
+       "#{posts_count} postů = co se stalo?",
+       topic_lines.first(4),
+       '👇 Důkazy zmaru']
+    else
+      ["📊 TRENDY DNE (#{date})",
+       "Zpracováno #{posts_count} postů:",
+       topic_lines,
+       '👇 Odkazy na vybrané články']
+    end
+
+  summary = [title, '', intro, '', shown.join("\n"), '', hashtags, '', pointer].join("\n")
+
+  # Strip BEFORE measuring: surrounding whitespace is never published, so it
+  # must not push a toot that fits over the limit.
+  summary = summary.strip
+  summary = "#{summary[0..496]}..." if summary.length > 500
+  summary
+end
+
+# Builds the second (links) toot of the digest thread.
+#
+# topics - Hash of topic label => Array of post Hashes carrying 'text' and
+#          'extracted_url'. Up to 5 topics are used, with up to 2 posts each
+#          (the first and the middle one).
+# style  - 'positive' or 'sarcastic'; any other value (including 'neutral')
+#          uses the neutral header and footer instead of raising.
+#
+# Returns the toot text. URLs are emitted complete, scheme included, so they
+# remain real links. When the toot is too long, whole entries are dropped
+# from the end; nothing is ever cut in the middle of a title or a URL. Topics
+# that end up without any link are omitted.
+def generate_links_toot(topics, style)
+  max_topics = 5
+  max_links_per_topic = 2
+  char_limit = 500
+
+  header, footer =
+    case style
+    when 'positive'
+      ["💚 POZITIVNÍ PŘÍBĚHY DNE:", "\n💙 Máte skvělý den!\n#inspirace"]
+    when 'sarcastic'
+      ["🤡 \"BREAKING NEWS\" DNE:", "\n🙃 Zítra: repeat\n#sarkasmus"]
+    else
+      ["📌 VYBRANÉ ČLÁNKY DNE:", "\n#články #zprávy"]
+    end
+
+  sections = topics.keys.first(max_topics).map do |topic|
+    posts = topics[topic] || []
+    picks = [posts[0]]
+    picks << posts[posts.size / 2] if posts.size > 1
+
+    entries = picks.compact.first(max_links_per_topic).map { |post| format_link_entry(post) }.compact
+    ["\n#{topic}:", entries]
+  end
+  sections.reject! { |_heading, entries| entries.empty? }
+
+  render = lambda do
+    lines = sections.flat_map { |heading, entries| [heading] + entries }
+    header + lines.join("\n") + footer
+  end
+
+  toot = render.call
+  while weighted_toot_length(toot) > char_limit && !sections.empty?
+    sections.last[1].pop
+    sections.pop if sections.last[1].empty?
+    toot = render.call
+  end
+
+  toot.strip
+end
+
+# Formats one post as "• title" plus a "🔗 url" line, or returns nil when the
+# post has no URL. The title is the first non-blank line of the text with
+# URLs removed and whitespace collapsed; "..." is appended only when the
+# title was actually shortened to 51 characters.
+def format_link_entry(post)
+  url = post['extracted_url'].to_s.strip
+  return nil if url.empty?
+
+  first_line = post['text'].to_s.lines.map(&:strip).reject(&:empty?).first.to_s
+  title = first_line.gsub(%r{https?://\S+}, '').gsub(/\s+/, ' ').strip
+  title = "#{title[0..50].strip}..." if title.length > 51
+
+  title.empty? ? "  🔗 #{url}" : "• #{title}\n  🔗 #{url}"
+end
+
+# Length of text as Mastodon counts it: every http(s) link is counted as 23
+# characters regardless of its real length.
+def weighted_toot_length(text)
+  text.gsub(%r{https?://\S+}, 'x' * 23).length
+end
+
+# ==========================================
+# MASTODON PUBLISHING (DIRECT HTTP)
+# ==========================================
+
+# Publishes the digest as a two-toot thread: the summary first, then the
+# links toot as a reply to it.
+#
+# bot_name     - key into BOTS; selects the access token.
+# summary_toot - text of the first toot.
+# links_toot   - text of the reply.
+# dry_run:     - when truthy, both texts are only logged and nothing is sent.
+#
+# Returns [first_status, reply_status] as parsed API responses, or [nil, nil]
+# in dry-run mode. Exits the process with status 1 when either request fails;
+# a failed reply is no longer reported as published.
+def publish_thread(bot_name, summary_toot, links_toot, dry_run: false)
+  config = BOTS[bot_name]
+
+  log "📤 Publishing thread for @#{bot_name}..."
+
+  if dry_run
+    log "🧪 DRY RUN MODE - Not actually publishing"
+    log "\n--- TOOT 1/2 (#{summary_toot.length} chars) ---"
+    log summary_toot
+    log "\n--- TOOT 2/2 (#{links_toot.length} chars) ---"
+    log links_toot
+    log "\n✅ Dry run complete"
+    return [nil, nil]
+  end
+
+  toot1_data = post_mastodon_status(config[:token], status: summary_toot, visibility: 'public')
+  log "✅ Toot 1/2 published: #{toot1_data['url']}"
+
+  toot2_data = post_mastodon_status(
+    config[:token],
+    status: links_toot,
+    in_reply_to_id: toot1_data['id'],
+    visibility: 'public'
+  )
+  log "✅ Toot 2/2 published (thread)"
+
+  [toot1_data, toot2_data]
+rescue StandardError => e
+  log "❌ ERROR publishing thread: #{e.message}"
+  exit 1
+end
+
+# Sends one status to the instance's POST /api/v1/statuses endpoint.
+#
+# token   - bearer token of the posting account.
+# payload - Hash serialised as the JSON request body.
+#
+# Returns the parsed response body. Raises RuntimeError on any response other
+# than HTTP 200; network, timeout and JSON errors propagate to the caller.
+def post_mastodon_status(token, payload)
+  uri = URI("#{MASTODON_URL}/api/v1/statuses")
+  request = Net::HTTP::Post.new(uri)
+  request['Authorization'] = "Bearer #{token}"
+  request['Content-Type'] = 'application/json'
+  request.body = payload.to_json
+
+  # Explicit timeouts so a stalled connection cannot hang the scheduled run.
+  response = Net::HTTP.start(uri.hostname, uri.port,
+                             use_ssl: true, open_timeout: 10, read_timeout: 30) do |http|
+    http.request(request)
+  end
+
+  raise "HTTP #{response.code}: #{response.body}" unless response.code == '200'
+
+  JSON.parse(response.body)
+end
+
+# ==========================================
+# MAIN EXECUTION
+# ==========================================
+
+# Runs the whole digest pipeline for one bot: load posts, group them into
+# topics, filter by the bot's style, run the Claude analysis, build both
+# toots and publish them (or only print them in dry-run mode).
+#
+# bot_name - key into BOTS.
+# options  - Hash of parsed switches; :date and :dry_run are read.
+#
+# The helpers called here exit the process with status 1 on fatal conditions
+# (no posts, publishing failure), as does the empty-topics check below.
+def main(bot_name, options = {})
+  log "🚀 Starting Daily Digest for @#{bot_name}"
+  log "=" * 60
+
+  config = BOTS[bot_name]
+
+  posts = load_posts_from_csv(options[:date])
+
+  log "\n🔍 Extracting topics..."
+  all_topics = extract_topics(posts)
+
+  topics = filter_topics_by_style(all_topics, config[:style])
+
+  if topics.empty?
+    log "⚠️  No suitable topics found for style: #{config[:style]}"
+    exit 1
+  end
+
+  log "\n🤖 Analyzing with Claude..."
+  analysis = analyze_with_claude(posts, topics)
+
+  # The analysis does not feed into the toots yet; log it so the result of
+  # the API call is at least visible in the run output.
+  if analysis.is_a?(Hash)
+    log "   Sentiment: #{analysis['sentiment']}"
+    log "   Main topics: #{Array(analysis['main_topics']).join(', ')}"
+  end
+
+  log "\n📝 Generating content..."
+  summary = generate_summary_toot(posts.size, topics, config[:style], config[:hashtags])
+  links = generate_links_toot(topics, config[:style])
+
+  log "   Summary: #{summary.length} chars"
+  log "   Links: #{links.length} chars"
+
+  log "\n📤 Publishing to Mastodon..."
+  # Only the first status is needed afterwards (for the thread URL).
+  toot1, = publish_thread(bot_name, summary, links, dry_run: options[:dry_run])
+
+  log "\n" + "=" * 60
+  log "✅ Digest complete for @#{bot_name}"
+
+  log "🔗 Thread: #{toot1['url']}" if toot1 && !options[:dry_run]
+end
+
+# Run main
+# Script entry point. Ctrl-C ends the run with status 130; any other
+# StandardError is logged with the first five backtrace frames and ends the
+# run with status 1.
+begin
+  main(bot_name, options)
+rescue Interrupt
+  log "\n⚠️  Interrupted by user"
+  exit 130
+rescue StandardError => error
+  log "❌ FATAL ERROR: #{error.message}"
+  frames = error.backtrace.first(5)
+  log "   #{frames.join("\n   ")}"
+  exit 1
+end