#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Zprávobot.news - AI Daily Digest Publisher
# Version: 1.0.2 (Fixed URLs)
#
# Reads one day of posts from a CSV export, groups them into topics, asks the
# Claude API for a short analysis and publishes a two-toot thread (summary +
# links) to Mastodon on behalf of the selected bot account.

require 'csv'
require 'json'
require 'time'
require 'net/http'
require 'uri'
require 'optparse'

# ==========================================
# CONFIGURATION
# ==========================================

MASTODON_URL = 'https://zpravobot.news'
CSV_PATH = '/app/data/posts-latest.csv'
ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages'

# Length cap applied to every generated toot (plain character count).
MAX_TOOT_LENGTH = 500
SECONDS_PER_DAY = 86_400

BOTS = {
  'zpravobot' => {
    token: ENV['ZPRAVOBOT_TOKEN'],
    style: 'neutral',
    time_slot: 'morning',
    hashtags: '#zpravobot #trendydne'
  },
  'pozitivni' => {
    token: ENV['POZITIVNI_TOKEN'],
    style: 'positive',
    time_slot: 'noon',
    hashtags: '#dobréZprávy #zpravobot'
  },
  'sarkasticky' => {
    token: ENV['SARKASTICKY_TOKEN'],
    style: 'sarcastic',
    time_slot: 'evening',
    hashtags: '#realita #zpravobot'
  }
}.freeze

# Ordered topic rules: a post is filed under the FIRST label whose pattern
# matches its lower-cased text, so the order of this table is significant.
TOPIC_RULES = [
  ['🌍 Zahraniční politika', /trump|venezuela|maduro|grónsko|greenland|usa|bílý dům/],
  ['🏒 Hokej', /hokej|extraliga|nhl|ms u20/],
  ['⚽ Fotbal', /fotbal|chelsea|liga|gól|penalty/],
  ['🎬 Kultura', /film|seriál|stranger things|hudba|koncert|festival|netflix/],
  ['❄️ Počasí', /počasí|teplota|mráz|sníh|déšť/],
  ['🏛️ Politika', /politika|parlament|vláda|ministr/],
  ['💼 Ekonomika', /ekonomika|koruna|inflace|mzdy|ceny/]
].freeze

# ==========================================
# COMMAND LINE PARSING
# ==========================================

options = {}
OptionParser.new do |opts|
  opts.banner = 'Usage: publish_digest.rb [options]'

  opts.on('--bot BOT', String, 'Bot name (zpravobot, pozitivni, sarkasticky)') do |b|
    options[:bot] = b
  end

  opts.on('--dry-run', "Test mode - don't actually publish") do
    options[:dry_run] = true
  end

  opts.on('--date DATE', String, 'Process specific date (YYYY-MM-DD)') do |d|
    options[:date] = d
  end

  opts.on('-h', '--help', 'Show this help') do
    puts opts
    exit
  end
end.parse!

bot_name = options[:bot]
unless bot_name && BOTS.key?(bot_name)
  puts '❌ ERROR: Invalid bot name. Use: zpravobot, pozitivni, or sarkasticky'
  exit 1
end

config = BOTS[bot_name]

# Validate environment. An empty value is treated the same as a missing one,
# otherwise the failure would only surface later as an HTTP 401.
if config[:token].to_s.empty?
  puts "❌ ERROR: Missing token for @#{bot_name}"
  puts " Set environment variable: #{bot_name.upcase}_TOKEN"
  exit 1
end

if ENV['ANTHROPIC_API_KEY'].to_s.empty?
  puts '❌ ERROR: Missing ANTHROPIC_API_KEY'
  exit 1
end

# ==========================================
# UTILITIES
# ==========================================

# Prints a message to stdout prefixed with the current local timestamp.
#
# @param message [String]
# @return [nil]
def log(message)
  puts "[#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}] #{message}"
end

# Returns the first http(s) URL found in the text.
#
# @param text [String, nil]
# @return [String, nil] the URL, or nil when there is none (or text is nil)
def extract_url(text)
  text.to_s[%r{https?://[^\s<>"]+}]
end

# Shortens text to the limit, marking the cut with a trailing "...".
#
# @param text [String]
# @param limit [Integer] maximum length of the returned string
# @return [String]
def truncate_toot(text, limit = MAX_TOOT_LENGTH)
  return text if text.length <= limit

  "#{text[0, limit - 3]}..."
end

# ==========================================
# DATA LOADING
# ==========================================

# Loads the posts created on one calendar day from CSV_PATH.
# Terminates the process with status 1 when the file is missing or when no
# post matches the requested day.
#
# @param date [String, nil] day as 'YYYY-MM-DD'; defaults to yesterday
# @return [Array<Hash>] posts with the string keys 'text', 'url', 'created_at'
def load_posts_from_csv(date = nil)
  target_date = date || (Time.now - SECONDS_PER_DAY).strftime('%Y-%m-%d')

  unless File.exist?(CSV_PATH)
    log "❌ CSV file not found: #{CSV_PATH}"
    exit 1
  end

  posts = []
  skipped = 0

  CSV.foreach(CSV_PATH, headers: true, encoding: 'utf-8') do |row|
    begin
      created = Time.parse(row['created_at'])
      next unless created.strftime('%Y-%m-%d') == target_date

      posts << {
        # A missing text column becomes '' so later string operations are safe.
        'text' => row['text'].to_s,
        'url' => row['url'] || '',
        'created_at' => row['created_at']
      }
    rescue StandardError
      # Best effort: a row with a missing or unparsable timestamp is skipped,
      # but counted so the loss is visible in the log.
      skipped += 1
    end
  end

  log "⚠️ Skipped #{skipped} unreadable rows" if skipped.positive?
  log "📊 Loaded #{posts.size} posts from #{target_date}"

  if posts.empty?
    log "⚠️ No posts found for #{target_date}"
    exit 1
  end

  posts
end

# ==========================================
# TOPIC EXTRACTION
# ==========================================

# Groups posts by topic using TOPIC_RULES and annotates every post with an
# 'extracted_url' key (first URL in the text, falling back to the 'url' column).
# Posts matching no rule are left out of the result.
#
# @param posts [Array<Hash>] posts as returned by load_posts_from_csv;
#   each hash is mutated in place
# @return [Hash{String => Array<Hash>}] topics ordered by descending post count
def extract_topics(posts)
  grouped = Hash.new { |hash, key| hash[key] = [] }

  posts.each do |post|
    text = post['text'].to_s
    post['extracted_url'] = extract_url(text) || post['url']

    lowered = text.downcase
    label, = TOPIC_RULES.find { |_, pattern| lowered.match?(pattern) }
    grouped[label] << post if label
  end

  # Sort by post count, largest topic first.
  topics = grouped.sort_by { |_, members| -members.size }.to_h

  log "🔍 Found #{topics.size} topics:"
  topics.each { |topic, members| log " #{topic}: #{members.size} posts" }

  topics
end

# ==========================================
# CONTENT FILTERING BY STYLE
# ==========================================

# Narrows the topics down to what suits the bot's style.
#
# @param topics [Hash{String => Array<Hash>}]
# @param style [String] 'neutral', 'positive' or 'sarcastic'
# @return [Hash{String => Array<Hash>}] unchanged input for 'neutral' and for
#   any unknown style
def filter_topics_by_style(topics, style)
  case style
  when 'positive' then select_positive_topics(topics)
  when 'sarcastic' then select_sarcastic_topics(topics)
  else topics
  end
end

# Keeps non-political topics, reduced to posts with positive wording and no
# negative wording; topics left without posts are dropped.
def select_positive_topics(topics)
  positive_topics = {}

  topics.each do |topic, members|
    next if topic.include?('Politika') || topic.include?('Zahraniční')

    upbeat = members.select { |post| positive_post?(post) }
    positive_topics[topic] = upbeat unless upbeat.empty?
  end

  log "💚 Filtered to #{positive_topics.size} positive topics"
  positive_topics
end

# True when the post text contains a positive keyword and no negative one.
def positive_post?(post)
  text = post['text'].to_s.downcase
  return false if text.match?(/nehoda|smrt|tragédie|havárie|konflikt|krize/)

  text.match?(/úspěch|vítěz|rekord|festival|koncert|ocenění|talent/)
end

# Prefers political topics; when fewer than 3 exist, tops the selection up
# with the remaining topics (in their existing order) to at most 5.
def select_sarcastic_topics(topics)
  sarcastic_topics = topics.select do |topic, _|
    topic.include?('Zahraniční') || topic.include?('Politika')
  end

  if sarcastic_topics.size < 3
    topics.each do |topic, members|
      break if sarcastic_topics.size >= 5

      sarcastic_topics[topic] = members unless sarcastic_topics.key?(topic)
    end
  end

  log "😏 Selected #{sarcastic_topics.size} topics for sarcasm"
  sarcastic_topics
end

# ==========================================
# CLAUDE API ANALYSIS
# ==========================================

# Sends a sample of the posts to the Claude Messages API and returns the
# parsed JSON analysis. Never raises: any HTTP, network or parsing problem is
# logged and answered with default_analysis.
#
# @param posts [Array<Hash>] all posts of the day
# @param topics [Hash{String => Array<Hash>}] topics selected for the digest
# @return [Hash] expected keys (per the prompt): 'main_topics', 'sentiment',
#   'notable_events'
def analyze_with_claude(posts, topics)
  log '🤖 Analyzing with Claude API...'

  topic_summary = topics.map { |topic, members| "#{topic}: #{members.size}" }.join(', ')
  # Only the first ten posts, each cut to its leading characters, are sent.
  sample_texts = posts.first(10).map { |post| post['text'].to_s[0..150] }

  prompt = <<~PROMPT
    Analyzuj #{posts.size} českých/slovenských zpráv z Mastodon instance Zprávobot.news.

    Témata: #{topic_summary}

    Ukázka textů:
    #{sample_texts.join("\n---\n")}

    Vrať POUZE JSON (žádný markdown):
    {
      "main_topics": ["téma1", "téma2", "téma3"],
      "sentiment": "neutral|positive|negative",
      "notable_events": ["událost1", "událost2"]
    }
  PROMPT

  uri = URI(ANTHROPIC_API_URL)
  request = Net::HTTP::Post.new(uri)
  request['anthropic-version'] = '2023-06-01'
  request['content-type'] = 'application/json'
  request['x-api-key'] = ENV['ANTHROPIC_API_KEY']
  request.body = {
    model: 'claude-sonnet-4-20250514',
    max_tokens: 1000,
    messages: [{ role: 'user', content: prompt }]
  }.to_json

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
    http.request(request)
  end

  if response.code != '200'
    log "⚠️ Claude API error: #{response.code}"
    return default_analysis(topics)
  end

  text = JSON.parse(response.body).dig('content', 0, 'text')
  # The model sometimes wraps its JSON in a markdown fence; strip it.
  analysis = JSON.parse(text.gsub(/```json|```/, '').strip)

  unless analysis.is_a?(Hash)
    log '⚠️ Claude API error: unexpected response shape'
    return default_analysis(topics)
  end

  log '✅ Claude analysis complete'
  analysis
rescue StandardError => e
  log "⚠️ Claude API error: #{e.message}"
  default_analysis(topics)
end

# Fallback analysis used whenever the Claude call fails.
#
# @param topics [Hash{String => Array<Hash>}]
# @return [Hash] neutral sentiment with the first three topic labels
def default_analysis(topics)
  {
    'main_topics' => topics.keys.first(3),
    'sentiment' => 'neutral',
    'notable_events' => []
  }
end

# ==========================================
# TOOT GENERATION
# ==========================================

# Builds the first toot of the thread: a headline, the post count and the
# largest topics, capped at MAX_TOOT_LENGTH characters.
#
# @param posts_count [Integer] number of posts processed
# @param topics [Hash{String => Array<Hash>}]
# @param style [String] 'neutral', 'positive' or 'sarcastic'; any other value
#   is rendered with the neutral layout
# @param hashtags [String]
# @param date [String, nil] digest day as 'YYYY-MM-DD'; defaults to yesterday,
#   matching load_posts_from_csv
# @return [String]
# @raise [ArgumentError] when date is given but is not 'YYYY-MM-DD'
def generate_summary_toot(posts_count, topics, style, hashtags, date: nil)
  day = date ? Time.strptime(date, '%Y-%m-%d') : Time.now - SECONDS_PER_DAY
  date_label = day.strftime('%d.%m.%Y')

  unit = style == 'sarcastic' ? '×' : ' postů'
  topic_lines = topics.first(5).map do |topic, members|
    "#{topic} (#{members.size}#{unit})"
  end

  summary =
    case style
    when 'positive'
      <<~TOOT
        ☀️ DOBRÉ ZPRÁVY DNE (#{date_label})

        Z dnešních #{posts_count} zpráv vybrané momenty:

        #{topic_lines[0..3].join("\n")}

        #{hashtags}

        👇 Inspirace na čtení
      TOOT
    when 'sarcastic'
      <<~TOOT
        😏 DNEŠNÍ REALITA (#{date_label})

        #{posts_count} postů = co se stalo?

        #{topic_lines[0..3].join("\n")}

        #{hashtags}

        👇 Důkazy zmaru
      TOOT
    else
      <<~TOOT
        📊 TRENDY DNE (#{date_label})

        Zpracováno #{posts_count} postů:

        #{topic_lines.join("\n")}

        #{hashtags}

        👇 Odkazy na vybrané články
      TOOT
    end

  truncate_toot(summary.strip)
end

# Builds the second toot of the thread: up to two links for each of the first
# five topics, framed by a style-specific header and footer. When the result
# would exceed MAX_TOOT_LENGTH, whole links are removed from the end so that
# no URL is ever published cut in half.
#
# @param topics [Hash{String => Array<Hash>}] posts must carry 'extracted_url'
# @param style [String] 'neutral', 'positive' or 'sarcastic'; any other value
#   uses the neutral header and footer
# @return [String]
def generate_links_toot(topics, style)
  max_topics = 5
  max_links_per_topic = 2

  sections = topics.first(max_topics).map do |topic, members|
    entries = pick_diverse_posts(members).first(max_links_per_topic)
                                         .map { |post| link_entry(post) }
                                         .compact
    [topic, entries]
  end
  # A topic heading without a single link carries no information.
  sections.reject! { |_, entries| entries.empty? }

  header, footer = links_frame(style)
  toot = render_links_toot(header, sections, footer)

  while toot.length > MAX_TOOT_LENGTH && !sections.empty?
    sections.last[1].pop
    sections.pop if sections.last[1].empty?
    toot = render_links_toot(header, sections, footer)
  end

  # Safety net; unreachable with the built-in headers and footers.
  truncate_toot(toot.strip)
end

# Picks the first and the middle post of a topic to get some variety.
def pick_diverse_posts(posts)
  picks = []
  picks << posts.first unless posts.empty?
  picks << posts[posts.size / 2] if posts.size > 1
  picks
end

# Formats one post as a two-line "title + URL" entry, or returns nil when the
# post has no usable URL.
def link_entry(post)
  url = post['extracted_url']
  return nil if url.nil? || url.empty?

  # Title: first non-blank line, whitespace squeezed, at most 50 characters.
  first_line = post['text'].to_s.lines.map(&:strip).reject(&:empty?).first.to_s
  title = first_line.gsub(/\s+/, ' ')[0, 50].strip

  "• #{title}...\n #{url}"
end

# Returns the [header, footer] pair for a style.
def links_frame(style)
  case style
  when 'positive'
    ['💚 POZITIVNÍ PŘÍBĚHY DNE:', "\n💙 Máte skvělý den!\n#inspirace"]
  when 'sarcastic'
    ['🤡 "BREAKING NEWS" DNE:', "\n🙃 Zítra: repeat\n#sarkasmus"]
  else
    ['📌 VYBRANÉ ČLÁNKY DNE:', "\n#články #zprávy"]
  end
end

# Joins header, topic sections and footer; sections after the first are
# separated from the previous one by a blank line.
def render_links_toot(header, sections, footer)
  body = sections.map { |topic, entries| "\n#{topic}:\n#{entries.join("\n")}" }.join("\n")
  header + body + footer
end

# ==========================================
# MASTODON PUBLISHING (DIRECT HTTP)
# ==========================================

# Publishes the summary toot and the links toot as its reply. Terminates the
# process with status 1 when either request fails.
#
# @param bot_name [String] key into BOTS
# @param summary_toot [String]
# @param links_toot [String]
# @param dry_run [Boolean] when truthy, only logs both toots
# @return [Array(Hash, Hash), Array(nil, nil)] parsed API responses of both
#   toots, or [nil, nil] in dry-run mode
def publish_thread(bot_name, summary_toot, links_toot, dry_run: false)
  config = BOTS[bot_name]

  log "📤 Publishing thread for @#{bot_name}..."

  if dry_run
    log '🧪 DRY RUN MODE - Not actually publishing'
    log "\n--- TOOT 1/2 (#{summary_toot.length} chars) ---"
    log summary_toot
    log "\n--- TOOT 2/2 (#{links_toot.length} chars) ---"
    log links_toot
    log "\n✅ Dry run complete"
    return [nil, nil]
  end

  toot1_data = post_status(config[:token], { status: summary_toot, visibility: 'public' })
  log "✅ Toot 1/2 published: #{toot1_data['url']}"

  toot2_data = post_status(
    config[:token],
    { status: links_toot, in_reply_to_id: toot1_data['id'], visibility: 'public' }
  )
  log '✅ Toot 2/2 published (thread)'

  [toot1_data, toot2_data]
rescue StandardError => e
  log "❌ ERROR publishing thread: #{e.message}"
  exit 1
end

# Posts one status to the Mastodon instance and returns the parsed response.
# Logs the response body and exits with status 1 on any non-200 answer, so a
# failed reply is never reported as published.
def post_status(token, payload)
  uri = URI("#{MASTODON_URL}/api/v1/statuses")
  request = Net::HTTP::Post.new(uri)
  request['Authorization'] = "Bearer #{token}"
  request['Content-Type'] = 'application/json'
  request.body = payload.to_json

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
    http.request(request)
  end

  unless response.code == '200'
    log "❌ ERROR: #{response.body}"
    exit 1
  end

  JSON.parse(response.body)
end

# ==========================================
# MAIN EXECUTION
# ==========================================

# Runs the whole pipeline for one bot: load, categorize, filter, analyze,
# render and publish.
#
# @param bot_name [String] key into BOTS
# @param options [Hash] parsed CLI options (:date, :dry_run)
# @return [void]
def main(bot_name, options = {})
  log "🚀 Starting Daily Digest for @#{bot_name}"
  log('=' * 60)

  config = BOTS[bot_name]

  posts = load_posts_from_csv(options[:date])

  log "\n🔍 Extracting topics..."
  all_topics = extract_topics(posts)
  topics = filter_topics_by_style(all_topics, config[:style])

  if topics.empty?
    log "⚠️ No suitable topics found for style: #{config[:style]}"
    exit 1
  end

  log "\n🤖 Analyzing with Claude..."
  analysis = analyze_with_claude(posts, topics)
  # The analysis is informational only; it does not feed into the toots.
  log " Sentiment: #{analysis['sentiment']}"

  log "\n📝 Generating content..."
  summary = generate_summary_toot(posts.size, topics, config[:style], config[:hashtags],
                                  date: options[:date])
  links = generate_links_toot(topics, config[:style])

  log " Summary: #{summary.length} chars"
  log " Links: #{links.length} chars"

  log "\n📤 Publishing to Mastodon..."
  toot1, = publish_thread(bot_name, summary, links, dry_run: options[:dry_run])

  log("\n#{'=' * 60}")
  log "✅ Digest complete for @#{bot_name}"

  log "🔗 Thread: #{toot1['url']}" if !options[:dry_run] && toot1
end

# Run main
begin
  main(bot_name, options)
rescue Interrupt
  log "\n⚠️ Interrupted by user"
  exit 130
rescue StandardError => e
  log "❌ FATAL ERROR: #{e.message}"
  log " #{Array(e.backtrace).first(5).join("\n ")}"
  exit 1
end