# zpravobot-digest/publish_digest.rb

#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Zprávobot.news - AI Daily Digest Publisher
# Version: 1.0.2 (Fixed URLs)

require 'csv'
require 'json'
require 'time'
require 'net/http'
require 'uri'
require 'optparse'

# ==========================================
# CONFIGURATION
# ==========================================

# Base URL of the Mastodon instance the digest is posted to.
MASTODON_URL = 'https://zpravobot.news'.freeze
# CSV export that the digest reads its posts from.
CSV_PATH = '/app/data/posts-latest.csv'.freeze
# Endpoint used for the Claude analysis request.
ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages'.freeze

# Per-bot settings, keyed by the name accepted by --bot.
#   token     - Mastodon access token, read from the environment at load time
#               (nil when the variable is not set)
#   style     - selects topic filtering and toot wording
#               ('neutral', 'positive' or 'sarcastic')
#   time_slot - descriptive label; not read anywhere in this file
#   hashtags  - hashtag line placed in the summary toot
# The table and every entry are frozen so the configuration cannot be mutated
# by accident at runtime.
BOTS = {
  'zpravobot' => {
    token: ENV['ZPRAVOBOT_TOKEN'],
    style: 'neutral',
    time_slot: 'morning',
    hashtags: '#zpravobot #trendydne'
  }.freeze,
  'pozitivni' => {
    token: ENV['POZITIVNI_TOKEN'],
    style: 'positive',
    time_slot: 'noon',
    hashtags: '#dobréZprávy #zpravobot'
  }.freeze,
  'sarkasticky' => {
    token: ENV['SARKASTICKY_TOKEN'],
    style: 'sarcastic',
    time_slot: 'evening',
    hashtags: '#realita #zpravobot'
  }.freeze
}.freeze

# ==========================================
# COMMAND LINE PARSING
# ==========================================

# Parsed command-line switches: :bot, :dry_run and :date.
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: publish_digest.rb [options]"

  opts.on("--bot BOT", String, "Bot name (zpravobot, pozitivni, sarkasticky)") do |b|
    options[:bot] = b
  end

  opts.on("--dry-run", "Test mode - don't actually publish") do
    options[:dry_run] = true
  end

  opts.on("--date DATE", String, "Process specific date (YYYY-MM-DD)") do |d|
    options[:date] = d
  end

  opts.on("-h", "--help", "Show this help") do
    puts opts
    exit
  end
end

# Unknown switches or missing arguments raise OptionParser::ParseError;
# report them together with the usage text instead of a raw backtrace.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  puts "❌ ERROR: #{e.message}"
  puts parser
  exit 1
end

bot_name = options[:bot]

unless bot_name && BOTS.key?(bot_name)
  puts "❌ ERROR: Invalid bot name. Use: zpravobot, pozitivni, or sarkasticky"
  exit 1
end

# Posts are matched against a zero-padded YYYY-MM-DD string, so any other
# --date format could never match a row; reject it up front.
if options[:date] && options[:date] !~ /\A\d{4}-\d{2}-\d{2}\z/
  puts "❌ ERROR: Invalid --date '#{options[:date]}'. Expected format: YYYY-MM-DD"
  exit 1
end

config = BOTS[bot_name]

# Validate environment. A variable that is set but blank is as unusable as a
# missing one, so both are rejected.
if config[:token].to_s.strip.empty?
  puts "❌ ERROR: Missing token for @#{bot_name}"
  puts "   Set environment variable: #{bot_name.upcase}_TOKEN"
  exit 1
end

if ENV['ANTHROPIC_API_KEY'].to_s.strip.empty?
  puts "❌ ERROR: Missing ANTHROPIC_API_KEY"
  exit 1
end

# ==========================================
# UTILITIES
# ==========================================

# Writes +message+ to standard output as one entry prefixed with the current
# local time in the form "[YYYY-MM-DD HH:MM:SS]".
def log(message)
  stamp = Time.now.strftime('%F %T')
  $stdout.puts("[#{stamp}] #{message}")
end

# Returns the first http(s) URL found in +text+, or nil when there is none.
#
# text - the post body; nil is accepted and yields nil.
#
# A URL runs until whitespace, '<', '>' or '"'. Sentence punctuation that
# directly follows the URL ("... https://example.com/a.") would otherwise be
# captured as part of it and produce a broken link, so trailing . , ; : ! ?
# and the ellipsis character are removed from the match.
def extract_url(text)
  return nil if text.nil?

  url = text.to_s[%r{https?://[^\s<>"]+}]
  url && url.sub(/[.,;:!?…]+\z/, '')
end

# ==========================================
# DATA LOADING
# ==========================================

# Loads the posts created on one calendar day from the CSV at CSV_PATH.
#
# date - day to select as a "YYYY-MM-DD" string; defaults to yesterday
#        (current time minus 24 hours).
#
# Returns an Array of Hashes with the string keys 'text', 'url' and
# 'created_at'. 'text' and 'url' are never nil: an empty CSV field is returned
# as '' so callers can call String methods on them safely.
#
# Rows whose 'created_at' cannot be parsed are skipped; the number of skipped
# rows is logged so data problems do not go unnoticed.
#
# Terminates the process with status 1 when the CSV file does not exist or no
# post matches the requested day.
def load_posts_from_csv(date = nil)
  target_date = date || (Time.now - 86400).strftime('%Y-%m-%d')

  unless File.exist?(CSV_PATH)
    log "❌ CSV file not found: #{CSV_PATH}"
    exit 1
  end

  posts = []
  skipped = 0

  CSV.foreach(CSV_PATH, headers: true, encoding: 'utf-8') do |row|
    begin
      created = Time.parse(row['created_at'])
    rescue StandardError
      # Missing or malformed timestamp: the row cannot be assigned to a day.
      skipped += 1
      next
    end

    next unless created.strftime('%Y-%m-%d') == target_date

    posts << {
      'text' => row['text'].to_s,
      'url' => row['url'] || '',
      'created_at' => row['created_at']
    }
  end

  log "⚠️  Skipped #{skipped} rows with unparseable created_at" if skipped > 0
  log "📊 Loaded #{posts.size} posts from #{target_date}"

  if posts.empty?
    log "⚠️  No posts found for #{target_date}"
    exit 1
  end

  posts
end

# ==========================================
# TOPIC EXTRACTION
# ==========================================

# Ordered keyword rules used by extract_topics. The first rule whose pattern
# matches the lower-cased post text decides the topic, so the order of this
# list is significant.
TOPIC_RULES = [
  ['🌍 Zahraniční politika', /trump|venezuela|maduro|grónsko|greenland|usa|bílý dům/],
  ['🏒 Hokej', /hokej|extraliga|nhl|ms u20/],
  ['⚽ Fotbal', /fotbal|chelsea|liga|gól|penalty/],
  ['🎬 Kultura', /film|seriál|stranger things|hudba|koncert|festival|netflix/],
  ['❄️ Počasí', /počasí|teplota|mráz|sníh|déšť/],
  ['🏛️ Politika', /politika|parlament|vláda|ministr/],
  ['💼 Ekonomika', /ekonomika|koruna|inflace|mzdy|ceny/]
].freeze

# Groups posts into topics by keyword and annotates each post with its link.
#
# posts - Array of post Hashes with 'text' and 'url'. Every post is modified
#         in place: 'extracted_url' is set to the first URL found in the text,
#         or to the post's 'url' value when the text contains none. A nil
#         'text' is treated as an empty string.
#
# Returns a Hash of topic label => Array of posts, ordered by descending post
# count; topics with equal counts keep the order in which they were first
# seen. Posts that match no rule are left out.
def extract_topics(posts)
  grouped = Hash.new { |hash, label| hash[label] = [] }

  posts.each do |post|
    body = post['text'].to_s

    post['extracted_url'] = extract_url(body) || post['url']

    lowered = body.downcase
    label, = TOPIC_RULES.find { |_, pattern| lowered.match?(pattern) }
    grouped[label] << post if label
  end

  # The index is used as a tie-breaker because sort_by alone is not stable.
  ranked = grouped.sort_by.with_index { |(_, members), index| [-members.size, index] }.to_h

  log "🔍 Found #{ranked.size} topics:"
  ranked.each { |label, members| log "   #{label}: #{members.size} posts" }

  ranked
end

# ==========================================
# CONTENT FILTERING BY STYLE
# ==========================================

# Narrows the topic table down to what suits a bot's style.
#
# topics - Hash of topic label => Array of post Hashes.
# style  - 'positive', 'sarcastic', or anything else.
#
# 'positive'  : political topics (label contains "Politika" or "Zahraniční")
#               are dropped; in the remaining topics only posts whose text
#               contains an upbeat keyword and no grim keyword are kept, and
#               topics left without posts are omitted.
# 'sarcastic' : political topics are taken first; when fewer than three were
#               found, further topics are added in their given order until
#               five are selected.
# otherwise   : +topics+ is returned unchanged (this covers 'neutral').
def filter_topics_by_style(topics, style)
  political = ->(label) { label.include?('Zahraniční') || label.include?('Politika') }

  if style == 'positive'
    upbeat = /úspěch|vítěz|rekord|festival|koncert|ocenění|talent/
    grim = /nehoda|smrt|tragédie|havárie|konflikt|krize/

    kept = topics.each_with_object({}) do |(label, entries), result|
      next if political.call(label)

      cheerful = entries.select do |entry|
        lowered = entry['text'].downcase
        lowered.match?(upbeat) && !lowered.match?(grim)
      end

      result[label] = cheerful unless cheerful.empty?
    end

    log "💚 Filtered to #{kept.size} positive topics"
    kept
  elsif style == 'sarcastic'
    picked = topics.each_with_object({}) do |(label, entries), result|
      result[label] = entries if political.call(label)
    end

    if picked.size < 3
      topics.each do |label, entries|
        break if picked.size >= 5

        picked[label] = entries unless picked.key?(label)
      end
    end

    log "😏 Selected #{picked.size} topics for sarcasm"
    picked
  else
    topics
  end
end

# ==========================================
# CLAUDE API ANALYSIS
# ==========================================

# Asks the Claude API for a short JSON analysis of the day's posts.
#
# posts  - Array of post Hashes; the 'text' of the first ten posts (at most
#          151 characters each) is sent as a sample. A nil 'text' is sent as
#          an empty string instead of aborting the whole analysis.
# topics - Hash of topic label => Array of posts; only the counts are sent.
#
# Returns the Hash parsed from the model's reply. Any failure (non-200
# status, network error or timeout, malformed JSON, a reply that is not a
# JSON object) is logged and answered with default_analysis(topics), so this
# method does not raise StandardError to its caller.
def analyze_with_claude(posts, topics)
  log "🤖 Analyzing with Claude API..."

  topic_summary = topics.map { |topic, topic_posts| "#{topic}: #{topic_posts.size}" }.join(', ')
  sample_texts = posts.first(10).map { |post| post['text'].to_s[0..150] }

  prompt = <<~PROMPT
    Analyzuj #{posts.size} českých/slovenských zpráv z Mastodon instance Zprávobot.news.

    Témata: #{topic_summary}

    Ukázka textů:
    #{sample_texts.join("\n---\n")}

    Vrať POUZE JSON (žádný markdown):
    {
      "main_topics": ["téma1", "téma2", "téma3"],
      "sentiment": "neutral|positive|negative",
      "notable_events": ["událost1", "událost2"]
    }
  PROMPT

  uri = URI(ANTHROPIC_API_URL)
  request = Net::HTTP::Post.new(uri)
  request['anthropic-version'] = '2023-06-01'
  request['content-type'] = 'application/json'
  request['x-api-key'] = ENV['ANTHROPIC_API_KEY']

  request.body = {
    model: 'claude-sonnet-4-20250514',
    max_tokens: 1000,
    messages: [
      { role: 'user', content: prompt }
    ]
  }.to_json

  # Explicit timeouts keep an unresponsive API from stalling the digest run.
  response = Net::HTTP.start(uri.hostname, uri.port,
                             use_ssl: uri.scheme == 'https',
                             open_timeout: 10,
                             read_timeout: 60) do |http|
    http.request(request)
  end

  if response.code != '200'
    log "⚠️  Claude API error: #{response.code}"
    return default_analysis(topics)
  end

  reply = JSON.parse(response.body).dig('content', 0, 'text').to_s

  # The model sometimes wraps its JSON in a markdown fence despite the prompt.
  analysis = JSON.parse(reply.gsub(/```json|```/, '').strip)

  unless analysis.is_a?(Hash)
    log "⚠️  Claude API error: reply is not a JSON object"
    return default_analysis(topics)
  end

  log "✅ Claude analysis complete"
  analysis

rescue StandardError => e
  log "⚠️  Claude API error: #{e.message}"
  default_analysis(topics)
end

# Builds the fallback analysis used when the Claude request cannot be used:
# the first three topic labels, a neutral sentiment and no notable events.
#
# topics - Hash whose keys are topic labels.
def default_analysis(topics)
  fallback = {}
  fallback['main_topics'] = topics.keys.first(3)
  fallback['sentiment'] = 'neutral'
  fallback['notable_events'] = []
  fallback
end

# ==========================================
# TOOT GENERATION
# ==========================================

# Builds the first toot of the thread: a dated headline, the post count, the
# leading topics with their post counts, the hashtags and a pointer to the
# follow-up toot.
#
# posts_count - number of posts that were processed.
# topics      - Hash of topic label => Array of posts, already ordered.
# style       - 'neutral', 'positive' or 'sarcastic'.
# hashtags    - hashtag line to include.
# date        - optional day shown in the headline: a "YYYY-MM-DD" String or
#               any object responding to strftime. Defaults to yesterday
#               (current time minus 24 hours).
#
# Returns the toot text, at most 500 characters long. The limit is applied to
# the stripped text, so a toot of exactly 500 characters is kept intact.
#
# Raises ArgumentError for an unknown style.
def generate_summary_toot(posts_count, topics, style, hashtags, date: nil)
  day =
    if date.nil?
      Time.now - 86400
    elsif date.respond_to?(:strftime)
      date
    else
      Time.parse(date.to_s)
    end
  date_label = day.strftime('%d.%m.%Y')

  unit = style == 'sarcastic' ? '×' : ' postů'
  topic_lines = topics.first(5).map { |topic, members| "#{topic} (#{members.size}#{unit})" }

  heading, intro, shown, pointer =
    case style
    when 'neutral'
      ["📊 TRENDY DNE (#{date_label})",
       "Zpracováno #{posts_count} postů:",
       topic_lines,
       '👇 Odkazy na vybrané články']
    when 'positive'
      ["☀️ DOBRÉ ZPRÁVY DNE (#{date_label})",
       "Z dnešních #{posts_count} zpráv vybrané momenty:",
       topic_lines.first(4),
       '👇 Inspirace na čtení']
    when 'sarcastic'
      ["😏 DNEŠNÍ REALITA (#{date_label})",
       "#{posts_count} postů = co se stalo?",
       topic_lines.first(4),
       '👇 Důkazy zmaru']
    else
      raise ArgumentError, "Unknown digest style: #{style.inspect}"
    end

  # Sections are separated by one blank line.
  summary = [heading, '', intro, '', shown.join("\n"), '', hashtags, '', pointer].join("\n").strip

  summary = "#{summary[0, 497]}..." if summary.length > 500
  summary
end

# Builds the second toot of the thread: for up to five topics, up to two
# article links (the first post and the post in the middle of the topic's
# list), wrapped in a style-specific header and footer.
#
# topics - Hash of topic label => Array of post Hashes. Each post is expected
#          to carry 'text' and 'extracted_url'; posts without a URL are left
#          out, and a topic with no usable link gets no heading at all.
# style  - 'neutral', 'positive' or 'sarcastic'.
#
# Returns the toot text, at most 500 characters long. When the text is too
# long, whole link entries are removed from the end, so the result never
# contains a URL cut in half and keeps its footer.
#
# Raises ArgumentError for an unknown style.
def generate_links_toot(topics, style)
  max_topics = 5
  max_links_per_topic = 2
  max_title_chars = 50
  char_limit = 500

  header, footer =
    case style
    when 'neutral'
      ["📌 VYBRANÉ ČLÁNKY DNE:", "\n#články #zprávy"]
    when 'positive'
      ["💚 POZITIVNÍ PŘÍBĚHY DNE:", "\n💙 Máte skvělý den!\n#inspirace"]
    when 'sarcastic'
      ["🤡 \"BREAKING NEWS\" DNE:", "\n🙃 Zítra: repeat\n#sarkasmus"]
    else
      raise ArgumentError, "Unknown digest style: #{style.inspect}"
    end

  # One [topic, entries] pair per topic; each entry is the list of lines that
  # belong to a single link.
  sections = topics.keys.first(max_topics).map do |topic|
    posts = Array(topics[topic])

    # Select diverse posts (first and middle)
    candidates = [posts.first]
    candidates << posts[posts.size / 2] if posts.size > 1

    entries = candidates.compact.first(max_links_per_topic).map do |post|
      url = post['extracted_url'].to_s
      next if url.empty?

      # Title: first non-blank line, whitespace collapsed, max 50 chars
      first_line = post['text'].to_s.each_line.map(&:strip).reject(&:empty?).first.to_s
      title = first_line.gsub(/\s+/, ' ')[0, max_title_chars].strip

      title.empty? ? ["• #{url}"] : ["• #{title}...", "  #{url}"]
    end

    [topic, entries.compact]
  end
  sections.reject! { |_, entries| entries.empty? }

  render = lambda do |parts|
    lines = parts.flat_map { |topic, entries| ["\n#{topic}:", *entries.flatten] }
    header + lines.join("\n") + footer
  end

  toot = render.call(sections)

  # Ensure under 500 chars: drop the last link entry (and its topic heading
  # once the topic has no links left) until the toot fits.
  while toot.length > char_limit && !sections.empty?
    trailing_entries = sections.last[1]
    trailing_entries.pop
    sections.pop if trailing_entries.empty?
    toot = render.call(sections)
  end

  # Last resort; only reachable if header and footer alone exceed the limit.
  toot = "#{toot[0, char_limit - 3]}..." if toot.length > char_limit

  toot.strip
end

# ==========================================
# MASTODON PUBLISHING (DIRECT HTTP)
# ==========================================

# Sends one status to the Mastodon instance at MASTODON_URL.
#
# token   - access token placed in the Authorization header.
# payload - Hash sent as the JSON request body.
#
# Returns the HTTP response object.
def post_status(token, payload)
  uri = URI("#{MASTODON_URL}/api/v1/statuses")
  request = Net::HTTP::Post.new(uri)
  request['Authorization'] = "Bearer #{token}"
  request['Content-Type'] = 'application/json'
  request.body = payload.to_json

  # Explicit timeouts keep an unresponsive server from stalling the run.
  Net::HTTP.start(uri.hostname, uri.port,
                  use_ssl: uri.scheme == 'https',
                  open_timeout: 10,
                  read_timeout: 30) do |http|
    http.request(request)
  end
end

# Publishes the digest as a two-toot thread: the summary first, then the
# links toot as a reply to it.
#
# bot_name     - key into BOTS; that entry's :token authorizes the requests.
# summary_toot - text of the first toot.
# links_toot   - text of the reply.
# dry_run      - when true, both toots are only logged and nothing is sent.
#
# Returns [first_status, second_status] as parsed JSON Hashes, or [nil, nil]
# for a dry run.
#
# Terminates the process with status 1 when either request does not return
# HTTP 200 or any other error occurs. A failed reply is reported as a failure
# (not as a published thread) and the message includes the URL of the first
# toot, which is already public at that point.
def publish_thread(bot_name, summary_toot, links_toot, dry_run: false)
  config = BOTS[bot_name]

  log "📤 Publishing thread for @#{bot_name}..."

  if dry_run
    log "🧪 DRY RUN MODE - Not actually publishing"
    log "\n--- TOOT 1/2 (#{summary_toot.length} chars) ---"
    log summary_toot
    log "\n--- TOOT 2/2 (#{links_toot.length} chars) ---"
    log links_toot
    log "\n✅ Dry run complete"
    return [nil, nil]
  end

  # Publish toot 1
  response = post_status(config[:token], { status: summary_toot, visibility: 'public' })

  unless response.code == '200'
    log "❌ ERROR: #{response.body}"
    exit 1
  end

  toot1_data = JSON.parse(response.body)
  log "✅ Toot 1/2 published: #{toot1_data['url']}"

  # Publish toot 2 as reply
  response2 = post_status(config[:token], {
    status: links_toot,
    in_reply_to_id: toot1_data['id'],
    visibility: 'public'
  })

  unless response2.code == '200'
    log "❌ ERROR: Toot 2/2 failed (toot 1/2 is already live at #{toot1_data['url']}): #{response2.body}"
    exit 1
  end

  log "✅ Toot 2/2 published (thread)"

  [toot1_data, JSON.parse(response2.body)]

rescue StandardError => e
  log "❌ ERROR publishing thread: #{e.message}"
  exit 1
end

# ==========================================
# MAIN EXECUTION
# ==========================================

# Runs the whole digest pipeline for one bot: load posts, group them into
# topics, filter the topics for the bot's style, request the Claude analysis,
# build both toots and publish them (or only log them in dry-run mode).
#
# bot_name - key into BOTS.
# options  - Hash of parsed switches; :date and :dry_run are read.
#
# Terminates the process with status 1 when no topic survives the style
# filter.
def main(bot_name, options = {})
  divider = '=' * 60

  log "🚀 Starting Daily Digest for @#{bot_name}"
  log divider

  settings = BOTS[bot_name]
  posts = load_posts_from_csv(options[:date])

  log "\n🔍 Extracting topics..."
  topics = filter_topics_by_style(extract_topics(posts), settings[:style])

  if topics.empty?
    log "⚠️  No suitable topics found for style: #{settings[:style]}"
    exit 1
  end

  log "\n🤖 Analyzing with Claude..."
  # NOTE(review): the analysis result is not consumed by any later step.
  _analysis = analyze_with_claude(posts, topics)

  log "\n📝 Generating content..."
  # NOTE(review): options[:date] is not handed to the summary generator, so
  # the date in the summary headline does not follow --date.
  summary = generate_summary_toot(posts.size, topics, settings[:style], settings[:hashtags])
  links = generate_links_toot(topics, settings[:style])

  log "   Summary: #{summary.length} chars"
  log "   Links: #{links.length} chars"

  log "\n📤 Publishing to Mastodon..."
  first_toot, = publish_thread(bot_name, summary, links, dry_run: options[:dry_run])

  log "\n#{divider}"
  log "✅ Digest complete for @#{bot_name}"

  log "🔗 Thread: #{first_toot['url']}" if first_toot && !options[:dry_run]
end

# Run main
# Script entry point: run the digest for the bot selected on the command
# line. Ctrl-C ends the run with status 130; any other StandardError is
# logged with the first five backtrace frames and ends the run with status 1.
begin
  main(bot_name, options)
rescue Interrupt
  log "\n⚠️  Interrupted by user"
  exit 130
rescue StandardError => error
  frames = error.backtrace.first(5)
  log "❌ FATAL ERROR: #{error.message}"
  log "   #{frames.join("\n   ")}"
  exit 1
end