Class: AutoBlogSchemaExtractionWorker

Inherits:
Object
  • Object
show all
Includes:
Sidekiq::Worker, Workers::StatusBroadcastable
Defined in:
app/workers/auto_blog_schema_extraction_worker.rb

Instance Attribute Summary

Attributes included from Workers::StatusBroadcastable

#broadcast_status_updates

Instance Method Summary collapse

Methods included from Workers::StatusBroadcastable::Overrides

#at, #store, #total

Instance Method Details

#perform(limit = nil, article_ids: nil, force_update: false, send_email: true) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'app/workers/auto_blog_schema_extraction_worker.rb', line 7

# Runs schema extraction over every article returned by
# +find_articles_needing_schema+, recording progress through +store+, echoing
# progress and a final summary to stdout, and optionally handing a report hash
# to +send_email_report+.
#
# @param limit forwarded unchanged to +find_articles_needing_schema+
# @param article_ids forwarded unchanged to +find_articles_needing_schema+
# @param force_update forwarded unchanged to +find_articles_needing_schema+
# @param send_email [Boolean] when truthy, +send_email_report+ is called with
#   the run summary (also when there was nothing to process)
# @return [nil]
def perform(limit = nil, article_ids: nil, force_update: false, send_email: true)
  store status: 'processing'

  # Snapshot the work list once and derive every total from the snapshot.
  # NOTE(review): if find_articles_needing_schema returns a lazy query object
  # (e.g. an ActiveRecord relation -- not visible from here), calling `count`
  # on it repeatedly would issue a query each time and could change while
  # articles are being processed; confirm against that method.
  articles = find_articles_needing_schema(limit, article_ids, force_update).to_a
  total = articles.size

  if articles.empty?
    nothing_to_do = 'No articles found needing schema extraction'
    store status: 'completed'
    store articles_processed: 0
    store message: nothing_to_do
    Rails.logger.info "[AutoBlogSchemaExtractionWorker] #{nothing_to_do}"

    # Send email report even when no articles found
    if send_email
      send_email_report({
        articles_processed: 0,
        errors: 0,
        total_articles: 0,
        message: nothing_to_do,
        processed_articles: [],
        error_articles: []
      })
    end
    return
  end

  store total_articles: total
  store status: 'processing'

  processed_articles = []
  error_articles = []

  Rails.logger.info "Processing #{total} articles..."

  articles.each_with_index do |article, index|
    store current_article: index + 1
    store current_article_title: article.subject

    Rails.logger.info "[AutoBlogSchemaExtractionWorker] Processing article #{article.id}: #{article.subject}"
    print "  [#{index + 1}/#{total}] Processing article #{article.id}: #{article.subject}... "

    entry = extract_article_schema(article)
    # Only failure entries carry an :error key.
    (entry.key?(:error) ? error_articles : processed_articles) << entry

    # Add a small delay to avoid overwhelming the API (skipped after the last article)
    sleep(rand(2..5)) if index < total - 1
  end

  # Counts come from the report arrays so they can never disagree with them.
  processed_count = processed_articles.size
  error_count = error_articles.size
  summary = "Processed #{processed_count} articles, #{error_count} errors"

  store status: 'completed'
  store articles_processed: processed_count
  store errors: error_count
  store message: summary

  Rails.logger.info "[AutoBlogSchemaExtractionWorker] Completed: #{summary}"

  print_extraction_summary(total, processed_articles, error_articles)

  # Send email report if requested
  if send_email
    puts 'Sending email report...'
    send_email_report({
      articles_processed: processed_count,
      errors: error_count,
      total_articles: total,
      message: summary,
      processed_articles: processed_articles,
      error_articles: error_articles
    })
    puts 'Email report sent successfully!'
  end

  puts '=' * 80
end

# Runs BlogSchemaExtractor for a single article, prints the one-line outcome,
# and returns the report entry for it. Failure entries (an extractor result
# with an :error, or a rescued StandardError) include an :error key; success
# entries include :status and :schema_types instead.
def extract_article_schema(article)
  crm_url = "#{CRM_URL}/posts/#{article.id}"

  begin
    result = BlogSchemaExtractor.new(article).process

    if result[:error].present?
      puts "❌ ERROR: #{result[:error]}"
      {
        id: article.id,
        title: article.subject,
        error: result[:error],
        confidence: result[:confidence],
        reasoning: result[:reasoning],
        crm_url: crm_url
      }
    else
      schema_count = result[:schemas].length
      schema_types = result[:schemas].map { |schema| schema['@type'] }.join(', ')
      puts "✅ Extracted #{schema_count} schemas (#{schema_types}) - #{result[:confidence]} confidence"
      {
        id: article.id,
        title: article.subject,
        status: "Extracted #{schema_count} schemas",
        confidence: result[:confidence],
        reasoning: result[:reasoning],
        schema_types: schema_types,
        crm_url: crm_url
      }
    end
  rescue StandardError => e
    # One failing article must not abort the whole run: log, report, move on.
    Rails.logger.error "[AutoBlogSchemaExtractionWorker] Error processing article #{article.id}: #{e.message}"
    ErrorReporting.error(e, source: :background, article_id: article.id)
    puts "❌ ERROR: #{e.message}"
    {
      id: article.id,
      title: article.subject,
      error: e.message,
      crm_url: crm_url
    }
  end
end

# Prints the end-of-run summary (totals, success rate, per-article details)
# to stdout.
def print_extraction_summary(total, processed_articles, error_articles)
  processed_count = processed_articles.size
  success_rate = total.positive? ? "#{((processed_count.to_f / total) * 100).round(1)}%" : 'N/A'

  puts ''
  puts '=' * 80
  puts 'EXTRACTION SUMMARY'
  puts '=' * 80
  puts "Total articles processed: #{total}"
  puts "Successfully processed: #{processed_count}"
  puts "Errors: #{error_articles.size}"
  puts "Success rate: #{success_rate}"
  puts ''

  if processed_articles.any?
    puts 'SUCCESSFULLY PROCESSED ARTICLES:'
    puts '-' * 50
    processed_articles.each do |entry|
      puts "  #{entry[:id]}: #{entry[:title]}"
      puts "    Status: #{entry[:status]}"
      puts "    Confidence: #{entry[:confidence]}"
      puts "    Schema Types: #{entry[:schema_types]}"
      puts "    CRM: #{entry[:crm_url]}"
      puts ''
    end
  end

  return unless error_articles.any?

  puts 'ARTICLES WITH ERRORS:'
  puts '-' * 30
  error_articles.each do |entry|
    puts "  #{entry[:id]}: #{entry[:title]}"
    puts "    Error: #{entry[:error]}"
    puts "    CRM: #{entry[:crm_url]}"
    puts ''
  end
end

private :extract_article_schema, :print_extraction_summary