7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
|
# File 'app/workers/auto_blog_schema_extraction_worker.rb', line 7
# Runs schema extraction over every article returned by
# +find_articles_needing_schema+, recording progress through +store+,
# printing a human-readable summary to stdout and optionally emailing a report.
#
# Per-article failures (either an +:error+ key in the extractor result or a
# raised StandardError) are collected and counted; they never abort the run.
#
# @param limit [Object, nil] forwarded unchanged to +find_articles_needing_schema+
#   (presumably caps how many articles are selected -- confirm in that helper)
# @param article_ids [Object, nil] forwarded unchanged to +find_articles_needing_schema+
# @param force_update [Boolean] forwarded unchanged to +find_articles_needing_schema+
# @param send_email [Boolean] when true, +send_email_report+ is called with the run totals
# @return [nil]
def perform(limit = nil, article_ids: nil, force_update: false, send_email: true)
  log_tag = '[AutoBlogSchemaExtractionWorker]'

  store status: 'processing'
  articles_needing_schema = find_articles_needing_schema(limit, article_ids, force_update)

  # Nothing to do: record a completed, empty run and (optionally) report it.
  if articles_needing_schema.empty?
    empty_message = 'No articles found needing schema extraction'
    store status: 'completed'
    store articles_processed: 0
    store message: empty_message
    Rails.logger.info "#{log_tag} #{empty_message}"
    if send_email
      send_email_report({
        articles_processed: 0,
        errors: 0,
        total_articles: 0,
        message: empty_message,
        processed_articles: [],
        error_articles: []
      })
    end
    return
  end

  # Evaluate the size once; on a lazily-loaded collection every #count call
  # may hit the database again.
  total = articles_needing_schema.count
  store total_articles: total

  processed_count = 0
  error_count = 0
  processed_articles = []
  error_articles = []

  Rails.logger.info "Processing #{total} articles..."

  articles_needing_schema.each_with_index do |article, index|
    position = index + 1
    crm_url = "#{CRM_URL}/posts/#{article.id}"

    store current_article: position
    store current_article_title: article.subject
    Rails.logger.info "#{log_tag} Processing article #{article.id}: #{article.subject}"
    print " [#{position}/#{total}] Processing article #{article.id}: #{article.subject}... "

    begin
      extractor = BlogSchemaExtractor.new(article)
      result = extractor.process

      if result[:error].present?
        # The extractor reported a failure without raising.
        error_count += 1
        error_articles << {
          id: article.id,
          title: article.subject,
          error: result[:error],
          confidence: result[:confidence],
          reasoning: result[:reasoning],
          crm_url: crm_url
        }
        puts "❌ ERROR: #{result[:error]}"
      else
        processed_count += 1
        schema_count = result[:schemas].length
        schema_types = result[:schemas].map { |s| s['@type'] }.join(', ')
        processed_articles << {
          id: article.id,
          title: article.subject,
          status: "Extracted #{schema_count} schemas",
          confidence: result[:confidence],
          reasoning: result[:reasoning],
          schema_types: schema_types,
          crm_url: crm_url
        }
        puts "✅ Extracted #{schema_count} schemas (#{schema_types}) - #{result[:confidence]} confidence"
      end
    rescue StandardError => e
      # One bad article must not stop the batch: log, report, and move on.
      error_count += 1
      Rails.logger.error "#{log_tag} Error processing article #{article.id}: #{e.message}"
      ErrorReporting.error(e, source: :background, article_id: article.id)
      error_articles << {
        id: article.id,
        title: article.subject,
        error: e.message,
        crm_url: crm_url
      }
      puts "❌ ERROR: #{e.message}"
    end

    # Random 2-5 second pause between articles; skipped after the last one.
    sleep(rand(2..5)) if position < total
  end

  summary_message = "Processed #{processed_count} articles, #{error_count} errors"
  store status: 'completed'
  store articles_processed: processed_count
  store errors: error_count
  store message: summary_message
  Rails.logger.info "#{log_tag} Completed: #{summary_message}"

  success_rate = total.positive? ? "#{((processed_count.to_f / total) * 100).round(1)}%" : 'N/A'

  puts ''
  puts '=' * 80
  puts 'EXTRACTION SUMMARY'
  puts '=' * 80
  puts "Total articles processed: #{total}"
  puts "Successfully processed: #{processed_count}"
  puts "Errors: #{error_count}"
  puts "Success rate: #{success_rate}"
  puts ''

  if processed_articles.any?
    puts 'SUCCESSFULLY PROCESSED ARTICLES:'
    puts '-' * 50
    processed_articles.each do |entry|
      puts " #{entry[:id]}: #{entry[:title]}"
      puts " Status: #{entry[:status]}"
      puts " Confidence: #{entry[:confidence]}"
      puts " Schema Types: #{entry[:schema_types]}"
      puts " CRM: #{entry[:crm_url]}"
      puts ''
    end
  end

  if error_articles.any?
    puts 'ARTICLES WITH ERRORS:'
    puts '-' * 30
    error_articles.each do |entry|
      puts " #{entry[:id]}: #{entry[:title]}"
      puts " Error: #{entry[:error]}"
      puts " CRM: #{entry[:crm_url]}"
      puts ''
    end
  end

  if send_email
    puts 'Sending email report...'
    report_data = {
      articles_processed: processed_count,
      errors: error_count,
      total_articles: total,
      message: summary_message,
      processed_articles: processed_articles,
      error_articles: error_articles
    }
    send_email_report(report_data)
    puts 'Email report sent successfully!'
  end

  puts '=' * 80
end
|