Class: BlogSchemaExtractionWorker

Inherits:
Object
  • Object
show all
Includes:
Sidekiq::Worker, Workers::StatusBroadcastable
Defined in:
app/workers/blog_schema_extraction_worker.rb

Instance Attribute Summary

Attributes included from Workers::StatusBroadcastable

#broadcast_status_updates

Instance Method Summary collapse

Methods included from Workers::StatusBroadcastable::Overrides

#at, #store, #total

Instance Method Details

#perform(article_id, options = {}) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'app/workers/blog_schema_extraction_worker.rb', line 8

# Runs schema extraction for a single article, reporting progress through the
# status helpers (+store+, +total+, +at+) as it goes.
#
# @param article_id [Integer] id passed to +Article.find+
# @param options [Hash] forwarded unchanged to +BlogSchemaExtractor+. An
#   optional redirect target is read from the 'return_path' key (string, as
#   delivered by Sidekiq after JSON serialization) or :return_path (symbol,
#   for direct in-process calls).
# @return [Object] whatever the final +log_info+ call returns
# @raise [StandardError] any failure (including an error reported by the
#   extractor) is recorded once via +store+/+log_error+ and then re-raised
def perform(article_id, options = {})
  store status: 'processing'
  store article_id: article_id
  store options: options

  total_steps = 4
  total total_steps

  # Add a small random delay to help spread out API calls and reduce rate limit collisions.
  # rand(1..5) is always >= 1, so no positivity guard is needed.
  sleep(rand(1..5))

  article = Article.find(article_id)
  store article_title: article.subject

  at(1, 'Loading article content')
  log_info "Starting comprehensive schema extraction for article #{article_id}: #{article.subject}"

  at(2, 'Analyzing content for structured data')
  extractor = BlogSchemaExtractor.new(article, options)

  at(3, 'Extracting schemas with AI')
  result = extractor.process

  # Hand extractor-reported errors to the rescue clause below, which is the
  # single place that stores the error status and logs the failure.
  raise result[:error] if result[:error].present?

  # Job arguments that went through Sidekiq have string keys; accept both forms.
  redirect_target = options['return_path'] || options[:return_path] || post_path(article)
  schemas = result[:schemas]

  if schemas.present?
    at(4, "Saving #{schemas.length} schemas to database")
    store status: 'completed'
    store schemas_count: schemas.length
    store confidence: result[:confidence]
    store schema_types: schemas.filter_map { |s| s['@type'] }
    store redirect_to: redirect_target
    log_info "Schema extraction completed for article #{article_id}: #{schemas.length} schemas extracted"
  else
    at(4, 'No structured data found')
    store status: 'completed'
    store schemas_count: 0
    store confidence: result[:confidence]
    store redirect_to: redirect_target
    log_info "Schema extraction completed for article #{article_id}: No schemas found"
  end
rescue StandardError => e
  store status: 'error'
  store error: e.message
  log_error "Schema extraction failed for article #{article_id}: #{e.message}"
  raise
end