Class: EmbeddingWorker

Inherits:
Object
  • Object
show all
Includes:
Sidekiq::Worker
Defined in:
app/workers/embedding_worker.rb

Overview

Background worker for generating content embeddings.
Uses RubyLLM to generate vector embeddings for semantic search.

Examples:

Queue embedding generation

EmbeddingWorker.perform_async('Showcase', 123)

Queue with specific content type

EmbeddingWorker.perform_async('Video', 456, content_types: ['transcript'])

Instance Method Summary collapse

Instance Method Details

#perform(class_name, id, options = {}) ⇒ Object

Parameters:

  • class_name (String)

    The model class name (e.g., 'Showcase', 'Post')

  • id (Integer)

    The record ID

  • options (Hash) (defaults to: {})

    Optional parameters

Options Hash (options):

  • :content_types (Array<String>)

    Specific content types to embed

  • :force (Boolean)

    Force regeneration even if not stale

  • :chunked (Boolean)

    Use chunked embedding for large content (e.g., PDFs)

  • :locales (Array<String>)

    Override locales to embed (default: record.embeddable_locales)



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'app/workers/embedding_worker.rb', line 32

# Generates embeddings for a single record, once per (content type, locale) pair.
#
# @param class_name [String] the model class name (e.g., 'Showcase', 'Post')
# @param id [Integer] the record ID
# @param options [Hash] optional parameters; string and symbol keys are both accepted
# @option options [Array<String>] :content_types specific content types to embed
#   (default: record.class.embeddable_content_types)
# @option options [Boolean] :force force regeneration even if not stale
# @option options [Boolean] :chunked use chunked embedding for large content (e.g., PDFs)
# @option options [Array<String>] :locales override locales to embed
#   (default: record.embeddable_locales)
# @return [Object] incidental (result of the final log call); nil when find_record
#   yields no record
# @raise [RubyLLM::Error] logged, then re-raised so the job is retried
# @raise [StandardError] logged and sent to ErrorReporting, then re-raised
def perform(class_name, id, options = {})
  opts = options.with_indifferent_access
  record = find_record(class_name, id)
  return unless record

  # An explicit list of content types wins; otherwise use the model's own list.
  requested_types = opts[:content_types]
  content_types =
    if requested_types.nil?
      record.class.embeddable_content_types
    else
      requested_types.map(&:to_sym)
    end
  force = opts[:force].to_b
  chunked = opts[:chunked].to_b

  # One embedding is generated per locale. A blank or missing :locales override
  # defers to record.embeddable_locales (e.g. Post → ['en-US', 'en-CA']).
  # NOTE(review): models without multiple locales are expected to supply a single
  # locale via locale_for_embedding — that fallback is not visible in this method.
  locale_override = opts[:locales]&.map(&:to_s)
  locales = locale_override.presence || record.embeddable_locales

  type_list = content_types.join(', ')
  locale_list = locales.join(', ')
  log_info "Generating embeddings for #{class_name}##{id} " \
           "(types: #{type_list}, locales: #{locale_list}, chunked: #{chunked})"

  outcomes = []
  content_types.each do |content_type|
    locales.each do |locale|
      # Chunked generation is used only when requested AND supported by the record.
      outcomes <<
        if chunked && record.respond_to?(:generate_chunked_embeddings!)
          generate_chunked_with_rate_limit(record, content_type, force, locale)
        else
          generate_with_rate_limit(record, content_type, force, locale)
        end
    end
  end
  # A single generation call may return a (possibly nested) array of results.
  results = outcomes.flatten

  successful = results.count { |outcome| outcome.is_a?(ContentEmbedding) }
  log_info "Completed #{class_name}##{id}: #{successful} embeddings generated"
rescue ActiveRecord::RecordNotFound
  # The record is gone; nothing to embed, so the job finishes without a retry.
  log_info "Record not found: #{class_name}##{id} - skipping"
rescue RubyLLM::Error => error
  log_error "RubyLLM error for #{class_name}##{id}: #{error.message}"
  raise # Re-raise to trigger retry
rescue StandardError => error
  log_error "Unexpected error for #{class_name}##{id}: #{error.message}"
  ErrorReporting.error(error)
  raise
end