Class: ImageGenerationWorker

Inherits:

Object

Object
ImageGenerationWorker

show all

Includes:: Sidekiq::Job, Workers::StatusBroadcastable

Defined in:: app/workers/image_generation_worker.rb

Overview

Background worker for generating AI images and staging them as +GeneratedImage+
records for user review before library import.

Flow:

Fetch any reference images from the library (downloaded + base64-encoded)
Call +ImageGeneration::Service+ to generate the image
Write the result to a Tempfile
Upload to ImageKit under /img/ai-generated/
Create a +GeneratedImage+ record
Store redirect_to: the show path so the job status page sends the user
directly to the review screen (after a successful auto-import, return_to is stored instead)

Examples:

Enqueue from controller

jid = ImageGenerationWorker.perform_async(
  prompt:              'A heated driveway melting snow at night',
  model:               'gemini-2.5-flash-image',
  aspect_ratio:        '16:9',
  image_size:          '2K',
  reference_image_ids: [42, 99],
  source_image_id:     nil,
  created_by_id:       current_account.id,
  return_to:           '/en-US/image_generations/new'
)

Constant Summary collapse

AI_GENERATED_TAG = AI-generated tag.

'ai-generated'

IK_FOLDER = ImageKit folder that generated images are uploaded to.

'/img/ai-generated/'

VIRTUAL_RATIOS = Virtual aspect ratios that map to a real API ratio plus an optional post-crop. Format: { "virtual_value" => { api_ratio: "...", crop_to: [w, h], prompt_prefix: "..." } }. crop_to is omitted when the API ratio already matches the target. prompt_prefix guides composition so subjects stay in the crop-safe zone.

{
  # Each key is a "virtual" aspect-ratio value a caller may pass as
  # aspect_ratio. The image is requested from the API at :api_ratio, the
  # :prompt_prefix is prepended to the user's prompt, and, when :crop_to is
  # present, the result is cropped to that [width, height] ratio afterwards.

  # 16:5 (3.2:1) banner, generated at 21:9.
  'banner:16:5' => {
    api_ratio: '21:9',
    crop_to: [16, 5],
    prompt_prefix: <<~PROMPT.squish
      Generate a sweeping ultra-wide panoramic image that fills the ENTIRE 21:9
      frame edge-to-edge with content. DO NOT add any borders, bars, letterboxing,
      padding, or blank strips — every pixel must contain scene content. Compose the
      image so the main subject sits in the vertical middle third of the frame, with
      supporting scenery (sky, landscape, architecture, flooring) extending naturally
      to the very top and bottom edges. Think of a cinematic wide-angle photograph
      that bleeds off all four sides.
    PROMPT
  },
  # 4:1 banner, generated at 21:9.
  'banner:4:1' => {
    api_ratio: '21:9',
    crop_to: [4, 1],
    prompt_prefix: <<~PROMPT.squish
      Generate an extreme ultra-wide panoramic image that fills the ENTIRE 21:9
      frame edge-to-edge with content. DO NOT add any borders, bars, letterboxing,
      padding, or blank strips — every pixel must contain scene content. The final
      image will be center-cropped to a very wide 4:1 banner, so place the main
      subject precisely in the vertical center band of the frame and keep critical
      details (faces, text, focal objects) within the middle 25% of the height.
      Allow background scenery to extend to the top and bottom edges so it can be
      safely cropped away.
    PROMPT
  },
  # 5:1 banner, generated at 21:9.
  'banner:5:1' => {
    api_ratio: '21:9',
    crop_to: [5, 1],
    prompt_prefix: <<~PROMPT.squish
      Generate an extreme ultra-wide panoramic image that fills the ENTIRE 21:9
      frame edge-to-edge with content. DO NOT add any borders, bars, letterboxing,
      padding, or blank strips — every pixel must contain scene content. The final
      image will be center-cropped to an ultra-wide 5:1 banner, so place the main
      subject precisely in the vertical center band of the frame and keep critical
      details (faces, text, focal objects) within the middle 20% of the height.
      Allow background scenery to extend to the top and bottom edges so it can be
      safely cropped away.
    PROMPT
  },
  # 10:1 banner strip, generated at 21:9.
  'banner:10:1' => {
    api_ratio: '21:9',
    crop_to: [10, 1],
    prompt_prefix: <<~PROMPT.squish
      Generate an extreme ultra-wide panoramic image that fills the ENTIRE 21:9
      frame edge-to-edge with content. DO NOT add any borders, bars, letterboxing,
      padding, or blank strips — every pixel must contain scene content. The final
      image will be center-cropped to an extremely narrow 10:1 banner strip, so
      place the main subject precisely on the horizontal centerline and keep all
      critical details (faces, text, focal objects) within the middle 10% of the
      height. Allow background scenery to extend to the top and bottom edges so it
      can be safely cropped away.
    PROMPT
  },
  # 3:1 banner, generated at 21:9.
  'banner:3:1' => {
    api_ratio: '21:9',
    crop_to: [3, 1],
    prompt_prefix: <<~PROMPT.squish
      Generate an ultra-wide image that fills the ENTIRE 21:9 frame edge-to-edge
      with content. DO NOT add borders, bars, letterboxing, padding, or blank
      strips — every pixel must contain scene content. The final image will be
      center-cropped to a 3:1 banner (used for X headers and Bluesky banners), so
      place the main subject in the vertical center and keep critical details
      within the middle 30% of the height.
    PROMPT
  },
  # 1.91:1 social link card, generated at 16:9.
  'social:191:100' => {
    api_ratio: '16:9',
    crop_to: [191, 100],
    prompt_prefix: <<~PROMPT.squish
      Generate an image that fills the ENTIRE 16:9 frame edge-to-edge with
      content. DO NOT add borders, bars, letterboxing, padding, or blank strips.
      The final image will be lightly center-cropped to ~1.91:1 — the standard
      social media link card / in-feed landscape ratio used across Facebook,
      Instagram, LinkedIn, and X. Place the main subject and critical details
      in the vertical center of the frame.
    PROMPT
  },
  # 851:315 Facebook cover, generated at 21:9.
  'fb:851:315' => {
    api_ratio: '21:9',
    crop_to: [851, 315],
    prompt_prefix: <<~PROMPT.squish
      Generate an ultra-wide image that fills the ENTIRE 21:9 frame edge-to-edge
      with content. DO NOT add borders, bars, letterboxing, padding, or blank
      strips. The final image will be center-cropped to 851×315 (~2.70:1) for a
      Facebook cover photo, so place the main subject in the vertical center
      and keep critical details within the middle 30% of the height. Note that
      the profile photo overlay sits at the bottom-left on desktop.
    PROMPT
  },
  # 1128:191 LinkedIn company cover, generated at 21:9.
  'li:1128:191' => {
    api_ratio: '21:9',
    crop_to: [1128, 191],
    prompt_prefix: <<~PROMPT.squish
      Generate an ultra-wide image that fills the ENTIRE 21:9 frame edge-to-edge
      with content. DO NOT add borders, bars, letterboxing, padding, or blank
      strips. The final image will be aggressively center-cropped to 1128×191
      (~5.91:1) for a LinkedIn company cover, so place the main subject precisely
      on the horizontal centerline and keep all critical details within the
      middle 12% of the height.
    PROMPT
  },
  # 375:278 Snapchat banner, generated at 4:3.
  'snap:375:278' => {
    api_ratio: '4:3',
    crop_to: [375, 278],
    prompt_prefix: <<~PROMPT.squish
      Generate at 4:3 for a Snapchat banner (375×278, ~1.35:1). The image will
      be lightly center-cropped from 4:3; keep the main subject centered. Use
      bold colors and high contrast for visibility at small banner sizes.
    PROMPT
  },
  # 40:21 Open Graph thumbnail (1200×630 reduces to 40:21), generated at 16:9.
  'og:40:21' => {
    api_ratio: '16:9',
    crop_to: [40, 21],
    prompt_prefix: <<~PROMPT.squish
      IMPORTANT COMPOSITION CONSTRAINT: This image will be used as an Open Graph
      social sharing thumbnail displayed at 1200×630 pixels. Place the main subject
      prominently in the centre of the frame. Avoid fine text, thin lines, or small
      details — the image must read clearly at thumbnail size in social media feeds.
      Use bold colours, high contrast, and a clean composition.
    PROMPT
  },
  # Amazon A+ desktop banner; crop_to [61, 25] is 1464:600 in lowest terms.
  'aplus:1464:600' => {
    api_ratio: '21:9',
    crop_to: [61, 25],
    prompt_prefix: <<~PROMPT.squish
      Generate a wide image that fills the ENTIRE 21:9 frame edge-to-edge with
      content. DO NOT add borders, bars, letterboxing, padding, or blank strips —
      every pixel must contain scene content. The final image will be lightly
      center-cropped to 1464×600 (61:25, ~2.44:1) for an Amazon A+ Premium desktop
      banner or background, so place the main subject and critical details in the
      vertical center of the frame and avoid putting anything important within the
      outer 5% top or bottom.
    PROMPT
  },
  # Amazon A+ mobile background. No crop_to: 600×450 is exactly 4:3, so only
  # the prompt prefix is applied and the API output is not cropped.
  'aplus:600:450' => {
    api_ratio: '4:3',
    prompt_prefix: <<~PROMPT.squish
      Generate at 4:3 for an Amazon A+ Premium mobile background (600×450). Amazon
      may overlay text on this background on mobile product pages, so keep the
      main subject roughly centered, avoid placing critical detail in the lower
      third where overlay text typically appears, and prefer clean uncluttered
      compositions that read well behind text.
    PROMPT
  },
  # 17:22 US Letter page, generated at 3:4 (target is slightly wider than 3:4).
  'doc:17:22' => {
    api_ratio: '3:4',
    crop_to: [17, 22],
    prompt_prefix: <<~PROMPT.squish
      Generate a portrait image that fills the ENTIRE 3:4 frame edge-to-edge
      with content. DO NOT add borders, bars, letterboxing, padding, or blank
      strips — every pixel must contain scene content. The final image will be
      lightly center-cropped to 17:22, the US Letter page ratio (8.5×11 inches)
      used for cover letters and printed documents. Keep the main subject
      roughly centered and keep critical detail (headings, logos, text) clear
      of the extreme top and bottom edges so the crop is clean.
    PROMPT
  },
  # 70:99 A4 page, generated at 3:4 (target is slightly narrower than 3:4).
  'doc:70:99' => {
    api_ratio: '3:4',
    crop_to: [70, 99],
    prompt_prefix: <<~PROMPT.squish
      Generate a portrait image that fills the ENTIRE 3:4 frame edge-to-edge
      with content. DO NOT add borders, bars, letterboxing, padding, or blank
      strips — every pixel must contain scene content. The final image will be
      center-cropped to 70:99, the A4 page ratio (210×297 mm) used for cover
      letters and printed documents. Keep the main subject roughly centered and
      keep critical detail (headings, logos, text) clear of the extreme left
      and right edges so the crop is clean.
    PROMPT
  }
}.freeze

Instance Attribute Summary

Attributes included from Workers::StatusBroadcastable

#broadcast_status_updates

Instance Method Summary collapse

#perform(options = {}) ⇒ Object

Methods included from Workers::StatusBroadcastable::Overrides

#at, #store, #total

Instance Method Details

#perform(options = {}) ⇒ `Object`

# File 'app/workers/image_generation_worker.rb', line 217

# Generates an AI image, uploads it to ImageKit and stages it as a
# +GeneratedImage+ (optionally auto-importing it).
#
# Progress is reported through +total+/+at+; the outcome is reported through
# +store+ (+error_message+ or +info_message+, plus +redirect_to+). A blank
# prompt, a missing provider, a tempfile failure and an upload failure are
# stored and logged instead of raised.
#
# @param options [Hash] job arguments; string or symbol keys are accepted
# @option options [String] :prompt user prompt (required; blank aborts the job)
# @option options [String] :model defaults to +ImageGeneration::Service::DEFAULT_MODEL+
# @option options [String] :aspect_ratio a real ratio or a +VIRTUAL_RATIOS+ key; defaults to '1:1'
# @option options [String] :image_size defaults to '1K'
# @option options [Array<Integer>] :reference_image_ids ids of +Image+ records used as references
# @option options [Array<String>] :extra_reference_urls additional reference image URLs
# @option options [Integer, nil] :source_image_id
# @option options [String, nil] :source_record_type
# @option options [Integer, nil] :source_record_id
# @option options [Boolean] :auto_import import the staged image right away
# @option options [Integer, nil] :created_by_id
# @option options [String, nil] :return_to path stored as +redirect_to+ on failure
#   and after a successful auto-import
# @return [Object] incidental (result of the last logging call, or nil)
def perform(options = {})
  opts = options.with_indifferent_access

  # Numbers are never blank, so `value.to_i.presence` cannot turn 0 into nil.
  # Check presence *before* converting and reject non-positive results so that
  # "", "abc" or 0 become nil instead of being persisted as id 0.
  positive_id = lambda do |raw|
    id = raw.presence&.to_i
    id if id&.positive?
  end

  prompt               = opts[:prompt].to_s.strip
  model                = opts[:model].presence || ImageGeneration::Service::DEFAULT_MODEL
  aspect_ratio         = opts[:aspect_ratio].presence || '1:1'
  image_size           = opts[:image_size].presence || '1K'
  reference_image_ids  = Array(opts[:reference_image_ids]).map(&:to_i).uniq
  extra_reference_urls = Array(opts[:extra_reference_urls]).map(&:to_s).compact_blank
  source_image_id      = positive_id.call(opts[:source_image_id])
  source_record_type   = opts[:source_record_type].presence
  source_record_id     = positive_id.call(opts[:source_record_id])
  auto_import          = opts[:auto_import]
  created_by_id        = positive_id.call(opts[:created_by_id])
  @return_to           = opts[:return_to].presence

  # A virtual ratio is generated at its real API ratio with a composition
  # prefix on the prompt, then cropped afterwards when crop_to is present.
  virtual_config  = VIRTUAL_RATIOS[aspect_ratio]
  api_ratio       = virtual_config ? virtual_config[:api_ratio] : aspect_ratio
  @post_crop      = virtual_config&.dig(:crop_to)
  generation_prompt = if virtual_config&.dig(:prompt_prefix)
                        "#{virtual_config[:prompt_prefix]}\n\n#{prompt}"
                      else
                        prompt
                      end

  total 7
  at 1, 'Starting image generation...'

  if prompt.blank?
    store error_message: 'Prompt is required', redirect_to: @return_to
    return log_error('No prompt provided to ImageGenerationWorker')
  end

  unless ImageGeneration::Service.available?
    store error_message: 'No image generation provider is configured. Contact an administrator.',
          redirect_to: @return_to
    return log_error('No image generation provider configured')
  end

  at 2, 'Loading reference images...'
  reference_images = reference_image_ids.any? ? Image.where(id: reference_image_ids).to_a : []

  if extra_reference_urls.any?
    # Lightweight stand-ins exposing the same #id / #ik_url readers as the
    # Image records they are mixed with.
    url_ref = Struct.new(:id, :ik_url)
    extra_refs = extra_reference_urls.each_with_index.map { |url, i| url_ref.new("extra_#{i}", url) }
    reference_images += extra_refs
    log_info "Added #{extra_refs.size} extra URL reference(s) (e.g. prior generation)"
  end

  log_info "Loaded #{reference_images.size} reference image(s) total"

  at 3, 'Generating image with AI...'
  service = ImageGeneration::Service.new(model: model)
  generate_result = call_generation_service(service, generation_prompt, reference_images, api_ratio, image_size)
  return unless generate_result # error already stored by call_generation_service

  jpeg_data = generate_result.jpeg_data
  log_info "Generated image: #{jpeg_data.bytesize} bytes"

  if @post_crop
    # Reported under step 3 again; the step total is the same with or without a crop.
    at 3, "Cropping to #{@post_crop.join(':')}..."
    jpeg_data = crop_to_ratio(jpeg_data, *@post_crop)
    log_info "Cropped image: #{jpeg_data.bytesize} bytes"
  end

  at 4, 'Uploading to ImageKit...'
  tempfile = write_tempfile(jpeg_data)

  unless tempfile
    store error_message: 'Failed to write generated image to disk', redirect_to: @return_to
    return log_error('Tempfile write failed')
  end

  begin
    ik_result = upload_to_imagekit(tempfile, prompt)

    unless ik_result
      store error_message: 'Failed to upload generated image to ImageKit', redirect_to: @return_to
      return log_error('ImageKit upload failed')
    end

    log_info "Uploaded to ImageKit: #{ik_result[:file_path] || ik_result[:filePath]}"

    at 5, 'Staging generated image...'
    # Only query when an id was supplied; the record feeds the title suggester below.
    source_image = Image.find_by(id: source_image_id) if source_image_id
    generated    = GeneratedImage.create!(
      source_image_id:     source_image_id,
      source_record_type:  source_record_type,
      source_record_id:    source_record_id,
      created_by_id:       created_by_id,
      ik_asset:            ik_result,
      prompt:              prompt,
      model:               model,
      aspect_ratio:        aspect_ratio,
      image_size:          image_size,
      reference_image_ids: reference_image_ids,
      status:              'pending'
    )

    # Log image generation cost now that we have a subject to link to
    AiUsageLog.log!(
      provider:      generate_result.provider,
      model_id:      model,
      feature:       'image_generation',
      input_tokens:  generate_result.input_tokens,
      output_tokens: generate_result.output_tokens,
      subject:       generated,
      account_id:    created_by_id,
      metadata:      { prompt_length: prompt.length, reference_count: reference_images.size }
    )

    at 6, 'Generating title suggestion...'
    title_result = ImageGeneration::TitleSuggester.call(
      prompt:       prompt,
      source_title: source_image&.title,
      source_tags:  source_image&.tags || []
    )

    if title_result.success?
      log_info "Title suggested: #{title_result.title}"
      generated.update!(suggested_title: title_result.title)
      title_provider = LlmModel.find_by(model_id: title_result.model_id.to_s)&.provider || 'unknown'
      AiUsageLog.log!(
        provider:      title_provider,
        model_id:      title_result.model_id.to_s,
        feature:       'title_suggestion',
        input_tokens:  title_result.input_tokens,
        output_tokens: title_result.output_tokens,
        subject:       generated,
        account_id:    created_by_id
      )
    else
      # An unsuccessful suggestion is only logged; the staged image is kept.
      log_info "Title suggestion skipped: #{title_result.error}"
    end

    if auto_import
      at 7, 'Auto-importing...'
      import_result = GeneratedImageImporter.call(
        generated,
        title:    generated.suggested_title.presence || 'AI Generated Image',
        tags:     source_record_tags_for(generated.source_record),
        notes:    "Auto-generated avatar. Prompt: #{prompt.truncate(200)}"
      )

      if import_result.success?
        log_info "Auto-imported as Image #{import_result.image.id}: #{import_result.notice}"
        store info_message: import_result.notice || 'Avatar generated and imported.',
              redirect_to:  @return_to
      else
        # Fall back to manual review of the staged record.
        log_error "Auto-import failed: #{import_result.error}"
        store info_message: 'Image generated but auto-import failed — review it manually.',
              redirect_to:  generated_image_path(generated)
      end
    else
      at 7, 'Ready for review!'
      store info_message: 'Image generated and ready for import.',
            redirect_to:  generated_image_path(generated)
    end

    log_info "Created GeneratedImage #{generated.id}"
  ensure
    # Runs on every exit from the block above, including the early return
    # after a failed upload and any raised exception.
    tempfile&.close
    tempfile&.unlink
  end
end