Class: ImageGenerationWorker

Inherits:
Object
  • Object
show all
Includes:
Sidekiq::Job, Workers::StatusBroadcastable
Defined in:
app/workers/image_generation_worker.rb

Overview

Background worker for generating AI images and staging them as +GeneratedImage+
records for user review before library import.

Flow:

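  1. Fetch any reference images from the library (downloaded + base64-encoded)
  2. Call +ImageGeneration::Service+ to generate the image, then crop it when
    the requested aspect ratio is one of the +VIRTUAL_RATIOS+ with a crop_to
  3. Write the result to a Tempfile
  4. Upload to ImageKit under /img/ai-generated/
  5. Create a +GeneratedImage+ record and log the AI usage against it
  6. Ask +ImageGeneration::TitleSuggester+ for a suggested title
  7. Store redirect_to: the show path so the job status page sends the user
    directly to the review screen — or, when auto_import is set, import the
    image immediately and redirect to return_to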

Examples:

Enqueue from controller

jid = ImageGenerationWorker.perform_async(
  prompt:              'A heated driveway melting snow at night',
  model:               'gemini-2.5-flash-image',
  aspect_ratio:        '16:9',
  image_size:          '2K',
  reference_image_ids: [42, 99],
  source_image_id:     nil,
  created_by_id:       current_account.id,
  return_to:           '/en-US/image_generations/new'
)

Constant Summary collapse

AI_GENERATED_TAG =

Tag value used to mark AI-generated images.

'ai-generated'
IK_FOLDER =

ImageKit folder that generated images are uploaded to.

'/img/ai-generated/'
VIRTUAL_RATIOS =

Virtual aspect ratios that map to a real API ratio + post-crop.
Format: { "virtual_value" => { api_ratio: "...", crop_to: [w, h], prompt_prefix: "..." } }
prompt_prefix guides composition so subjects stay in the crop-safe zone.

{
  # Each key is the aspect_ratio value a caller passes to the worker. #perform
  # sends :api_ratio to the generation service, prepends :prompt_prefix to the
  # user's prompt, and post-crops the result to :crop_to ([width, height]) when
  # that key is present.

  # --- Wide banners: generated at 21:9 and cropped to a flatter strip ---
  'banner:16:5' => {
    api_ratio: '21:9',
    crop_to: [16, 5],
    prompt_prefix: <<~PROMPT.squish
      Generate a sweeping ultra-wide panoramic image that fills the ENTIRE 21:9
      frame edge-to-edge with content. DO NOT add any borders, bars, letterboxing,
      padding, or blank strips — every pixel must contain scene content. Compose the
      image so the main subject sits in the vertical middle third of the frame, with
      supporting scenery (sky, landscape, architecture, flooring) extending naturally
      to the very top and bottom edges. Think of a cinematic wide-angle photograph
      that bleeds off all four sides.
    PROMPT
  },
  'banner:4:1' => {
    api_ratio: '21:9',
    crop_to: [4, 1],
    prompt_prefix: <<~PROMPT.squish
      Generate an extreme ultra-wide panoramic image that fills the ENTIRE 21:9
      frame edge-to-edge with content. DO NOT add any borders, bars, letterboxing,
      padding, or blank strips — every pixel must contain scene content. The final
      image will be center-cropped to a very wide 4:1 banner, so place the main
      subject precisely in the vertical center band of the frame and keep critical
      details (faces, text, focal objects) within the middle 25% of the height.
      Allow background scenery to extend to the top and bottom edges so it can be
      safely cropped away.
    PROMPT
  },
  'banner:5:1' => {
    api_ratio: '21:9',
    crop_to: [5, 1],
    prompt_prefix: <<~PROMPT.squish
      Generate an extreme ultra-wide panoramic image that fills the ENTIRE 21:9
      frame edge-to-edge with content. DO NOT add any borders, bars, letterboxing,
      padding, or blank strips — every pixel must contain scene content. The final
      image will be center-cropped to an ultra-wide 5:1 banner, so place the main
      subject precisely in the vertical center band of the frame and keep critical
      details (faces, text, focal objects) within the middle 20% of the height.
      Allow background scenery to extend to the top and bottom edges so it can be
      safely cropped away.
    PROMPT
  },
  'banner:10:1' => {
    api_ratio: '21:9',
    crop_to: [10, 1],
    prompt_prefix: <<~PROMPT.squish
      Generate an extreme ultra-wide panoramic image that fills the ENTIRE 21:9
      frame edge-to-edge with content. DO NOT add any borders, bars, letterboxing,
      padding, or blank strips — every pixel must contain scene content. The final
      image will be center-cropped to an extremely narrow 10:1 banner strip, so
      place the main subject precisely on the horizontal centerline and keep all
      critical details (faces, text, focal objects) within the middle 10% of the
      height. Allow background scenery to extend to the top and bottom edges so it
      can be safely cropped away.
    PROMPT
  },
  'banner:3:1' => {
    api_ratio: '21:9',
    crop_to: [3, 1],
    prompt_prefix: <<~PROMPT.squish
      Generate an ultra-wide image that fills the ENTIRE 21:9 frame edge-to-edge
      with content. DO NOT add borders, bars, letterboxing, padding, or blank
      strips — every pixel must contain scene content. The final image will be
      center-cropped to a 3:1 banner (used for X headers and Bluesky banners), so
      place the main subject in the vertical center and keep critical details
      within the middle 30% of the height.
    PROMPT
  },
  # --- Social / platform-specific formats ---
  'social:191:100' => {
    api_ratio: '16:9',
    crop_to: [191, 100],
    prompt_prefix: <<~PROMPT.squish
      Generate an image that fills the ENTIRE 16:9 frame edge-to-edge with
      content. DO NOT add borders, bars, letterboxing, padding, or blank strips.
      The final image will be lightly center-cropped to ~1.91:1 — the standard
      social media link card / in-feed landscape ratio used across Facebook,
      Instagram, LinkedIn, and X. Place the main subject and critical details
      in the vertical center of the frame.
    PROMPT
  },
  'fb:851:315' => {
    api_ratio: '21:9',
    crop_to: [851, 315],
    prompt_prefix: <<~PROMPT.squish
      Generate an ultra-wide image that fills the ENTIRE 21:9 frame edge-to-edge
      with content. DO NOT add borders, bars, letterboxing, padding, or blank
      strips. The final image will be center-cropped to 851×315 (~2.70:1) for a
      Facebook cover photo, so place the main subject in the vertical center
      and keep critical details within the middle 30% of the height. Note that
      the profile photo overlay sits at the bottom-left on desktop.
    PROMPT
  },
  'li:1128:191' => {
    api_ratio: '21:9',
    crop_to: [1128, 191],
    prompt_prefix: <<~PROMPT.squish
      Generate an ultra-wide image that fills the ENTIRE 21:9 frame edge-to-edge
      with content. DO NOT add borders, bars, letterboxing, padding, or blank
      strips. The final image will be aggressively center-cropped to 1128×191
      (~5.91:1) for a LinkedIn company cover, so place the main subject precisely
      on the horizontal centerline and keep all critical details within the
      middle 12% of the height.
    PROMPT
  },
  'snap:375:278' => {
    api_ratio: '4:3',
    crop_to: [375, 278],
    prompt_prefix: <<~PROMPT.squish
      Generate at 4:3 for a Snapchat banner (375×278, ~1.35:1). The image will
      be lightly center-cropped from 4:3; keep the main subject centered. Use
      bold colors and high contrast for visibility at small banner sizes.
    PROMPT
  },
  'og:40:21' => {
    api_ratio: '16:9',
    crop_to: [40, 21],
    prompt_prefix: <<~PROMPT.squish
      IMPORTANT COMPOSITION CONSTRAINT: This image will be used as an Open Graph
      social sharing thumbnail displayed at 1200×630 pixels. Place the main subject
      prominently in the centre of the frame. Avoid fine text, thin lines, or small
      details — the image must read clearly at thumbnail size in social media feeds.
      Use bold colours, high contrast, and a clean composition.
    PROMPT
  },
  # --- Amazon A+ formats ---
  # The key carries the pixel size; crop_to holds the same ratio reduced
  # (1464:600 == 61:25).
  'aplus:1464:600' => {
    api_ratio: '21:9',
    crop_to: [61, 25],
    prompt_prefix: <<~PROMPT.squish
      Generate a wide image that fills the ENTIRE 21:9 frame edge-to-edge with
      content. DO NOT add borders, bars, letterboxing, padding, or blank strips —
      every pixel must contain scene content. The final image will be lightly
      center-cropped to 1464×600 (61:25, ~2.44:1) for an Amazon A+ Premium desktop
      banner or background, so place the main subject and critical details in the
      vertical center of the frame and avoid putting anything important within the
      outer 5% top or bottom.
    PROMPT
  },
  # No crop_to here: 600×450 is exactly 4:3, so #perform finds no crop target
  # and skips the post-crop step; only the prompt prefix is applied.
  'aplus:600:450' => {
    api_ratio: '4:3',
    prompt_prefix: <<~PROMPT.squish
      Generate at 4:3 for an Amazon A+ Premium mobile background (600×450). Amazon
      may overlay text on this background on mobile product pages, so keep the
      main subject roughly centered, avoid placing critical detail in the lower
      third where overlay text typically appears, and prefer clean uncluttered
      compositions that read well behind text.
    PROMPT
  },
  # --- Printed-page formats: generated at 3:4 portrait ---
  # 17:22 is slightly wider than 3:4 (crop trims top/bottom); 70:99 is slightly
  # narrower (crop trims left/right) — the prompts below warn about the matching edges.
  'doc:17:22' => {
    api_ratio: '3:4',
    crop_to: [17, 22],
    prompt_prefix: <<~PROMPT.squish
      Generate a portrait image that fills the ENTIRE 3:4 frame edge-to-edge
      with content. DO NOT add borders, bars, letterboxing, padding, or blank
      strips — every pixel must contain scene content. The final image will be
      lightly center-cropped to 17:22, the US Letter page ratio (8.5×11 inches)
      used for cover letters and printed documents. Keep the main subject
      roughly centered and keep critical detail (headings, logos, text) clear
      of the extreme top and bottom edges so the crop is clean.
    PROMPT
  },
  'doc:70:99' => {
    api_ratio: '3:4',
    crop_to: [70, 99],
    prompt_prefix: <<~PROMPT.squish
      Generate a portrait image that fills the ENTIRE 3:4 frame edge-to-edge
      with content. DO NOT add borders, bars, letterboxing, padding, or blank
      strips — every pixel must contain scene content. The final image will be
      center-cropped to 70:99, the A4 page ratio (210×297 mm) used for cover
      letters and printed documents. Keep the main subject roughly centered and
      keep critical detail (headings, logos, text) clear of the extreme left
      and right edges so the crop is clean.
    PROMPT
  }
}.freeze

Instance Attribute Summary

Attributes included from Workers::StatusBroadcastable

#broadcast_status_updates

Instance Method Summary collapse

Methods included from Workers::StatusBroadcastable::Overrides

#at, #store, #total

Instance Method Details

#perform(options = {}) ⇒ Object



217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'app/workers/image_generation_worker.rb', line 217

# Generates an AI image from the given options, uploads it to ImageKit and
# stages it as a +GeneratedImage+ (optionally auto-importing it).
#
# Progress is reported through +total+/+at+. Expected failures (blank prompt,
# no provider, tempfile or upload failure) are written with +store+ as an
# +error_message+ plus a +redirect_to+ back to +return_to+, and the method
# returns early instead of raising. Exceptions raised by the persistence
# calls (+create!+, +update!+, +log!+) are not rescued here.
#
# @param options [Hash, nil] job arguments; string or symbol keys are accepted
# @option options [String] :prompt required user prompt
# @option options [String] :model defaults to +ImageGeneration::Service::DEFAULT_MODEL+
# @option options [String] :aspect_ratio real ratio or a +VIRTUAL_RATIOS+ key (default '1:1')
# @option options [String] :image_size default '1K'
# @option options [Array<Integer, String>] :reference_image_ids library image ids
# @option options [Array<String>] :extra_reference_urls additional reference image URLs
# @option options [Integer, String, nil] :source_image_id
# @option options [String, nil] :source_record_type
# @option options [Integer, String, nil] :source_record_id
# @option options [Boolean, nil] :auto_import import immediately instead of staging for review
# @option options [Integer, String, nil] :created_by_id
# @option options [String, nil] :return_to path used for error / auto-import redirects
# @return [Object] not meaningful to callers (result of the last logging call, or nil)
def perform(options = {})
  opts = (options || {}).with_indifferent_access

  # Record ids arrive as integers (JSON) or strings (form params). Blank, zero
  # or non-numeric input means "not given" and must become nil. `"".to_i` is 0
  # and `0.presence` is still 0, so `.to_i.presence` cannot express this.
  to_id = lambda do |value|
    id = value.to_s.strip.to_i
    id if id.positive?
  end

  prompt               = opts[:prompt].to_s.strip
  model                = opts[:model].presence || ImageGeneration::Service::DEFAULT_MODEL
  aspect_ratio         = opts[:aspect_ratio].presence || '1:1'
  image_size           = opts[:image_size].presence || '1K'
  reference_image_ids  = Array(opts[:reference_image_ids]).map { |raw| to_id.call(raw) }.compact.uniq
  extra_reference_urls = Array(opts[:extra_reference_urls]).map(&:to_s).compact_blank
  source_image_id      = to_id.call(opts[:source_image_id])
  source_record_type   = opts[:source_record_type].presence
  source_record_id     = to_id.call(opts[:source_record_id])
  auto_import          = opts[:auto_import]
  created_by_id        = to_id.call(opts[:created_by_id])
  @return_to           = opts[:return_to].presence

  # A virtual ratio maps to a ratio the API supports, plus an optional
  # post-crop and a composition hint that is prepended to the user's prompt.
  virtual_config  = VIRTUAL_RATIOS[aspect_ratio]
  api_ratio       = virtual_config ? virtual_config[:api_ratio] : aspect_ratio
  @post_crop      = virtual_config&.dig(:crop_to)
  generation_prompt = if virtual_config&.dig(:prompt_prefix)
                        "#{virtual_config[:prompt_prefix]}\n\n#{prompt}"
                      else
                        prompt
                      end

  total 7
  at 1, 'Starting image generation...'

  if prompt.blank?
    store error_message: 'Prompt is required', redirect_to: @return_to
    return log_error('No prompt provided to ImageGenerationWorker')
  end

  unless ImageGeneration::Service.available?
    store error_message: 'No image generation provider is configured. Contact an administrator.',
          redirect_to: @return_to
    return log_error('No image generation provider configured')
  end

  at 2, 'Loading reference images...'
  reference_images = reference_image_ids.any? ? Image.where(id: reference_image_ids).to_a : []

  if extra_reference_urls.any?
    # Lightweight stand-ins exposing the same #id / #ik_url readers that are
    # set here, so URL-only references can travel alongside library images.
    url_ref = Struct.new(:id, :ik_url)
    extra_refs = extra_reference_urls.each_with_index.map { |url, i| url_ref.new("extra_#{i}", url) }
    reference_images += extra_refs
    log_info "Added #{extra_refs.size} extra URL reference(s) (e.g. prior generation)"
  end

  log_info "Loaded #{reference_images.size} reference image(s) total"

  at 3, 'Generating image with AI...'
  service = ImageGeneration::Service.new(model: model)
  generate_result = call_generation_service(service, generation_prompt, reference_images, api_ratio, image_size)
  return unless generate_result # error already stored by call_generation_service

  jpeg_data = generate_result.jpeg_data
  log_info "Generated image: #{jpeg_data.bytesize} bytes"

  if @post_crop
    # Cropping is reported as part of step 3 so the total stays at 7.
    at 3, "Cropping to #{@post_crop.join(':')}..."
    jpeg_data = crop_to_ratio(jpeg_data, *@post_crop)
    log_info "Cropped image: #{jpeg_data.bytesize} bytes"
  end

  at 4, 'Uploading to ImageKit...'
  tempfile = write_tempfile(jpeg_data)

  unless tempfile
    store error_message: 'Failed to write generated image to disk', redirect_to: @return_to
    return log_error('Tempfile write failed')
  end

  begin
    ik_result = upload_to_imagekit(tempfile, prompt)

    unless ik_result
      store error_message: 'Failed to upload generated image to ImageKit', redirect_to: @return_to
      return log_error('ImageKit upload failed')
    end

    log_info "Uploaded to ImageKit: #{ik_result[:file_path] || ik_result[:filePath]}"

    at 5, 'Staging generated image...'
    # Only look the source image up when an id was actually supplied.
    source_image = Image.find_by(id: source_image_id) if source_image_id
    generated    = GeneratedImage.create!(
      source_image_id:   source_image_id,
      source_record_type: source_record_type,
      source_record_id:   source_record_id,
      created_by_id:      created_by_id,
      ik_asset:            ik_result,
      prompt:              prompt,
      model:               model,
      aspect_ratio:        aspect_ratio,
      image_size:          image_size,
      reference_image_ids: reference_image_ids,
      status:              'pending'
    )

    # Log image generation cost now that we have a subject to link to
    AiUsageLog.log!(
      provider:      generate_result.provider,
      model_id:      model,
      feature:       'image_generation',
      input_tokens:  generate_result.input_tokens,
      output_tokens: generate_result.output_tokens,
      subject:       generated,
      account_id:    created_by_id,
      metadata:      { prompt_length: prompt.length, reference_count: reference_images.size }
    )

    at 6, 'Generating title suggestion...'
    title_result = ImageGeneration::TitleSuggester.call(
      prompt:       prompt,
      source_title: source_image&.title,
      source_tags:  source_image&.tags || []
    )

    # An unsuccessful title suggestion is logged and skipped; it does not fail the job.
    if title_result.success?
      log_info "Title suggested: #{title_result.title}"
      generated.update!(suggested_title: title_result.title)
      title_provider = LlmModel.find_by(model_id: title_result.model_id.to_s)&.provider || 'unknown'
      AiUsageLog.log!(
        provider:      title_provider,
        model_id:      title_result.model_id.to_s,
        feature:       'title_suggestion',
        input_tokens:  title_result.input_tokens,
        output_tokens: title_result.output_tokens,
        subject:       generated,
        account_id:    created_by_id
      )
    else
      log_info "Title suggestion skipped: #{title_result.error}"
    end

    if auto_import
      at 7, 'Auto-importing...'
      import_result = GeneratedImageImporter.call(
        generated,
        title:    generated.suggested_title.presence || 'AI Generated Image',
        tags:     source_record_tags_for(generated.source_record),
        notes:    "Auto-generated avatar. Prompt: #{prompt.truncate(200)}"
      )

      if import_result.success?
        log_info "Auto-imported as Image #{import_result.image.id}: #{import_result.notice}"
        store info_message: import_result.notice || 'Avatar generated and imported.',
              redirect_to:  @return_to
      else
        # Fall back to the manual review screen so the generated image is not lost.
        log_error "Auto-import failed: #{import_result.error}"
        store info_message: 'Image generated but auto-import failed — review it manually.',
              redirect_to:  generated_image_path(generated)
      end
    else
      at 7, 'Ready for review!'
      store info_message: 'Image generated and ready for import.',
            redirect_to:  generated_image_path(generated)
    end

    log_info "Created GeneratedImage #{generated.id}"
  ensure
    # Runs on every exit path after the tempfile exists, including early returns.
    tempfile&.close
    tempfile&.unlink
  end
end