Class: VideoTranscriptionWorker

Inherits:
Object
  • Object
show all
Includes:
Sidekiq::Worker, Workers::StatusBroadcastable
Defined in:
app/workers/video_transcription_worker.rb

Instance Attribute Summary

Attributes included from Workers::StatusBroadcastable

#broadcast_status_updates

Instance Method Summary collapse

Methods included from Workers::StatusBroadcastable::Overrides

#at, #store, #total

Instance Method Details

#perform(video_id, redirect_path = nil, options = {}) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'app/workers/video_transcription_worker.rb', line 7

# Runs the requested transcription stages for one video and reports progress
# through the +total+ / +at+ / +store+ status helpers.
#
# Every stage is opt-in via +options+ (string or symbol keys are accepted,
# since the hash is passed through +symbolize_keys+):
#
# * +:submit_transcription+     - verify the audio download, then submit to AssemblyAI
# * +:retrieve_transcript+      - pull the VTT back and run the follow-up stages inline
# * +:polish_transcript+        - polish the transcript with company terminology
# * +:translate_transcript+     - translate into the locales from +parse_translation_locales+
# * +:upload_vtt_to_cloudflare+ - upload the polished VTT
# * +:generate_seo+             - generate SEO metadata
#
# When +:retrieve_transcript+ is not set, the polish / translate / upload / SEO
# stages run as standalone steps against whatever the video already has.
#
# Errors raised inside the pipeline are rescued, logged, stored as a 'failed'
# status and reported via ErrorReporting; they are not re-raised. Errors from
# +Video.find+ and service construction happen before the rescue scope and
# propagate to the caller.
#
# @param video_id id of the record loaded with +Video.find+
# @param redirect_path [String, nil] stored under +:redirect_to+ on completion
#   or failure; defaults to "#{CRM_URL}/videos/<id>"
# @param options [Hash] stage flags described above
# @return [void]
def perform(video_id, redirect_path = nil, options = {})
  @video = Video.find(video_id)
  @redirect_path = redirect_path || "#{CRM_URL}/videos/#{@video.id}"
  @options = options.symbolize_keys

  # Debug logging to see what options are being passed
  Rails.logger.debug('VideoTranscriptionWorker starting', video_id: @options&.dig(:video_id))

  # Create a single transcription service instance to reuse throughout
  @transcription_service = VideoProcessing::TranscriptionService.new(@video, @options)

  # Set initial status
  total(100)
  at(0, 'Starting video transcription process...')

  begin
    # Step 1: Verify Cloudflare audio download is ready if needed for transcription
    if @options[:submit_transcription]
      raise 'Cloudflare audio download is not ready. Please wait for the audio download to complete before submitting for transcription.' unless @video.audio_download_ready?

      at(30, 'Cloudflare audio download verified...')
    else
      at(30, 'Skipping audio verification...')
    end

    # Step 2: Submit to AssemblyAI for transcription if requested
    # Uses webhook mode by default - AssemblyAI will call back when complete
    if @options[:submit_transcription]
      at(40, 'Submitting audio to AssemblyAI for transcription...')

      transcription_result = submit_to_assemblyai_with_webhook

      # The local's name was missing from the source listing; it only records
      # whether the submission reported webhook mode.
      webhook_mode = transcription_result && transcription_result[:mode] == :webhook
      if webhook_mode
        at(60, 'Transcription submitted to AssemblyAI - you will be notified by email when complete')

        # When using webhook mode with only submission, complete the job early
        # The webhook will handle the rest (retrieval, polishing, SEO, etc.)
        if @options.values_at(:retrieve_transcript, :polish_transcript, :generate_seo).none?
          store status: 'completed'
          at(100, 'Transcription submitted! You will receive an email when processing completes.')
          store redirect_to: @redirect_path
          return
        end
      else
        at(60, 'Transcription submission completed, but completion status unclear')
      end
    else
      at(60, 'Skipping transcription submission...')
    end

    # Step 3: Retrieve and process structured transcript if requested
    # Only proceed if we have an existing transcript ID (from previous submission or this one)
    # Note: When using webhook mode, retrieval happens in the webhook handler
    if @options[:retrieve_transcript]
      if @video.has_assemblyai_transcript_id?
        at(70, 'Retrieving VTT from AssemblyAI...')

        # Step 3.1: Retrieve VTT from AssemblyAI
        at(72, 'Retrieving VTT captions from AssemblyAI...')
        @transcription_service.retrieve_and_overwrite_structured_transcript
        at(75, 'VTT captions retrieved successfully')

        # Step 3.2: Polish transcript if requested
        if @options[:polish_transcript]
          at(77, 'Polishing transcript with company terminology...')
          @transcription_service.polish_transcript_with_company_terminology
          at(80, 'Transcript polished successfully')
        end

        # Step 3.3: Translate transcript if requested
        if @options[:translate_transcript]
          at(82, 'Starting translation to target locales...')
          translation_locales = parse_translation_locales
          if translation_locales.any?
            @transcription_service.translate_transcript(translation_locales) do |progress, message|
              at(progress, message)
            end
            at(87, "Transcript translated to #{translation_locales.join(', ')}")
          else
            at(82, 'No translation locales selected, skipping translation')
          end
        end

        # Step 3.4: Upload VTT to Cloudflare if requested
        if @options[:upload_vtt_to_cloudflare]
          if @video.has_polished_vtt?
            at(89, 'Uploading polished VTT captions to Cloudflare...')
            upload_vtt_to_cloudflare
            at(92, 'VTT captions uploaded to Cloudflare successfully')
          else
            at(89, 'Cannot upload VTT to Cloudflare - no polished VTT available')
          end
        end

        # Step 3.5: Generate SEO metadata if requested
        if @options[:generate_seo]
          at(94, 'Generating SEO metadata...')
          # NOTE(review): the method name was missing from the source listing;
          # generate_seo_metadata is assumed - confirm against
          # VideoProcessing::TranscriptionService before merging.
          @transcription_service.generate_seo_metadata
          at(97, 'SEO metadata generated successfully')
        end

        # NOTE(review): this reports 95 even after the SEO step reported 97.
        at(95, 'VTT processing completed')
      else
        at(70, 'Cannot retrieve transcript - no AssemblyAI transcript ID. Submit transcription first.')
      end
    else
      at(95, 'Skipping VTT retrieval...')
    end

    # Step 4: Polish existing transcript if requested (standalone step)
    if @options[:polish_transcript] && !@options[:retrieve_transcript] && @video.has_structured_transcript_json?
      at(75, 'Polishing existing transcript with company terminology...')

      @transcription_service.polish_transcript_with_company_terminology
      at(80, 'Transcript polishing completed')
    elsif @options[:polish_transcript] && !@video.has_structured_transcript_json?
      at(75, 'Cannot polish transcript - no structured transcript available')
    end

    # Step 5: Translate existing transcript if requested (standalone step)
    if @options[:translate_transcript] && !@options[:retrieve_transcript] && @video.has_polished_vtt?
      at(82, 'Starting translation of existing transcript...')
      translation_locales = parse_translation_locales
      if translation_locales.any?
        @transcription_service.translate_transcript(translation_locales) do |progress, message|
          at(progress, message)
        end
        at(87, "Translation completed for #{translation_locales.join(', ')}")
      else
        at(82, 'No translation locales selected, skipping translation')
      end
    elsif @options[:translate_transcript] && !@video.has_polished_vtt?
      at(82, 'Cannot translate - no polished VTT available')
    end

    # Step 6: Upload VTT to Cloudflare if requested (standalone step)
    if @options[:upload_vtt_to_cloudflare] && !@options[:retrieve_transcript]
      if @video.has_polished_vtt?
        at(89, 'Uploading polished VTT captions to Cloudflare...')
        upload_vtt_to_cloudflare
        at(92, 'VTT captions uploaded to Cloudflare successfully')
      else
        at(89, 'Cannot upload VTT to Cloudflare - no polished VTT available')
      end
    end

    # Step 7: Generate SEO content if requested (standalone step)
    # Guarded on the structured transcript, like the standalone polish step,
    # so the "cannot generate" branch is reachable when SEO is requested
    # without retrieval.
    if @options[:generate_seo] && !@options[:retrieve_transcript] && @video.has_structured_transcript_json?
      at(94, 'Generating SEO metadata...')

      # NOTE(review): method name assumed, see the note in step 3.5.
      @transcription_service.generate_seo_metadata
      at(97, 'SEO metadata generated')
    elsif @options[:generate_seo] && !@video.has_structured_transcript_json?
      at(94, 'Cannot generate SEO - no structured transcript available')
    end

    # Complete
    store status: 'completed'
    at(100, 'Video transcription completed successfully!')
    store redirect_to: @redirect_path
  rescue StandardError => e
    error_message = e.message
    Rails.logger.error "Video transcription failed for video #{@video.id}: #{error_message}"
    # backtrace is nil for exceptions that were never raised; Array() keeps the log call safe
    Rails.logger.error Array(e.backtrace).join("\n")

    # Provide more user-friendly error messages for common issues
    user_friendly_message = if error_message.include?('timeout')
                              "Transcription timed out. This can happen with longer videos. The transcription may still be processing on AssemblyAI's servers. You can try retrieving the transcript again in a few minutes."
                            elsif error_message.include?('AssemblyAI')
                              "AssemblyAI transcription service error: #{error_message}"
                            else
                              error_message
                            end

    # Store failure status and error message for the job progress page
    store status: 'failed'
    store error_message: user_friendly_message
    store redirect_to: @redirect_path

    # Report to AppSignal
    ErrorReporting.error(e, source: :background, video_id: @video.id)
  end
end