Class: CallRecordProcessing::TranscriptionService

Inherits:
Object
  • Object
show all
Defined in:
app/services/call_record_processing/transcription_service.rb

Overview

Service for transcribing call recordings using AssemblyAI with speaker diarization.
Audio files (.wav, .mp3, .aac, .oga) are transcribed and formatted with speaker labels.

Examples:

Basic usage

service = CallRecordProcessing::TranscriptionService.new(call_record)
result = service.transcribe

Force retranscription

service = CallRecordProcessing::TranscriptionService.new(call_record, force: true)
result = service.transcribe

Constant Summary collapse

MIN_DURATION_SECONDS =
30
MIN_DURATION_SECONDS_VOICEMAIL =
5
MAX_WAIT_TIME =

Maximum wait time for transcription, in seconds (600 = 10 minutes; most calls are short)

600

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(call_record, options = {}) ⇒ TranscriptionService

Returns a new instance of TranscriptionService.



24
25
26
27
# File 'app/services/call_record_processing/transcription_service.rb', line 24

# Builds a service instance bound to a single call record.
#
# @param call_record [Object] the record to transcribe; stored as-is
# @param options [Hash] optional settings; keys are normalized with
#   +symbolize_keys+ so string and symbol keys are interchangeable
def initialize(call_record, options = {})
  @call_record = call_record

  normalized_options = options.symbolize_keys
  @options = normalized_options
end

Instance Attribute Details

#call_recordObject (readonly)

Returns the call record this service was initialized with.



22
23
24
# File 'app/services/call_record_processing/transcription_service.rb', line 22

# Read-only accessor for the call record held by this service.
#
# @return [Object] the current value of @call_record
def call_record
  @call_record
end

#optionsObject (readonly)

Returns the options hash (with symbolized keys) this service was initialized with.



22
23
24
# File 'app/services/call_record_processing/transcription_service.rb', line 22

# Read-only accessor for the options held by this service.
#
# @return [Object] the current value of @options
def options
  @options
end

Instance Method Details

#process_completed_transcript(result) ⇒ Object

Process a completed transcript (called by webhook worker or sync mode)
Formats the transcript, saves it, runs LeMUR analysis, and generates embeddings

Parameters:

  • result (Hash)

    The completed transcript from AssemblyAI



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'app/services/call_record_processing/transcription_service.rb', line 71

# Persists a completed transcript on the call record and kicks off follow-up work.
#
# Steps, in order:
# 1. Format the payload with speaker diarization.
# 2. Pick the agent speaker label to store.
# 3. Save transcript, raw payload, state, timestamps, direction and label.
# 4. Run call analysis when the payload carries a transcript id.
# 5. For voicemail records, append the transcription to the voicemail activity.
#
# @param result [Hash] completed transcript payload; its 'id' entry is read
# @return [Object, nil] whatever the voicemail append returns, or nil for
#   non-voicemail records
# @raise [StandardError] whatever +update!+ or the helpers raise; nothing is
#   rescued here
def process_completed_transcript(result)
  # Formatting has to happen first: it sets @agent_speaker_label or
  # @use_role_labels, which the label selection below reads.
  diarized = format_speaker_diarization(result)

  # Label stored on the record:
  # - name-based: the agent's actual name (legacy Slam-1 speech_understanding)
  # - role-based: the literal 'Agent' label from the API
  # - legacy/Universal-3 Pro A/B diarization: the detected label ('A' or 'B')
  # - multichannel: nil (channel mapping is used instead)
  agent_label =
    if @use_name_labels
      agent_name
    elsif @use_role_labels
      'Agent'
    elsif @agent_speaker_label.present?
      @agent_speaker_label
    end

  transcript_id = result['id']

  # Store the transcription before any analysis is attempted.
  attributes = {
    transcript: diarized[:text],
    structured_transcript_json: result,
    transcription_state: :completed,
    transcribed_at: Time.current,
    assemblyai_transcript_id: transcript_id,
    call_direction: detect_call_direction,
    agent_speaker_label: agent_label
  }
  call_record.update!(attributes)

  Rails.logger.info "[CallRecordTranscription] Completed transcription for CallRecord #{call_record.id}"

  # Analysis (summary, action items, etc.) needs a transcript id to work with.
  run_call_analysis(transcript_id) if transcript_id.present?

  if call_record.voicemail?
    append_transcription_to_voicemail_activity
  end
end

#run_call_analysis(transcript_id = nil) ⇒ Object

Run call analysis on the transcript using the LLM (summary, action items, etc.)
This replaces the separate CallRecordSummaryWorker.
Made public so it can be called independently for re-analysis.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'app/services/call_record_processing/transcription_service.rb', line 111

# Runs the call analysis for a transcript and stores the findings on the call record.
#
# Falls back to the transcript id saved on the call record when none is given,
# and does nothing when neither is present. On success the record is updated
# with the analysis fields and an embedding job is queued. Any StandardError
# raised during analysis or the update is logged and swallowed, so a failed
# analysis does not fail the transcription that triggered it.
#
# @param transcript_id [String, nil] transcript to analyze; defaults to
#   +call_record.assemblyai_transcript_id+
# @return [Object, nil] nil when there is no transcript id; otherwise the
#   return value of the last logger call
def run_call_analysis(transcript_id = nil)
  target_id = transcript_id || call_record.assemblyai_transcript_id
  return unless target_id.present?

  Rails.logger.info "[CallRecordTranscription] Running call analysis for transcript: #{target_id}"

  begin
    findings = run_analysis_with_agent(target_id)

    # Attributes written to the call record from the analysis output
    enrichment = {
      ai_summary: findings['summary'],
      call_outcome: map_call_outcome(findings['call_outcome']),
      customer_satisfaction: findings['customer_satisfaction'],
      action_items: findings['action_items'],
      call_phases: findings['call_phases'],
      key_topics: findings['key_topics'],
      agent_performance_score: findings.dig('agent_performance', 'score'),
      summarized_at: Time.current,
      lemur_analyzed_at: Time.current
    }
    call_record.update!(enrichment)

    Rails.logger.info "[CallRecordTranscription] Call analysis completed for CallRecord #{call_record.id}"

    # Queued only after the update so the embedding sees the enriched content
    EmbeddingWorker.perform_async('CallRecord', call_record.id)
    Rails.logger.info "[CallRecordTranscription] Queued embedding generation for CallRecord #{call_record.id}"
  rescue StandardError => failure
    # Deliberately best-effort: log and carry on rather than re-raise
    Rails.logger.error "[CallRecordTranscription] Call analysis failed for CallRecord #{call_record.id}: #{failure.message}"
    Rails.logger.error failure.backtrace.first(5).join("\n")
  end
end

#transcribe(use_webhook: true) ⇒ Hash

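Main transcription workflow; uses webhooks by default.
In webhook mode the transcription is submitted to AssemblyAI and the method returns immediately.
When transcription completes, AssemblyAI calls our webhook, which triggers
AssemblyaiCompletionWorker to process the result.
With use_webhook: false the method instead polls for completion and processes the transcript inline.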

Parameters:

  • use_webhook (Boolean) (defaults to: true)

    Use async webhook mode (default: true)

Returns:

  • (Hash)

    Result with :status and transcript_id or error



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'app/services/call_record_processing/transcription_service.rb', line 36

# Entry point for transcribing the call record's audio.
#
# Returns a skip result without submitting anything when the record is already
# transcribed (and not forced), is too short, or has no audio. Otherwise it
# marks the record as processing, resolves the audio URL and then either:
# - webhook mode (default): submits the job and returns immediately; or
# - sync mode: polls until completion and processes the transcript inline.
#
# Every failure after +mark_processing+ sets +transcription_state+ to :error,
# including the case where no audio URL can be resolved.
#
# @param use_webhook [Boolean] submit asynchronously and let the webhook
#   finish the work (default), or poll synchronously when false
# @return [Hash] a skip result, an error result,
#   +{ status: :submitted, transcript_id:, mode: :webhook }+ or
#   +{ status: :success, transcript_id:, word_count: }+
def transcribe(use_webhook: true)
  return skip_result(:already_transcribed) if already_transcribed? && !force?
  return skip_result(:too_short) if too_short?
  return skip_result(:no_audio) unless has_audio?

  begin
    mark_processing

    audio_url = get_audio_url
    unless audio_url
      # mark_processing has already run, so record the failure the same way the
      # rescue path below does; otherwise the record would presumably be left
      # looking like it is still being processed.
      Rails.logger.error "[CallRecordTranscription] No audio URL available for CallRecord #{call_record.id}"
      call_record.update!(transcription_state: :error)
      return error_result(:no_audio_url)
    end

    Rails.logger.info "[CallRecordTranscription] Starting transcription for CallRecord #{call_record.id}, audio: #{audio_url}"

    if use_webhook
      # Async mode: submit and exit immediately, the webhook handles completion
      transcript_id = submit_transcription_with_webhook(audio_url)
      Rails.logger.info "[CallRecordTranscription] Submitted async transcription for CallRecord #{call_record.id}: #{transcript_id}"
      { status: :submitted, transcript_id: transcript_id, mode: :webhook }
    else
      # Sync mode: poll for completion (legacy, slower but useful for testing)
      result = submit_and_poll_transcription(audio_url)
      process_completed_transcript(result)
      # to_s covers a nil transcript; split never returns nil
      { status: :success, transcript_id: result['id'], word_count: call_record.transcript.to_s.split.size }
    end
  rescue StandardError => e
    Rails.logger.error "[CallRecordTranscription] Failed for CallRecord #{call_record.id}: #{e.message}"
    call_record.update!(transcription_state: :error)
    error_result(:transcription_failed, e.message)
  end
end