Class: SeoPageAnalysisWorker

Inherits: Object
Includes:
Sidekiq::Worker, Workers::StatusBroadcastable
Defined in:
app/workers/seo_page_analysis_worker.rb

Overview

Background worker for SEO page analysis.

Full mode (default) — orchestrates 9 steps:

  1. Crawl page content + schema (if missing or stale >24h)
  2. Sync first-party visit counts (from Visits table) -> data_points
  3. Sync GSC search performance (clicks, impressions, CTR) -> data_points
  4. Inspect URL indexing status via GSC URL Inspection API
  5. Sync GA4 engagement metrics (page views, sessions, engagement) -> data_points
  6. Sync keywords from Ahrefs (ranking data)
  7. Gather context for AI analysis
  8. Run AI-powered SEO analysis
  9. Save results

Analysis-only mode (skip_syncs: true) — 3 steps:

  1. Gather context
  2. Run AI-powered SEO analysis
  3. Save results

Usage:
job_id = SeoPageAnalysisWorker.perform_async(site_map_id, { return_path: '/path' })
redirect_to job_path(job_id)

Defined Under Namespace

Classes: SeoAnalysisError

Instance Attribute Summary

Attributes included from Workers::StatusBroadcastable

#broadcast_status_updates

Instance Method Summary collapse

Methods included from Workers::StatusBroadcastable::Overrides

#at, #store, #total

Instance Method Details

#perform(site_map_id, options = {}) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'app/workers/seo_page_analysis_worker.rb', line 34

# Entry point for the SEO page-analysis background job.
#
# Runs either the full 9-step pipeline (crawl, visit/GSC/GA4/keyword syncs,
# then context gathering, AI analysis and result storage) or — when
# options[:skip_syncs] is present — a 3-step analysis-only pipeline.
# Progress and results are reported through #total / #at / #store
# (provided by Workers::StatusBroadcastable::Overrides) so the UI can
# poll the job and redirect when done.
#
# @param site_map_id [Integer] id of the SiteMap (page) to analyse
# @param options [Hash] job options; normalised with #with_indifferent_access
#   so symbol or string keys both work
# @option options [Object] :skip_syncs any present value enables analysis-only mode
# @option options [Object] :premium any truthy value selects the premium AI model
# @option options [String] :return_path path the UI is redirected to afterwards
# @return [Object] unspecified; callers read job state via the store, not the return value
# @raise [SeoAnalysisError] when the analysis service reports an error in its result
# @raise [StandardError] any other failure is recorded in the job store and re-raised
def perform(site_map_id, options = {})
  options = options.with_indifferent_access
  store status: 'processing'
  store site_map_id: site_map_id

  # Raises ActiveRecord::RecordNotFound for a bad id; caught by the
  # bottom rescue, which stores 'error' status before re-raising.
  site_map = SiteMap.find(site_map_id)
  skip_syncs = options[:skip_syncs].present?
  store page_path: site_map.path
  store locale: site_map.locale
  store mode: skip_syncs ? 'analysis_only' : 'full'

  if skip_syncs
    # Analysis-only mode: 3 steps (gather context, AI analysis, save results)
    total 3
    step = 0

    log_info "Analysis-only mode — skipping crawl and data syncs"
  else
    # Full mode: 9 steps (crawl, sync visits, sync GSC, inspect URL, sync GA4, sync keywords, gather context, AI analysis, save results)
    total 9
    step = 0

    # Step 1: Crawl page if content/schema missing or stale (>24h)
    step += 1
    at(step, 'Crawling page content...')
    crawl_stale = site_map.extracted_at.blank? || site_map.extracted_at < 24.hours.ago
    crawl_missing = site_map.extracted_content.blank? || site_map.rendered_schema.blank?
    if crawl_missing || crawl_stale
      # Build a human-readable reason for the log; "missing" wins over "stale".
      reason = if crawl_missing
                 "missing: #{[('content' if site_map.extracted_content.blank?), ('schema' if site_map.rendered_schema.blank?)].compact.join(', ')}"
               else
                 "stale: last crawled #{((Time.current - site_map.extracted_at) / 1.hour).round}h ago"
               end
      log_info "Crawling page for content and schema (#{reason})"
      begin
        # Crawler takes a relation, not a record — wrap the single page in a scope.
        Cache::SiteCrawler.new.process(pages: SiteMap.where(id: site_map.id), extract_content: true)
        site_map.reload
        inbound_count  = site_map.inbound_links.editorial.count
        outbound_count = site_map.outbound_links.editorial.count
        log_info "Crawl complete — status: #{site_map.last_status}, content: #{site_map.extracted_content&.length || 0} chars, schemas: #{site_map.rendered_schema_types.join(', ').presence || 'none'}, links: #{outbound_count} outbound / #{inbound_count} inbound editorial"
      rescue StandardError => e
        # Best-effort: a failed crawl must not abort the remaining steps.
        log_warn "Crawl failed: #{e.message} - continuing"
      end
    else
      log_info "Content and schema present and fresh (crawled #{((Time.current - site_map.extracted_at) / 1.hour).round}h ago), skipping crawl"
    end

    # Step 2: Sync first-party visit counts
    step += 1
    at(step, 'Syncing visit counts...')
    log_info "Syncing visit counts for SiteMap #{site_map_id}"
    begin
      Seo::VisitsSyncService.new(site_map_ids: [site_map_id]).process
      site_map.reload
      record_visit_data_point(site_map)
      store visit_count_30d: site_map.visit_count_30d
      log_info "Visit count (30d): #{site_map.visit_count_30d}"
    rescue StandardError => e
      # Best-effort: continue without fresh visit data.
      log_warn "Visit sync failed: #{e.message} - continuing"
    end

    # Step 3: Sync GSC search performance (skip if data is fresh <24h)
    step += 1
    at(step, 'Syncing Google Search Console data...')
    if site_map.seo_synced_at.present? && site_map.seo_synced_at > 24.hours.ago
      log_info "GSC data is fresh (synced #{((Time.current - site_map.seo_synced_at) / 1.hour).round}h ago), skipping"
      store seo_clicks: site_map.seo_clicks
    else
      log_info "Syncing GSC data for SiteMap #{site_map_id}"
      begin
        sync_gsc_for_page(site_map)
        site_map.reload
        store seo_clicks: site_map.seo_clicks
        log_info "GSC clicks (28d): #{site_map.seo_clicks}"
      rescue StandardError => e
        # Best-effort: continue without fresh GSC data.
        log_warn "GSC sync failed: #{e.message} - continuing"
      end
    end

    # Step 4: Inspect URL indexing status (skip if inspected <7d ago)
    step += 1
    at(step, 'Inspecting URL indexing status...')
    if site_map.google_inspected_at.present? && site_map.google_inspected_at > 7.days.ago
      log_info "URL inspection is fresh (inspected #{((Time.current - site_map.google_inspected_at) / 1.hour).round}h ago), skipping"
    else
      # NOTE(review): timeout_seconds: 0 presumably makes the advisory lock
      # non-blocking, so a concurrent job holding the lock causes this step
      # to be skipped rather than waited on — confirm against
      # with_advisory_lock_result's semantics.
      SiteMap.with_advisory_lock_result("gsc_inspect_#{site_map.id}", timeout_seconds: 0) do
        # Re-check freshness inside the lock: another job may have just
        # finished the inspection between our first check and lock acquisition.
        site_map.reload
        if site_map.google_inspected_at.present? && site_map.google_inspected_at > 7.days.ago
          log_info "URL inspection completed by another job, skipping"
        else
          begin
            inspect_url_indexing(site_map)
            site_map.reload
          rescue StandardError => e
            # Best-effort: continue without inspection data.
            log_warn "URL inspection failed: #{e.message} - continuing"
          end
        end
      end
    end

    # Step 5: Sync GA4 engagement metrics
    step += 1
    at(step, 'Syncing Google Analytics data...')
    log_info "Syncing GA4 data for SiteMap #{site_map_id}"
    begin
      sync_ga4_for_page(site_map)
      log_info "GA4 data synced"
    rescue StandardError => e
      # Best-effort: continue without fresh GA4 data.
      log_warn "GA4 sync failed: #{e.message} - continuing"
    end

    # Mark as synced now that all metric sources have been queried
    # (update_columns skips validations/callbacks — timestamp only).
    site_map.update_columns(seo_synced_at: Time.current)

    # Step 6: Sync keywords from Ahrefs (skip if data is fresh <24h)
    step += 1
    at(step, 'Syncing keywords from Ahrefs...')
    keywords_last_synced = site_map.seo_page_keywords.maximum(:updated_at)
    if keywords_last_synced.present? && keywords_last_synced > 24.hours.ago
      log_info "Keywords are fresh (updated #{((Time.current - keywords_last_synced) / 1.hour).round}h ago), skipping"
      store keywords_synced: site_map.seo_page_keywords.count
    else
      log_info "Syncing keywords for SiteMap #{site_map_id}"

      # NOTE(review): unlike steps 1–5 this call is not wrapped in
      # begin/rescue — an exception raised by KeywordSyncService fails the
      # whole job. Expected failures are reported via keyword_result[:error]
      # below; confirm the unguarded raise path is intentional.
      keyword_result = Seo::KeywordSyncService.new(site_map: site_map).process

      if keyword_result[:error]
        store keyword_sync_error: keyword_result[:error]
        log_warn "Keyword sync failed: #{keyword_result[:error]} - continuing with analysis"
      else
        store keywords_synced: keyword_result[:keywords_synced]
        log_info "Synced #{keyword_result[:keywords_synced]} keywords"
      end
    end
  end

  # Shared tail: both modes converge on steps gather-context / analyse / save.
  # Gather related content for analysis
  step += 1
  at(step, 'Gathering related content for analysis...')
  log_info "Gathering context for AI analysis"

  # Run AI analysis
  step += 1
  at(step, 'Running AI SEO analysis...')
  log_info "Running SEO analysis for SiteMap #{site_map_id}"

  # Only pass :model when premium was requested, letting the service's
  # own default apply otherwise.
  analysis_model = options[:premium] ? Seo::PageAnalysisService::ANALYSIS_MODEL_PREMIUM : nil
  analysis_opts = { site_map: site_map, force: true }
  analysis_opts[:model] = analysis_model if analysis_model
  result = Seo::PageAnalysisService.new(analysis_opts).process

  # Store results
  step += 1
  at(step, 'Saving analysis results...')

  return_path = options[:return_path] || site_map_path(site_map)

  # NOTE(review): result is accessed with a symbol key (:error) but string
  # keys ('overall_score', 'recommendations') — presumably the service
  # returns string-keyed payloads with a symbol error flag; verify.
  if result[:error]
    store status: 'error'
    store error: result[:error]
    store redirect_to: return_path
    log_error "SEO analysis failed for SiteMap #{site_map_id}: #{result[:error]}"
    # Raising marks the Sidekiq job failed; the stored state above is what
    # the UI reads.
    raise SeoAnalysisError, result[:error]
  else
    store status: 'completed'
    store overall_score: result['overall_score']
    store recommendations_count: result['recommendations']&.size || 0
    store redirect_to: return_path
    log_info "SEO analysis completed for SiteMap #{site_map_id}: Score #{result['overall_score']}/100"

    begin
      site_map.reload
      extract_stats = Seo::RecommendationExtractorService.new(site_map: site_map).process
      log_info "Recommendations extracted: #{extract_stats[:extracted]} new, #{extract_stats[:updated]} updated"
    rescue StandardError => e
      # Extraction is a nice-to-have on top of a completed analysis.
      log_warn "Recommendation extraction failed: #{e.message} - continuing"
    end
  end
rescue StandardError => e
  # Catch-all: record failure state for the UI, then re-raise so Sidekiq
  # registers the job as failed (and can retry per its policy).
  store status: 'error'
  store error: e.message
  # site_map may be undefined here (e.g. find failed), so fall back to the
  # index path rather than a record-specific one.
  store redirect_to: options[:return_path] || site_maps_path
  log_error "SEO analysis failed for SiteMap #{site_map_id}: #{e.message}"
  raise e
end