Class: SeoPageAnalysisWorker
- Inherits: Object (Object → SeoPageAnalysisWorker)
- Includes:
- Sidekiq::Worker, Workers::StatusBroadcastable
- Defined in:
- app/workers/seo_page_analysis_worker.rb
Overview
Background worker for SEO page analysis.
Full mode (default) — orchestrates 9 steps:
- Crawl page content + schema (if missing or stale >24h)
- Sync first-party visit counts (from Visits table) -> data_points
- Sync GSC search performance (clicks, impressions, CTR) -> data_points
- Inspect URL indexing status via GSC URL Inspection API
- Sync GA4 engagement metrics (page views, sessions, engagement) -> data_points
- Sync keywords from Ahrefs (ranking data)
- Gather context for AI analysis
- Run AI-powered SEO analysis
- Save results
Analysis-only mode (skip_syncs: true) — 3 steps:
- Gather context
- Run AI-powered SEO analysis
- Save results
Usage:
job_id = SeoPageAnalysisWorker.perform_async(site_map_id, { return_path: '/path' })
redirect_to job_path(job_id)
Defined Under Namespace
Classes: SeoAnalysisError
Instance Attribute Summary
Attributes included from Workers::StatusBroadcastable
Instance Method Summary collapse
Methods included from Workers::StatusBroadcastable::Overrides
Instance Method Details
#perform(site_map_id, options = {}) ⇒ Object
(Source: app/workers/seo_page_analysis_worker.rb, lines 34–218)
# File 'app/workers/seo_page_analysis_worker.rb', line 34
#
# Runs the SEO analysis pipeline for a single SiteMap page.
#
# NOTE(review): reconstructed from a garbled doc extraction — the local name
# `options` and the `.message` calls on rescued exceptions were stripped by the
# extractor and have been restored from context; confirm against the original
# source file.
#
# @param site_map_id [Integer] id of the SiteMap row to analyze
# @param options [Hash] :skip_syncs (analysis-only mode), :premium (use the
#   premium analysis model), :return_path (redirect target stored for the UI)
# @raise [SeoAnalysisError] when the AI analysis service reports an error
# @raise [StandardError] any unexpected failure is recorded via `store` then re-raised
def perform(site_map_id, options = {})
  options = options.with_indifferent_access
  store status: 'processing'
  store site_map_id: site_map_id
  site_map = SiteMap.find(site_map_id)
  skip_syncs = options[:skip_syncs].present?
  store page_path: site_map.path
  store locale: site_map.locale
  store mode: skip_syncs ? 'analysis_only' : 'full'

  if skip_syncs
    # Analysis-only mode: 3 steps (gather context, AI analysis, save results)
    total 3
    step = 0
    log_info "Analysis-only mode — skipping crawl and data syncs"
  else
    # Full mode: 9 steps (crawl, sync visits, sync GSC, inspect URL, sync GA4,
    # sync keywords, gather context, AI analysis, save results)
    total 9
    step = 0

    # Step 1: Crawl page if content/schema missing or stale (>24h)
    step += 1
    at(step, 'Crawling page content...')
    crawl_stale = site_map.extracted_at.blank? || site_map.extracted_at < 24.hours.ago
    crawl_missing = site_map.extracted_content.blank? || site_map.rendered_schema.blank?
    if crawl_missing || crawl_stale
      reason = if crawl_missing
        "missing: #{[('content' if site_map.extracted_content.blank?), ('schema' if site_map.rendered_schema.blank?)].compact.join(', ')}"
      else
        "stale: last crawled #{((Time.current - site_map.extracted_at) / 1.hour).round}h ago"
      end
      log_info "Crawling page for content and schema (#{reason})"
      begin
        Cache::SiteCrawler.new.process(pages: SiteMap.where(id: site_map.id), extract_content: true)
        site_map.reload
        inbound_count = site_map.inbound_links.editorial.count
        outbound_count = site_map.outbound_links.editorial.count
        log_info "Crawl complete — status: #{site_map.last_status}, content: #{site_map.extracted_content&.length || 0} chars, schemas: #{site_map.rendered_schema_types.join(', ').presence || 'none'}, links: #{outbound_count} outbound / #{inbound_count} inbound editorial"
      rescue StandardError => e
        # Crawl failure is non-fatal: analysis can proceed on stale content.
        log_warn "Crawl failed: #{e.message} - continuing"
      end
    else
      log_info "Content and schema present and fresh (crawled #{((Time.current - site_map.extracted_at) / 1.hour).round}h ago), skipping crawl"
    end

    # Step 2: Sync first-party visit counts
    step += 1
    at(step, 'Syncing visit counts...')
    log_info "Syncing visit counts for SiteMap #{site_map_id}"
    begin
      Seo::VisitsSyncService.new(site_map_ids: [site_map_id]).process
      site_map.reload
      record_visit_data_point(site_map)
      store visit_count_30d: site_map.visit_count_30d
      log_info "Visit count (30d): #{site_map.visit_count_30d}"
    rescue StandardError => e
      log_warn "Visit sync failed: #{e.message} - continuing"
    end

    # Step 3: Sync GSC search performance (skip if data is fresh <24h)
    step += 1
    at(step, 'Syncing Google Search Console data...')
    if site_map.seo_synced_at.present? && site_map.seo_synced_at > 24.hours.ago
      log_info "GSC data is fresh (synced #{((Time.current - site_map.seo_synced_at) / 1.hour).round}h ago), skipping"
      store seo_clicks: site_map.seo_clicks
    else
      log_info "Syncing GSC data for SiteMap #{site_map_id}"
      begin
        sync_gsc_for_page(site_map)
        site_map.reload
        store seo_clicks: site_map.seo_clicks
        log_info "GSC clicks (28d): #{site_map.seo_clicks}"
      rescue StandardError => e
        log_warn "GSC sync failed: #{e.message} - continuing"
      end
    end

    # Step 4: Inspect URL indexing status (skip if inspected <7d ago).
    # Advisory lock (timeout 0 = non-blocking) prevents concurrent jobs from
    # double-spending the rate-limited GSC URL Inspection quota.
    step += 1
    at(step, 'Inspecting URL indexing status...')
    if site_map.google_inspected_at.present? && site_map.google_inspected_at > 7.days.ago
      log_info "URL inspection is fresh (inspected #{((Time.current - site_map.google_inspected_at) / 1.hour).round}h ago), skipping"
    else
      SiteMap.with_advisory_lock_result("gsc_inspect_#{site_map.id}", timeout_seconds: 0) do
        site_map.reload
        # Re-check after acquiring the lock: another job may have just inspected.
        if site_map.google_inspected_at.present? && site_map.google_inspected_at > 7.days.ago
          log_info "URL inspection completed by another job, skipping"
        else
          begin
            inspect_url_indexing(site_map)
            site_map.reload
          rescue StandardError => e
            log_warn "URL inspection failed: #{e.message} - continuing"
          end
        end
      end
    end

    # Step 5: Sync GA4 engagement metrics
    step += 1
    at(step, 'Syncing Google Analytics data...')
    log_info "Syncing GA4 data for SiteMap #{site_map_id}"
    begin
      sync_ga4_for_page(site_map)
      log_info "GA4 data synced"
    rescue StandardError => e
      log_warn "GA4 sync failed: #{e.message} - continuing"
    end

    # Mark as synced now that all metric sources have been queried
    # (update_columns skips validations/callbacks — timestamp only).
    site_map.update_columns(seo_synced_at: Time.current)

    # Step 6: Sync keywords from Ahrefs (skip if data is fresh <24h)
    step += 1
    at(step, 'Syncing keywords from Ahrefs...')
    keywords_last_synced = site_map.seo_page_keywords.maximum(:updated_at)
    if keywords_last_synced.present? && keywords_last_synced > 24.hours.ago
      log_info "Keywords are fresh (updated #{((Time.current - keywords_last_synced) / 1.hour).round}h ago), skipping"
      store keywords_synced: site_map.seo_page_keywords.count
    else
      log_info "Syncing keywords for SiteMap #{site_map_id}"
      keyword_result = Seo::KeywordSyncService.new(site_map: site_map).process
      if keyword_result[:error]
        store keyword_sync_error: keyword_result[:error]
        log_warn "Keyword sync failed: #{keyword_result[:error]} - continuing with analysis"
      else
        store keywords_synced: keyword_result[:keywords_synced]
        log_info "Synced #{keyword_result[:keywords_synced]} keywords"
      end
    end
  end

  # Gather related content for analysis (shared by both modes from here on)
  step += 1
  at(step, 'Gathering related content for analysis...')
  log_info "Gathering context for AI analysis"

  # Run AI analysis; premium option selects the premium model, otherwise the
  # service default is used.
  step += 1
  at(step, 'Running AI SEO analysis...')
  log_info "Running SEO analysis for SiteMap #{site_map_id}"
  analysis_model = options[:premium] ? Seo::PageAnalysisService::ANALYSIS_MODEL_PREMIUM : nil
  analysis_opts = { site_map: site_map, force: true }
  analysis_opts[:model] = analysis_model if analysis_model
  result = Seo::PageAnalysisService.new(analysis_opts).process

  # Store results
  step += 1
  at(step, 'Saving analysis results...')
  return_path = options[:return_path] || site_map_path(site_map)
  if result[:error]
    store status: 'error'
    store error: result[:error]
    store redirect_to: return_path
    log_error "SEO analysis failed for SiteMap #{site_map_id}: #{result[:error]}"
    raise SeoAnalysisError, result[:error]
  else
    store status: 'completed'
    store overall_score: result['overall_score']
    store recommendations_count: result['recommendations']&.size || 0
    store redirect_to: return_path
    log_info "SEO analysis completed for SiteMap #{site_map_id}: Score #{result['overall_score']}/100"
    begin
      site_map.reload
      extract_stats = Seo::RecommendationExtractorService.new(site_map: site_map).process
      log_info "Recommendations extracted: #{extract_stats[:extracted]} new, #{extract_stats[:updated]} updated"
    rescue StandardError => e
      # Extraction is best-effort post-processing; analysis already succeeded.
      log_warn "Recommendation extraction failed: #{e.message} - continuing"
    end
  end
rescue StandardError => e
  # Record the failure for the status UI, then re-raise so Sidekiq retries.
  store status: 'error'
  store error: e.message
  store redirect_to: options[:return_path] || site_maps_path
  log_error "SEO analysis failed for SiteMap #{site_map_id}: #{e.message}"
  raise e
end