Class: Seo::PageAnalysisService

Inherits:
BaseService show all
Defined in:
app/services/seo/page_analysis_service.rb

Overview

AI-powered SEO analysis for individual pages using RubyLLM.
Generates strategic recommendations based on content type, current metrics,
and semantically related content from the site's embedding database.

Examples:

Analyze a page

site_map = SiteMap.find(123)
result = Seo::PageAnalysisService.new(site_map: site_map).process

Force re-analysis

Seo::PageAnalysisService.new(site_map: site_map, force: true).process

Constant Summary collapse

ANALYSIS_MODEL =
AiModelConstants.id(:seo_analysis)
ANALYSIS_MODEL_PREMIUM =
AiModelConstants.id(:anthropic_opus)
MAX_OUTPUT_TOKENS =
32_768
TEMPERATURE =
0.3
THINKING_BUDGET =

Thinking budget for premium analyses (Opus with extended thinking).
Medium effort gives meaningful reasoning improvement without excessive cost.

10_000
ANALYSIS_SCHEMA =

Schema for structured AI output.

  • Gemini (standard): enforced natively via responseJsonSchema (Gemini 2.5+)
  • OpenAI: enforced natively via response_format
  • Anthropic (premium): schema exceeds grammar compiler limits, so we use
    prompt-based enforcement + JSON parsing fallbacks for Claude models.
    Uses Chain-of-Thought pattern: current_state_analysis -> recommendations
{
  # Root of the JSON Schema: a closed object (no additional properties) whose
  # sixteen top-level keys are all listed in the trailing `required` array.
  type: 'object',
  additionalProperties: false,
  properties: {
    # PHASE 1: Chain-of-Thought Analysis (reasoning before recommendations)
    current_state_analysis: {
      type: 'object',
      additionalProperties: false,
      description: 'ANALYZE FIRST before recommending. Document what you observe.',
      properties: {
        # Title: exact copy of the current title, keyword presence, length bucket, assessment.
        title_analysis: {
          type: 'object',
          additionalProperties: false,
          properties: {
            current_title: { type: 'string', description: 'Copy the exact current title from the input' },
            contains_primary_keyword: { type: 'boolean', description: 'Does the title contain the primary topic keyword?' },
            title_length: { type: 'string', enum: %w[too_short optimal too_long] },
            assessment: { type: 'string', description: 'Brief assessment of title quality' }
          },
          required: %w[current_title contains_primary_keyword title_length assessment]
        },
        # Content: depth bucket, FAQ / internal-link presence flags, missing SEO elements.
        content_analysis: {
          type: 'object',
          additionalProperties: false,
          properties: {
            word_count_estimate: { type: 'string', enum: %w[thin adequate comprehensive] },
            has_faq_section: { type: 'boolean' },
            has_internal_links: { type: 'boolean' },
            missing_elements: { type: 'array', items: { type: 'string' }, description: 'What SEO elements are missing' }
          },
          required: %w[word_count_estimate has_faq_section has_internal_links missing_elements]
        },
        # Keywords: what the page ranks for, how well that matches its topic, untargeted keywords.
        keyword_analysis: {
          type: 'object',
          additionalProperties: false,
          properties: {
            current_ranking_keywords: { type: 'array', items: { type: 'string' }, description: 'Top 3 keywords page currently ranks for' },
            topic_match: { type: 'string', enum: %w[strong moderate weak none], description: 'How well do ranking keywords match page topic?' },
            untapped_opportunities: { type: 'array', items: { type: 'string' }, description: 'Keywords from research not yet targeted' }
          },
          required: %w[current_ranking_keywords topic_match untapped_opportunities]
        },
        # Structured data: schema types present vs. expected, FAQ count, quality and AIO readiness.
        schema_analysis: {
          type: 'object',
          additionalProperties: false,
          description: 'Analysis of JSON-LD structured data currently rendered on the page',
          properties: {
            schemas_present: { type: 'array', items: { type: 'string' }, description: 'Schema types found on the rendered page (e.g., FAQPage, Article, HowTo, Product, BreadcrumbList)' },
            schemas_expected: { type: 'array', items: { type: 'string' }, description: 'Schema types that SHOULD be on this page type but are missing' },
            faq_count: { type: 'integer', description: 'Number of FAQ questions in FAQPage schema (0 if no FAQPage)' },
            schema_quality: { type: 'string', enum: %w[excellent good needs_improvement poor not_crawled], description: 'Overall quality assessment of structured data' },
            aio_readiness: { type: 'string', enum: %w[strong moderate weak], description: 'How well the structured data supports AI Overview / AI search citation' },
            assessment: { type: 'string', description: 'Brief assessment of schema coverage and AIO readiness' }
          },
          required: %w[schemas_present schemas_expected faq_count schema_quality aio_readiness assessment]
        }
      },
      required: %w[title_analysis content_analysis keyword_analysis schema_analysis]
    },

    # PHASE 2: Scored Assessment
    # NOTE: the 0-100 range for overall_score is stated only in the description text;
    # the schema itself declares a plain integer.
    overall_score: { type: 'integer', description: 'SEO health score 0-100 based on Phase 1 analysis' },
    summary: { type: 'string', description: '2-3 sentence summary referencing specific findings from Phase 1' },
    competitive_position: { type: 'string', enum: %w[leading competitive average struggling] },

    # Strengths (what's already working - from Phase 1)
    strengths: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          finding: { type: 'string', description: 'What is working well' },
          evidence: { type: 'string', description: 'Data point supporting this (quote from input data)' }
        },
        required: %w[finding evidence]
      },
      description: 'What the page is doing well (3-5 items with evidence)'
    },

    # Opportunities (gaps found in Phase 1)
    opportunities: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          gap: { type: 'string', description: 'What is missing or could be improved' },
          evidence: { type: 'string', description: 'Data supporting this gap exists' },
          recommendation: { type: 'string', description: 'Specific action to address the gap' }
        },
        required: %w[gap evidence recommendation]
      },
      description: 'Improvement opportunities (3-5 items with evidence)'
    },

    # PHASE 3: Specific Recommendations (grounded in provided data)
    keyword_strategy: {
      type: 'object',
      additionalProperties: false,
      properties: {
        primary_keyword: { type: 'string', description: 'Best keyword from PROVIDED keyword research' },
        # The four allowed sources are named in the description only; no enum is declared.
        primary_keyword_source: { type: 'string', description: 'Where this keyword came from: designated_targets, ranking_data, keyword_research, or people_also_ask' },
        keyword_in_title: { type: 'boolean', description: 'Is this keyword already in the current title? (check Phase 1)' },
        title_action: { type: 'string', enum: %w[keep_current modify], description: 'Should title be changed?' },
        # suggested_title is required even for keep_current; the description asks for the current title to be copied.
        suggested_title: { type: 'string', description: 'Only if title_action is modify. Otherwise copy current title.' },
        secondary_keywords: {
          type: 'array',
          items: {
            type: 'object',
            additionalProperties: false,
            properties: {
              keyword: { type: 'string' },
              source: { type: 'string', description: 'Where this keyword appears in the input data' },
              placement: { type: 'string' }
            },
            required: %w[keyword source placement]
          }
        }
      },
      required: %w[primary_keyword primary_keyword_source keyword_in_title title_action suggested_title secondary_keywords]
    },

    # Internal link suggestions; every field of an entry is required.
    internal_linking: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          target_url: { type: 'string', description: 'URL from "Available Content" section ONLY' },
          target_title: { type: 'string' },
          anchor_text: { type: 'string' },
          placement: { type: 'string' },
          rationale: { type: 'string' }
        },
        required: %w[target_url target_title anchor_text placement rationale]
      },
      description: 'Links from "Available Content" section only - NOT from "Existing Links"'
    },

    # FAQs to attach to the page, identified by the integer id given in the prompt.
    faq_recommendations: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          faq_id: { type: 'integer', description: 'ID from "FAQs to Add" section ONLY' },
          question: { type: 'string' },
          rationale: { type: 'string' }
        },
        required: %w[faq_id question rationale]
      },
      description: 'FAQs from "FAQs to Add" section only - NOT from "Existing FAQs"'
    },

    # Answers to "People Also Ask" questions, with placement and AI-citation rationale.
    people_also_ask_content: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          question: { type: 'string', description: 'Question from People Also Ask section' },
          suggested_answer: { type: 'string' },
          placement: { type: 'string' },
          ai_citation_value: { type: 'string', description: 'How this helps the page get cited by AI search engines' }
        },
        required: %w[question suggested_answer placement ai_citation_value]
      }
    },

    # On-page content changes, each with a location, rationale and AI-search benefit.
    content_recommendations: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          recommendation: { type: 'string' },
          location: { type: 'string' },
          rationale: { type: 'string' },
          ai_search_benefit: { type: 'string', description: 'How this helps with AI search citations (ChatGPT, Perplexity, etc.)' }
        },
        required: %w[recommendation location rationale ai_search_benefit]
      }
    },

    # Technical fixes as a flat list of plain strings (no per-item structure).
    technical_recommendations: {
      type: 'array',
      items: { type: 'string' }
    },

    # JSON-LD changes: which schema type, what action to take, and why it helps AIO.
    structured_data_recommendations: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          schema_type: { type: 'string', description: 'Schema.org type (e.g., FAQPage, HowTo, Product, Article)' },
          action: { type: 'string', enum: %w[add expand update remove], description: 'What to do with this schema' },
          details: { type: 'string', description: 'Specific changes needed (e.g., "Add 8 more FAQ questions covering lifespan, maintenance, disadvantages")' },
          aio_benefit: { type: 'string', description: 'How this helps with AI search citations and AIO' }
        },
        required: %w[schema_type action details aio_benefit]
      },
      description: 'Specific actions to improve JSON-LD structured data for SEO and AIO'
    },

    # AI-search (AIO / GEO) recommendations, categorised and rated by impact.
    aio_recommendations: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          recommendation: { type: 'string', description: 'Specific action to improve AI search visibility' },
          category: { type: 'string', enum: %w[content_structure answer_targeting schema_markup authority_signals freshness], description: 'Category of AIO recommendation' },
          rationale: { type: 'string', description: 'Why this matters for AI search engines (ChatGPT, Perplexity, Google AI Overview)' },
          impact: { type: 'string', enum: %w[high medium low] }
        },
        required: %w[recommendation category rationale impact]
      },
      description: 'Recommendations specifically for AI search optimization (AIO / GEO)'
    },

    # Paid/organic synergy. The key is always required by the schema; the description
    # asks for entries only when Google Ads data is provided.
    paid_organic_synergy: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          keyword: { type: 'string', description: 'The keyword or search term' },
          action: { type: 'string', description: 'Specific action: reduce bid, create content, pause campaign, etc.' },
          rationale: { type: 'string', description: 'Why — reference organic position, CPC, spend, and potential savings' },
          estimated_monthly_impact: { type: 'string', description: 'Estimated monthly savings or revenue impact (e.g. "Save ~$500/mo")' },
          impact: { type: 'string', enum: %w[high medium low] }
        },
        required: %w[keyword action rationale estimated_monthly_impact impact]
      },
      description: 'Paid/organic synergy recommendations — only when Google Ads data is provided'
    },

    # Priority actions with evidence chain
    priority_actions: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          action: { type: 'string', description: 'Specific action with exact details' },
          evidence: { type: 'string', description: 'Why this action is needed (reference input data)' },
          impact: { type: 'string', enum: %w[high medium low] },
          effort: { type: 'string', enum: %w[high medium low] }
        },
        required: %w[action evidence impact effort]
      },
      description: 'Top 3-5 priority actions with evidence'
    }
  },
  required: %w[current_state_analysis overall_score summary competitive_position strengths opportunities keyword_strategy internal_linking faq_recommendations people_also_ask_content content_recommendations technical_recommendations
               structured_data_recommendations aio_recommendations paid_organic_synergy priority_actions]
  # NOTE: the freeze below is shallow — only the outer hash is frozen; the nested
  # hashes and arrays are not frozen by this call.
}.freeze

Instance Method Summary collapse

Methods inherited from BaseService

#log_debug, #log_error, #log_info, #log_warning, #logger, #options, #tagged_logger

Constructor Details

#initialize(options = {}) ⇒ PageAnalysisService

Returns a new instance of PageAnalysisService.

Raises:

  • (ArgumentError)


292
293
294
295
296
297
298
# File 'app/services/seo/page_analysis_service.rb', line 292

# Set up the service from an options hash.
#
# The options are forwarded unchanged to the parent initializer before the
# service-specific keys are read.
#
# @param options [Hash] recognised keys:
#   +:site_map+ (required), +:force+ (falls back to false),
#   +:model+ (falls back to ANALYSIS_MODEL)
# @raise [ArgumentError] when +:site_map+ is missing or falsy
def initialize(options = {})
  super

  @site_map = options[:site_map]
  @force    = options[:force] || false
  @model    = options[:model] || ANALYSIS_MODEL
  return if @site_map

  raise ArgumentError, 'site_map is required'
end

Instance Method Details

#build_prompts ⇒ Object

Build prompts for the Claude Batch API without making any AI call.
Returns a hash with everything needed to submit a batch request:
{ system_prompt:, user_prompt:, model:, schema:, max_tokens:, temperature: }

Used by SeoBatchCollectorWorker to prepare batch items for the
Anthropic Message Batches API (50% cost reduction).



328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
# File 'app/services/seo/page_analysis_service.rb', line 328

# Assemble the request parameters for an analysis without issuing an AI call.
#
# Runs +gather_context+, then combines the system/user prompts, the configured
# model and ANALYSIS_SCHEMA with sampling settings selected by +premium?+.
#
# @return [Hash] on success: +:system_prompt+, +:user_prompt+, +:model+,
#   +:schema+, +:max_tokens+, +:temperature+ and (premium only) +:thinking+;
#   on any StandardError: a hash with a single +:error+ key holding the message
def build_prompts
  gather_context

  request = {
    system_prompt: system_prompt,
    user_prompt: user_prompt,
    model: @model,
    schema: ANALYSIS_SCHEMA
  }

  sampling =
    if premium?
      # max_tokens is the thinking budget plus the regular output cap, so the
      # thinking allowance does not eat into the room left for the answer.
      # NOTE(review): temperature is pinned to 1 on this path — presumably
      # because extended thinking requires it; confirm against provider docs.
      # NOTE(review): confirm THINKING_BUDGET + MAX_OUTPUT_TOKENS stays within
      # the premium model's maximum output-token limit.
      { max_tokens: THINKING_BUDGET + MAX_OUTPUT_TOKENS, temperature: 1,
        thinking: { type: 'enabled', budget_tokens: THINKING_BUDGET } }
    else
      { max_tokens: MAX_OUTPUT_TOKENS, temperature: TEMPERATURE }
    end

  request.merge(sampling)
rescue StandardError => e
  @logger.error "[PageAnalysisService] Prompt build failed for SiteMap #{@site_map.id}: #{e.message}"
  # Exception#backtrace can be nil; Array() keeps this handler from raising
  # NoMethodError and masking the original failure.
  @logger.error Array(e.backtrace).first(5).join("\n")
  { error: e.message }
end

#process ⇒ Object



300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'app/services/seo/page_analysis_service.rb', line 300

# Run the SEO analysis for the configured SiteMap.
#
# When +force+ was not requested and +recent_analysis?+ is true, the stored
# report is returned without any new analysis. Otherwise context is gathered,
# an analysis is generated and handed to +save_analysis+.
#
# @return [Object] +@site_map.seo_report+ on the cached path; the value
#   returned by +generate_analysis+ on a fresh run; or a hash with a single
#   +:error+ key holding the message when a StandardError is raised
def process
  # Skip if recent analysis exists and not forcing
  if !@force && recent_analysis?
    @logger.info "[PageAnalysisService] Using cached analysis for SiteMap #{@site_map.id}"
    return @site_map.seo_report
  end

  @logger.info "[PageAnalysisService] Analyzing SiteMap #{@site_map.id}"

  # Gather all context needed for analysis
  gather_context

  analysis = generate_analysis
  save_analysis(analysis)

  analysis
rescue StandardError => e
  # Include the SiteMap id so a failure can be traced to a page, matching the
  # other log lines emitted by this service.
  @logger.error "[PageAnalysisService] Analysis failed for SiteMap #{@site_map.id}: #{e.message}"
  # Exception#backtrace can be nil; Array() keeps this handler from raising
  # NoMethodError and masking the original failure.
  @logger.error Array(e.backtrace).first(5).join("\n")
  { error: e.message }
end