Class: ContentEmbedding

Inherits:
ApplicationRecord show all
Includes:
UnifiedSearchable
Defined in:
app/models/content_embedding.rb,
app/models/content_embedding/item_embedding.rb,
app/models/content_embedding/post_embedding.rb,
app/models/content_embedding/image_embedding.rb,
app/models/content_embedding/video_embedding.rb,
app/models/content_embedding/article_embedding.rb,
app/models/content_embedding/activity_embedding.rb,
app/models/content_embedding/showcase_embedding.rb,
app/models/content_embedding/site_map_embedding.rb,
app/models/content_embedding/reviews_io_embedding.rb,
app/models/content_embedding/call_record_embedding.rb,
app/models/content_embedding/product_line_embedding.rb

Overview

== Schema Information

Table name: content_embeddings_product_lines
Database name: primary

id :bigint not null, primary key
content_hash :string(32)
content_type :string default("primary"), not null
embeddable_type :string not null
embedding_dimensions :integer
embedding_model :string
locale :string default("en")
model :string
token_count :integer
unified_embedding :vector
created_at :timestamp not null
updated_at :timestamp not null
embeddable_id :bigint not null

Indexes

idx_content_embeddings_product_lines_embedding_model (embedding_model)
idx_content_embeddings_product_lines_unique (embeddable_id,content_type,locale) UNIQUE

Foreign Keys

fk_content_embeddings_product_lines_embeddable (embeddable_id => product_lines.id) ON DELETE => cascade

Defined Under Namespace

Modules: TextSearchable, UnifiedSearchable Classes: ActivityEmbedding, ArticleEmbedding, CallRecordEmbedding, ImageEmbedding, ItemEmbedding, PostEmbedding, ProductLineEmbedding, ReviewsIoEmbedding, ShowcaseEmbedding, SiteMapEmbedding, VideoEmbedding

Constant Summary collapse

UNIFIED_MODEL =

Unified model (GA) — the only embedding model written. Sourced from the
canonical registry (config/initializers/ai_model_constants.rb); one
cross-modal model embeds both text and images.

AiModelConstants.id(:unified_embedding)
UNIFIED_DIMENSIONS =

MRL output width used for all embeddings (HNSW-compatible; ≤ pgvector 2000 limit).

1536
UNIFIED_MODEL_PREVIEW =

Transitional pre-GA id, still matched on read while older rows re-embed.
Drop once the backfill reaches zero (by_model(UNIFIED_MODEL_PREVIEW).none?).

"#{UNIFIED_MODEL}-preview"
UNIFIED_MODELS =

GA id + the transitional preview id. Searches match both while the corpus
re-embeds.

[UNIFIED_MODEL, UNIFIED_MODEL_PREVIEW].freeze
LEGACY_UNIFIED_MODEL =

Legacy model pending migration to UNIFIED_MODEL (not in the active registry).

'jina-embeddings-v4'
EMBEDDING_MODELS =

Known embedding models and their dimensions.
NOTE: HNSW indexes have a 2000 dimension limit in pgvector.

{
  UNIFIED_MODEL => { dimensions: UNIFIED_DIMENSIONS, type: :multimodal },
  UNIFIED_MODEL_PREVIEW => { dimensions: UNIFIED_DIMENSIONS, type: :multimodal },
  LEGACY_UNIFIED_MODEL => { dimensions: UNIFIED_DIMENSIONS, type: :multimodal }
}.freeze
SENSITIVE_TYPES =

Types that contain sensitive data and should NOT be exposed via MCP

%w[CallRecord Activity Communication].freeze
SEMANTIC_SIMILARITY_THRESHOLD =

Minimum similarity threshold for semantic search (0.0-1.0)
Results below this similarity are excluded as noise
0 = no filtering (default), 0.1 = very permissive, 0.3 = moderate, 0.5 = strict

0.0

Constants included from Schedulable

Schedulable::SIMPLE_FORM_OPTIONS

Instance Attribute Summary collapse

Belongs to collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from ApplicationRecord

ransackable_associations, ransackable_attributes, ransackable_scopes, ransortable_attributes, #to_relation

Methods included from Schedulable

config

Methods included from Models::AfterCommittable

#after_commit

Methods included from Models::EventPublishable

#publish_event

Instance Attribute Details

#content_hashObject (readonly)

content_hash is only required for text embeddings, not unified embeddings
Unified rows (content_type='unified') don't need content_hash

Validations (unless => -> { content_type == 'unified' } ):



154
# File 'app/models/content_embedding.rb', line 154

# A content_hash must be present on every row except those whose
# content_type is 'unified', which are exempt from the check.
validates :content_hash,
          presence: true,
          unless: -> { content_type == 'unified' }

#content_typeObject (readonly)

Validations

Validations:



151
# File 'app/models/content_embedding.rb', line 151

# Every embedding row must declare a content_type (blank values are rejected).
validates :content_type, presence: true

#embeddable_typeObject (readonly)



155
156
157
158
# File 'app/models/content_embedding.rb', line 155

# Restricts embeddable_type to the allow-list of class names below.
# The message is an annotated format string; %<value>s is presumably filled
# with the rejected value by the validation framework.
validates :embeddable_type,
          inclusion: {
            in: %w[
              Post Article Showcase Video Image Item ProductLine SiteMap ReviewsIo
              CallRecord AssistantBrainEntry Activity Communication
            ],
            message: '%<value>s is not a supported embeddable type'
          }

Class Method Details

.active_imagesActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are active images. Active Record Scope

Returns:

See Also:



243
244
245
246
247
# File 'app/models/content_embedding.rb', line 243

# Image embeddings whose joined digital_assets row has inactive = false.
scope :active_images, lambda {
  image_rows = where(embeddable_type: 'Image')
  image_rows
    .joins('INNER JOIN digital_assets ON digital_assets.id = content_embeddings.embeddable_id')
    .where(digital_assets: { inactive: false })
}

.active_publicationsActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are active publications. Active Record Scope

Returns:

See Also:



279
280
281
282
283
# File 'app/models/content_embedding.rb', line 279

# Item embeddings whose joined items row has is_discontinued = false.
scope :active_publications, lambda {
  item_rows = where(embeddable_type: 'Item')
  item_rows
    .joins('INNER JOIN items ON items.id = content_embeddings.embeddable_id')
    .where(items: { is_discontinued: false })
}

.active_reviewsActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are active reviews. Active Record Scope

Returns:

See Also:



272
273
274
275
276
# File 'app/models/content_embedding.rb', line 272

# ReviewsIo embeddings whose joined reviews_io row has status 'active'.
scope :active_reviews, lambda {
  review_rows = where(embeddable_type: 'ReviewsIo')
  review_rows
    .joins('INNER JOIN reviews_io ON reviews_io.id = content_embeddings.embeddable_id')
    .where(reviews_io: { status: 'active' })
}

.active_videosActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are active videos. Active Record Scope

Returns:

See Also:



236
237
238
239
240
# File 'app/models/content_embedding.rb', line 236

# Video embeddings whose joined digital_assets row has inactive = false.
scope :active_videos, lambda {
  video_rows = where(embeddable_type: 'Video')
  video_rows
    .joins('INNER JOIN digital_assets ON digital_assets.id = content_embeddings.embeddable_id')
    .where(digital_assets: { inactive: false })
}

.by_dimensionsActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings filtered by embedding dimensions. Active Record Scope

Returns:

See Also:



203
# File 'app/models/content_embedding.rb', line 203

# Rows whose embedding_dimensions column matches the given value(s).
scope :by_dimensions, lambda { |width|
  where(embedding_dimensions: width)
}

.by_modelActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings filtered by embedding model. Active Record Scope

Returns:

See Also:



197
# File 'app/models/content_embedding.rb', line 197

# Rows whose embedding_model column matches the given model id(s).
scope :by_model, lambda { |model_id|
  where(embedding_model: model_id)
}

.by_typeActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings filtered by embeddable type; returns all rows when no types are given. Active Record Scope

Returns:

See Also:



167
168
169
170
# File 'app/models/content_embedding.rb', line 167

# Filters by embeddable_type. Accepts a single type, an array, or nested
# arrays; nils are dropped. With nothing left to filter on, the scope is a
# no-op and returns all rows.
scope :by_type, lambda { |types|
  wanted = Array(types).flatten.compact

  if wanted.blank?
    all
  else
    where(embeddable_type: wanted)
  end
}

.calculate_rrf_scores(vector_results, keyword_results, k) ⇒ Hash{Integer => Float}

Calculate Reciprocal Rank Fusion scores
RRF Score = sum of 1/(k + rank) for each result list

Parameters:

  • vector_results (Array<ContentEmbedding>)

    Results from vector search

  • keyword_results (Array<ContentEmbedding>)

    Results from keyword search

  • k (Integer)

    RRF constant (typically 60)

Returns:

  • (Hash{Integer => Float})

    Map of embedding ID to RRF score



372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
# File 'app/models/content_embedding.rb', line 372

# Reciprocal Rank Fusion: every result contributes 1 / (k + rank) for each
# list it appears in, where rank is its 1-based position in that list.
#
# @param vector_results [Array<ContentEmbedding>] ranked vector-search hits
# @param keyword_results [Array<ContentEmbedding>] ranked keyword-search hits
# @param k [Integer] RRF constant (typically 60)
# @return [Hash{Integer => Float}] embedding id => fused score (default 0.0)
def self.calculate_rrf_scores(vector_results, keyword_results, k)
  # Vector hits are folded in first, then keyword hits, so ids keep the same
  # insertion order in the returned hash.
  [vector_results, keyword_results].each_with_object(Hash.new(0.0)) do |ranked_list, fused|
    ranked_list.each_with_index do |hit, zero_based_rank|
      # each_with_index is 0-based; the +1 turns it into a 1-based rank.
      fused[hit.id] += 1.0 / (k + zero_based_rank + 1)
    end
  end
end

.faqs_onlyActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are faqs only. Active Record Scope

Returns:

See Also:



219
220
221
222
223
# File 'app/models/content_embedding.rb', line 219

scope :faqs_only, -> {
  where(embeddable_type: 'Article')
    .joins('INNER JOIN articles ON articles.id = content_embeddings.embeddable_id')
    .where(articles: { type: 'ArticleFaq' })
}

.find_similar(record, limit: 5, same_type_only: false) ⇒ ActiveRecord::Relation

Find content similar to a given record

Examples:

Find similar showcases

ContentEmbedding.find_similar(showcase, same_type_only: true)

Parameters:

  • record (ApplicationRecord)

    The record to find similar content for

  • limit (Integer) (defaults to: 5)

    Maximum number of results (default: 5)

  • same_type_only (Boolean) (defaults to: false)

    Only return same type of content (default: false)

Returns:

  • (ActiveRecord::Relation)

    Similar embeddings ordered by similarity



321
322
323
324
325
326
327
328
329
# File 'app/models/content_embedding.rb', line 321

# Find content similar to a given record using cosine nearest-neighbour
# search on unified_embedding.
#
# The seed embedding is taken only from rows written by UNIFIED_MODELS, the
# same models the candidates are restricted to. Seeding from a row produced by
# another model (e.g. the legacy one) would compare vectors from different
# embedding spaces.
#
# @param record [ApplicationRecord] record exposing +content_embeddings+
# @param limit [Integer] maximum number of results
# @param same_type_only [Boolean] restrict results to the seed's embeddable_type
# @return [ActiveRecord::Relation] similar embeddings ordered by similarity;
#   an empty relation when the record has no usable unified embedding
def self.find_similar(record, limit: 5, same_type_only: false)
  seed = record.content_embeddings.by_model(UNIFIED_MODELS).with_unified_embedding.first
  return none unless seed&.unified_embedding

  # Multi-condition where.not is intentional: it excludes only the row pair
  # (same type AND same id), i.e. the record's own embeddings.
  candidates = where.not(embeddable_type: seed.embeddable_type, embeddable_id: record.id) # rubocop:disable Rails/WhereNotWithMultipleConditions
  candidates = candidates.by_type(seed.embeddable_type) if same_type_only
  candidates.by_model(UNIFIED_MODELS).with_unified_embedding
            .nearest_neighbors(:unified_embedding, seed.unified_embedding, distance: :cosine)
            .limit(limit)
end

.for_localeActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings for the given locale; a base locale (e.g. en) also matches its regional variants (e.g. en-US). Active Record Scope

Returns:

See Also:



179
180
181
182
183
184
185
186
187
188
189
# File 'app/models/content_embedding.rb', line 179

# Locale filter.
# - A regional locale (contains '-', e.g. en-US, en-CA, fr-CA) matches exactly.
# - A base locale (e.g. en) matches itself plus every "<base>-…" variant.
scope :for_locale, lambda { |locale|
  requested = locale.to_s
  regional = requested.include?('-')

  if regional
    where(locale: requested)
  else
    # Built with Arel so the column stays table-qualified and unambiguous
    # when this scope is combined with joins.
    exact = ContentEmbedding[:locale].eq(requested)
    variants = ContentEmbedding[:locale].matches("#{requested}-%", nil, true)
    where(exact.or(variants))
  end
}

.gemini_embeddingActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings written by the unified embedding models (GA and preview ids). Active Record Scope

Returns:

See Also:



206
# File 'app/models/content_embedding.rb', line 206

# Rows written by any of the UNIFIED_MODELS ids.
scope :gemini_embedding, lambda {
  by_model(UNIFIED_MODELS)
}

.generate_query_embedding(query, model: UNIFIED_MODEL) ⇒ Array<Float>?

Returns Gemini query embedding (model arg accepted for
back-compat; always embedded via Gemini).

Returns:

  • (Array<Float>, nil)

    Gemini query embedding (model arg accepted for
    back-compat; always embedded via Gemini)



307
308
309
# File 'app/models/content_embedding.rb', line 307

# Builds the query-side embedding by delegating to
# generate_unified_query_embedding at UNIFIED_DIMENSIONS width.
#
# @param query [Object] forwarded unchanged (presumably the search text — confirm against the helper)
# @param model [String] forwarded unchanged; kept for backward compatibility
# @return [Array<Float>, nil] whatever the unified helper returns
def self.generate_query_embedding(query, model: UNIFIED_MODEL)
  embedding_options = { model: model, dimensions: UNIFIED_DIMENSIONS }
  generate_unified_query_embedding(query, **embedding_options)
end

.hybrid_search(query) ⇒ Array<ContentEmbedding>

Returns vector + keyword RRF over the unified space.

Returns:



301
302
303
# File 'app/models/content_embedding.rb', line 301

# Thin alias: forwards the query and every keyword option untouched to
# unified_hybrid_search (vector + keyword RRF over the unified space).
#
# @param query [Object] forwarded unchanged
# @return [Array<ContentEmbedding>]
def self.hybrid_search(query, **) = unified_hybrid_search(query, **)

.images_onlyActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are images only. Active Record Scope

Returns:

See Also:



401
# File 'app/models/content_embedding.rb', line 401

# Rows whose embeddable_type is 'Image' (no activity/published filtering).
scope :images_only, lambda {
  where(embeddable_type: 'Image')
}

.jina_v4ActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings written by the legacy jina-embeddings-v4 model. Active Record Scope

Returns:

See Also:



209
# File 'app/models/content_embedding.rb', line 209

# Rows written by LEGACY_UNIFIED_MODEL.
scope :jina_v4, lambda {
  by_model(LEGACY_UNIFIED_MODEL)
}

.keyword_search_for_rrf(query, limit, types, locale, published_only, exclude_sensitive: true, base_scope: all) ⇒ Object

Keyword search component for RRF (used by unified_hybrid_search).
Uses ILIKE across multiple content types with proper joins.



334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# File 'app/models/content_embedding.rb', line 334

# Keyword search component for RRF (used by unified_hybrid_search).
# Runs a case-insensitive substring match (ILIKE) over the text columns of
# each joined content table.
#
# The query is treated as literal text: LIKE metacharacters (%, _ and the
# escape character) are escaped so user input cannot act as a wildcard.
#
# @param query [String, nil] search text; nil is treated as an empty string
# @param limit [Integer] maximum number of rows returned
# @param types [Array<String>, String, nil] optional embeddable_type filter
# @param locale [String, Symbol] locale passed to the for_locale scope
# @param published_only [Boolean] apply the published_only scope when truthy
# @param exclude_sensitive [Boolean] apply the mcp_safe scope when truthy
# @param base_scope [ActiveRecord::Relation] relation to start from
# @return [Array<ContentEmbedding>] loaded rows
#
# NOTE(review): there is no ORDER BY before the LIMIT, so the order (and
# therefore the rank fed into RRF) is whatever the database returns — confirm
# this is intended.
def self.keyword_search_for_rrf(query, limit, types, locale, published_only, exclude_sensitive: true, base_scope: all)
  scope = base_scope
  scope = scope.mcp_safe if exclude_sensitive
  scope = scope.by_type(types) if types.present?
  scope = scope.for_locale(locale)
  scope = scope.published_only if published_only

  # Escape %, _ and \ so they match literally instead of as wildcards.
  pattern = "%#{sanitize_sql_like(query.to_s)}%"

  # Each LEFT JOIN only attaches for its own embeddable_type, so a row can
  # match through the columns of the one table it belongs to.
  scope
    .joins("LEFT JOIN articles ON embeddable_type IN ('Article', 'Post') AND articles.id = embeddable_id")
    .joins("LEFT JOIN showcases ON embeddable_type = 'Showcase' AND showcases.id = embeddable_id")
    .joins("LEFT JOIN digital_assets ON embeddable_type IN ('Video', 'Image') AND digital_assets.id = embeddable_id")
    .joins("LEFT JOIN items ON embeddable_type = 'Item' AND items.id = embeddable_id")
    .joins("LEFT JOIN product_lines ON embeddable_type = 'ProductLine' AND product_lines.id = embeddable_id")
    .joins("LEFT JOIN site_maps ON embeddable_type = 'SiteMap' AND site_maps.id = embeddable_id")
    .where(
      <<~SQL.squish,
        articles.subject ILIKE :q OR articles.description ILIKE :q OR
        showcases.name ILIKE :q OR showcases.description ILIKE :q OR
        digital_assets.title ILIKE :q OR digital_assets.meta_description ILIKE :q OR
        items.name ILIKE :q OR items.sku ILIKE :q OR items.search_text ILIKE :q OR
        product_lines.name ILIKE :q OR product_lines.tag_line ILIKE :q OR
        site_maps.extracted_title ILIKE :q OR site_maps.extracted_content ILIKE :q
      SQL
      q: pattern
    )
    .limit(limit)
    .to_a
end

.mcp_safeActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that excludes the sensitive embeddable types (SENSITIVE_TYPES). Active Record Scope

Returns:

See Also:



164
# File 'app/models/content_embedding.rb', line 164

# Drops every row whose embeddable_type is listed in SENSITIVE_TYPES.
scope :mcp_safe, lambda {
  where.not(embeddable_type: SENSITIVE_TYPES)
}

.posts_onlyActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are posts only. Active Record Scope

Returns:

See Also:



226
# File 'app/models/content_embedding.rb', line 226

# Rows whose embeddable_type is 'Post'.
scope :posts_only, lambda {
  where(embeddable_type: 'Post')
}

.primary_contentActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are primary content. Active Record Scope

Returns:

See Also:



171
# File 'app/models/content_embedding.rb', line 171

# Rows whose content_type is 'primary'.
scope :primary_content, lambda {
  where(content_type: 'primary')
}

.published_articlesActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are published articles. Active Record Scope

Returns:

See Also:



213
214
215
216
217
# File 'app/models/content_embedding.rb', line 213

# Article and Post embeddings whose joined articles row has state 'published'.
scope :published_articles, lambda {
  article_rows = where(embeddable_type: %w[Article Post])
  article_rows
    .joins('INNER JOIN articles ON articles.id = content_embeddings.embeddable_id')
    .where(articles: { state: 'published' })
}

.published_onlyActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are published only. Active Record Scope

Returns:

See Also:



250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
# File 'app/models/content_embedding.rb', line 250

# Keeps a row when its source record is publicly visible, checked per type
# with an EXISTS subquery (no joins, so it composes with other scopes):
#   Article/Post -> articles.state = 'published'
#   Showcase     -> showcases.state = 'published'
#   Video/Image  -> digital_assets.inactive = false
#   ReviewsIo    -> reviews_io.status = 'active'
#   Item         -> items.is_discontinued = false
# Any other embeddable_type passes through unfiltered.
scope :published_only, lambda {
  visibility_sql = <<~SQL.squish
    (embeddable_type IN ('Article', 'Post') AND EXISTS (
      SELECT 1 FROM articles WHERE articles.id = content_embeddings.embeddable_id AND articles.state = 'published'
    ))
    OR (embeddable_type = 'Showcase' AND EXISTS (
      SELECT 1 FROM showcases WHERE showcases.id = content_embeddings.embeddable_id AND showcases.state = 'published'
    ))
    OR (embeddable_type IN ('Video', 'Image') AND EXISTS (
      SELECT 1 FROM digital_assets WHERE digital_assets.id = content_embeddings.embeddable_id AND digital_assets.inactive = false
    ))
    OR (embeddable_type = 'ReviewsIo' AND EXISTS (
      SELECT 1 FROM reviews_io WHERE reviews_io.id = content_embeddings.embeddable_id AND reviews_io.status = 'active'
    ))
    OR (embeddable_type = 'Item' AND EXISTS (
      SELECT 1 FROM items WHERE items.id = content_embeddings.embeddable_id AND items.is_discontinued = false
    ))
    OR embeddable_type NOT IN ('Article', 'Post', 'Showcase', 'Video', 'Image', 'ReviewsIo', 'Item')
  SQL

  where(visibility_sql)
}

.published_showcasesActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are published showcases. Active Record Scope

Returns:

See Also:



229
230
231
232
233
# File 'app/models/content_embedding.rb', line 229

# Showcase embeddings whose joined showcases row has state 'published'.
scope :published_showcases, lambda {
  showcase_rows = where(embeddable_type: 'Showcase')
  showcase_rows
    .joins('INNER JOIN showcases ON showcases.id = content_embeddings.embeddable_id')
    .where(showcases: { state: 'published' })
}

.recent_firstActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings ordered newest first (by created_at). Active Record Scope

Returns:

See Also:



190
# File 'app/models/content_embedding.rb', line 190

# Orders rows by created_at, newest first.
scope :recent_first, lambda {
  order(created_at: :desc)
}

.semantic_search(query) ⇒ ActiveRecord::Relation

Returns vector search over the unified Gemini space.

Returns:

  • (ActiveRecord::Relation)

    vector search over the unified Gemini space



296
297
298
# File 'app/models/content_embedding.rb', line 296

# Thin alias: forwards the query and every keyword option untouched to
# unified_search (vector search over the unified space).
#
# @param query [Object] forwarded unchanged
# @return [ActiveRecord::Relation]
def self.semantic_search(query, **) = unified_search(query, **)

.unified_contentActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that are unified content. Active Record Scope

Returns:

See Also:



173
# File 'app/models/content_embedding.rb', line 173

# Rows whose content_type is 'unified'.
scope :unified_content, lambda {
  where(content_type: 'unified')
}

.with_unified_embeddingActiveRecord::Relation<ContentEmbedding>

A relation of ContentEmbeddings that have a non-null unified_embedding. Active Record Scope

Returns:

See Also:



200
# File 'app/models/content_embedding.rb', line 200

# Rows that actually carry a vector (unified_embedding IS NOT NULL).
scope :with_unified_embedding, lambda {
  where.not(unified_embedding: nil)
}

Instance Method Details

#embeddableEmbeddable

Returns:

  • (Embeddable)

See Also:



148
# File 'app/models/content_embedding.rb', line 148

# Polymorphic owner of this embedding; the target class and row are resolved
# from the embeddable_type / embeddable_id columns.
belongs_to :embeddable, polymorphic: true

#similarity_scoreObject

Calculate similarity score (0-1, higher is more similar)



393
394
395
396
397
398
# File 'app/models/content_embedding.rb', line 393

# Similarity score derived from the cosine neighbor distance
# (0-1, higher is more similar).
#
# @return [Float, nil] nil when the record exposes no neighbor_distance
#   (presumably because it was not loaded through a nearest-neighbour query)
#   or when the distance itself is nil
def similarity_score
  return nil unless respond_to?(:neighbor_distance)

  distance = neighbor_distance
  # A nil distance would raise on the division below; report "no score" instead.
  return nil if distance.nil?

  # Cosine distance is 0-2, convert to similarity 0-1
  1.0 - (distance / 2.0)
end