Module: Heatwave::Normalizers

Defined in:
app/lib/heatwave/normalizers.rb

Overview

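Library code: named value normalizers (strip, blank, numeric, phone,
shipping carrier, …) that can be called individually or composed in
sequence with chain.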

Constant Summary collapse

DEFAULT_CHAIN =

Default chain.

%i[strip blank].freeze
TRUE_VALUES =

True values.

[true, 1, '1', 't', 'T', 'true', 'TRUE', 'on', 'ON'].freeze
NULL_STRINGS =

Null strings.

['', '{}', '[]', 'null', 'nil'].freeze
ASCII_PUNCTUATION_MAP =

Common Unicode punctuation that breaks ASCII-only protocols (e.g.
X12 EDI N3 Address Information segments). Mapped to their plain
ASCII equivalents so a downstream I18n.transliterate call doesn't
have to fall back to ?.

{
  "" => "'",  "" => "'",  "" => "'",  "" => "'",
  "" => '"',  "" => '"',  "" => '"',  "" => '"',
  "" => "'",  "" => '"',
  "" => '-',  "" => '-',  "" => '-',  "" => '-',
  "" => '-',  "" => '-',  "" => '-',
  "" => '...',
  " " => ' ',  "" => ' ',  "" => '',   "" => ''
}.freeze
ASCII_PUNCTUATION_REGEXP =
Regexp.union(ASCII_PUNCTUATION_MAP.keys).freeze
LEGAL_MARK_REGEXP =
/[™®©℠℗ⒸⓇ]/

Class Method Summary collapse

Class Method Details

.ascii_safe(value) ⇒ Object

Map common Unicode punctuation (smart quotes, primes, en/em dashes,
ellipsis, NBSP, ZWSP, BOM) to ASCII, then I18n.transliterate to
strip accents. Use for outbound payloads that must be ASCII-only,
e.g. X12 EDI N3 Address Information segments which reject "Anne’s".



126
127
128
129
130
# File 'app/lib/heatwave/normalizers.rb', line 126

# Convert a String to an ASCII-friendly form: punctuation listed in
# ASCII_PUNCTUATION_MAP is swapped for its mapped replacement, then the
# result is passed through I18n.transliterate.
#
# Non-strings and blank strings are returned unchanged.
def ascii_safe(value)
  if value.is_a?(String) && value.present?
    ascii_punctuation = value.gsub(ASCII_PUNCTUATION_REGEXP, ASCII_PUNCTUATION_MAP)
    I18n.transliterate(ascii_punctuation)
  else
    value
  end
end

.blank(value) ⇒ Object

Returns nil when the value is a String composed entirely of
whitespace (or empty); leaves non-strings alone.



86
87
88
89
90
# File 'app/lib/heatwave/normalizers.rb', line 86

# Turn an empty or whitespace-only String into nil.
#
# Any other value (including non-strings) is returned as-is.
def blank(value)
  if value.is_a?(String) && value.match?(/\A[[:space:]]*\z/)
    nil
  else
    value
  end
end

.boolean(value) ⇒ Object



153
154
155
156
157
# File 'app/lib/heatwave/normalizers.rb', line 153

# Coerce a value to true/false by membership in TRUE_VALUES.
#
# A blank String yields nil (no answer given); every other value that is
# not listed in TRUE_VALUES yields false.
def boolean(value)
  blank_string = value.is_a?(String) && value.blank?
  blank_string ? nil : TRUE_VALUES.include?(value)
end

.capitalize(value) ⇒ Object



108
109
110
# File 'app/lib/heatwave/normalizers.rb', line 108

# Capitalize a String (first character upper-cased, the rest lower-cased);
# non-strings are returned unchanged.
def capitalize(value)
  return value unless value.is_a?(String)

  value.capitalize
end

.chain(value, *names, **options) ⇒ Object

Apply a sequence of named normalizers. options is a hash keyed
by normalizer name → options hash for that step.

chain(' Foo ', :strip, :blank, :downcase) # → 'foo'
chain(' 12.5 ', :strip, :numeric, numeric: { precision: 2 })

Arrays are normalized per-element (mirroring the legacy normalizr
gem's behaviour for array-typed attributes); resulting nil entries
are dropped so callers don't accidentally write nils into PG array
columns.



68
69
70
71
72
73
74
75
76
# File 'app/lib/heatwave/normalizers.rb', line 68

# Run +value+ through the named normalizers, left to right.
#
# +names+ are method names (Symbols or Strings) dispatched with
# public_send. +options+ is keyed by normalizer name; when the entry for
# a step is a Hash it is splatted into that step as keyword arguments.
#
# An Array is normalized element by element (recursively) and nil results
# are removed from the returned Array.
def chain(value, *names, **options)
  if value.is_a?(Array)
    normalized_items = value.map { |element| chain(element, *names, **options) }
    return normalized_items.compact
  end

  result = value
  names.each do |step|
    normalizer = step.to_sym
    step_options = options[normalizer]
    result =
      if step_options.is_a?(Hash)
        public_send(normalizer, result, **step_options)
      else
        public_send(normalizer, result)
      end
  end
  result
end

.control_chars(value) ⇒ Object



112
113
114
# File 'app/lib/heatwave/normalizers.rb', line 112

# Remove control characters from a String, keeping those that are also
# whitespace (tab, newline, ...). Non-strings are returned unchanged.
def control_chars(value)
  return value unless value.is_a?(String)

  value.gsub(/[[:cntrl:]&&[^[:space:]]]/, '')
end

.currency(value) ⇒ Object



172
173
174
# File 'app/lib/heatwave/normalizers.rb', line 172

# Reduce a currency String to digits and dots only (symbols, thousands
# separators, spaces and signs are all removed). Non-strings are returned
# unchanged.
def currency(value)
  return value unless value.is_a?(String)

  value.gsub(/[^0-9.]+/, '')
end

.date(value) ⇒ Object



208
209
210
211
212
213
# File 'app/lib/heatwave/normalizers.rb', line 208

# Coerce a value to a Date.
#
# * String           -> Date.parse(value); nil when the text cannot be
#                       parsed as a date (Date.parse raises ArgumentError /
#                       Date::Error in that case, which is swallowed here so
#                       a bad user-supplied string normalizes to nil instead
#                       of raising out of the normalizer).
# * responds to
#   #to_date         -> value.to_date
# * anything else    -> nil
def date(value)
  if value.is_a?(String)
    begin
      Date.parse(value)
    rescue ArgumentError
      # Date::Error is a subclass of ArgumentError; this also covers the
      # over-long-input ArgumentError raised by Date.parse.
      nil
    end
  elsif value.respond_to?(:to_date)
    value.to_date
  end
end

.deep_ascii_safe(value) ⇒ Object

Recursively apply ascii_safe to every String leaf in a Hash/Array
structure. Hash keys are left alone.



134
135
136
137
138
139
140
141
# File 'app/lib/heatwave/normalizers.rb', line 134

# Walk a nested Hash/Array structure and run ascii_safe on each String
# leaf. Hash keys are not touched; values of any other type are returned
# unchanged. A new structure is returned.
def deep_ascii_safe(value)
  if value.is_a?(Hash)
    value.transform_values { |child| deep_ascii_safe(child) }
  elsif value.is_a?(Array)
    value.map { |child| deep_ascii_safe(child) }
  elsif value.is_a?(String)
    ascii_safe(value)
  else
    value
  end
end

.default(value) ⇒ Object

Apply the configured default chain ([:strip, :blank]).



54
55
56
# File 'app/lib/heatwave/normalizers.rb', line 54

# Normalize +value+ with the steps listed in DEFAULT_CHAIN.
def default(value)
  steps = DEFAULT_CHAIN
  chain(value, *steps)
end

.downcase(value) ⇒ Object



92
93
94
# File 'app/lib/heatwave/normalizers.rb', line 92

# Lower-case a non-blank String; blank strings and non-strings are
# returned unchanged.
def downcase(value)
  return value unless value.is_a?(String) && value.present?

  value.downcase
end

.email(value) ⇒ Object



215
216
217
218
219
# File 'app/lib/heatwave/normalizers.rb', line 215

# Pull an e-mail address out of a String using Truemail's
# REGEX_EMAIL_PATTERN: the first capture of the first match, stripped.
#
# Returns nil for non-strings, when nothing matches, or when the
# captured text is blank.
def email(value)
  return unless value.is_a?(String)

  first_match = value.scan(Truemail::RegexConstant::REGEX_EMAIL_PATTERN)&.first
  first_match&.first&.strip&.presence
end

.hash_compactor(value) ⇒ Object



240
241
242
243
244
# File 'app/lib/heatwave/normalizers.rb', line 240

# Compact a Hash via compact_hash, always returning a Hash: non-Hash
# input, or a nil/false result from compact_hash, yields {}.
def hash_compactor(value)
  value.is_a?(Hash) ? (compact_hash(value) || {}) : {}
end

.html_scrubber(value) ⇒ Object



182
183
184
185
186
187
188
189
# File 'app/lib/heatwave/normalizers.rb', line 182

# Clean an HTML String with Models::Utilities::Html#clean_and_compress_html,
# then pass it through Heatwave::TypographicQuotes.curl_html and strip it.
#
# Returns nil for non-strings, blank input, or a blank result.
def html_scrubber(value)
  return unless value.is_a?(String) && value.present?

  # Throwaway object that only exists to host the Html mixin's methods.
  scrubber = Class.new { include Models::Utilities::Html }.new
  compressed = scrubber.clean_and_compress_html(value)
  Heatwave::TypographicQuotes.curl_html(compressed)&.strip&.presence
end

.integer(value) ⇒ Object



201
202
203
204
205
206
# File 'app/lib/heatwave/normalizers.rb', line 201

# Coerce a value to an Integer.
#
# * String  -> every run of digits, '+' and '.' is concatenated and
#              converted with to_i, so separators and letters are ignored
#              ("$1,250.75" => 1250) and a leading '-' is dropped.
# * Numeric -> value.to_i (truncates)
# * other   -> nil
def integer(value)
  if value.is_a?(String)
    value.scan(/[\d+.]+/).join.to_i
  elsif value.is_a?(Numeric)
    value.to_i
  end
end

.json_to_hash(value) ⇒ Object



441
442
443
444
445
446
447
448
449
450
451
# File 'app/lib/heatwave/normalizers.rb', line 441

# Parse a JSON String into Ruby data.
#
# * blank value        -> nil
# * Array or Hash      -> returned unchanged (already parsed)
# * String             -> JSON.parse(value), or nil on JSON::ParserError
# * anything else      -> nil
def json_to_hash(value)
  return nil if value.blank?

  case value
  when Array, Hash
    value
  when String
    begin
      JSON.parse(value)
    rescue JSON::ParserError
      nil
    end
  end
end

.new_lines(value) ⇒ Object



282
283
284
# File 'app/lib/heatwave/normalizers.rb', line 282

# Convert Windows line endings ("\r\n") to "\n".
#
# * blank value (nil, "", whitespace-only, empty collection) -> nil
# * String                                                   -> copy with "\r\n" replaced by "\n"
# * any other present value                                  -> returned unchanged
#
# The non-String guard keeps this normalizer consistent with its
# siblings: without it a present non-String (e.g. an Integer) would be
# sent #gsub and raise NoMethodError.
def new_lines(value)
  return value.presence unless value.is_a?(String)

  value.presence&.gsub("\r\n", "\n")
end

.no_undefined(value) ⇒ Object



176
177
178
179
180
# File 'app/lib/heatwave/normalizers.rb', line 176

# Map the literal strings "undefined", "null" and "nil" (exact,
# case-sensitive match) to nil; everything else is returned unchanged.
def no_undefined(value)
  placeholder = value.is_a?(String) && %w[undefined null nil].include?(value)
  placeholder ? nil : value
end

.nullify_zero(value) ⇒ Object



246
247
248
# File 'app/lib/heatwave/normalizers.rb', line 246

# Return nil when +value+ converts to integer zero via #to_i, otherwise
# return +value+ unchanged.
#
# NOTE(review): because the check uses #to_i, anything that truncates to
# zero is nullified too — nil, "abc", "0.5" and 0.5 all become nil.
# Confirm that callers only pass integer-like values.
def nullify_zero(value)
  return nil if value.to_i.zero?

  value
end

.numeric(value, precision: 0, strip_zeros: true, ceil: false) ⇒ Object



191
192
193
194
195
196
197
198
199
# File 'app/lib/heatwave/normalizers.rb', line 191

# Normalize a number-like value to a formatted numeric String.
#
# The value is stringified, every run of digits, '+' and '.' is kept and
# concatenated (so "$1,250.5" becomes "1250.5"; a leading '-' is not
# preserved), and the result is parsed as a BigDecimal and formatted with
# ActionView's number_with_precision.
#
# Returns nil when the value is blank or when nothing parseable is left
# (e.g. "N/A"); previously the latter raised ArgumentError from
# BigDecimal().
#
# @param value       [Object]  raw input
# @param precision   [Integer] decimal places passed to number_with_precision
#                              (and to BigDecimal#ceil when +ceil+ is set)
# @param strip_zeros [Boolean] passed as strip_insignificant_zeros
# @param ceil        [Boolean] round up to +precision+ before formatting
# @return [String, nil]
def numeric(value, precision: 0, strip_zeros: true, ceil: false)
  return if value.blank?

  digit_string = value.to_s.scan(/[\d+.]+/).join
  # exception: false makes BigDecimal() return nil for "" or malformed
  # text such as "1.2.3" instead of raising.
  big_decimal = BigDecimal(digit_string, exception: false)
  return if big_decimal.nil?

  big_decimal = big_decimal.ceil(precision) if ceil
  # Throwaway object that only exists to host the NumberHelper methods.
  helper = Class.new { include ActionView::Helpers::NumberHelper }.new
  helper.number_with_precision(big_decimal, strip_insignificant_zeros: strip_zeros, precision: precision)
end

.parameterize(value) ⇒ Object



104
105
106
# File 'app/lib/heatwave/normalizers.rb', line 104

# Call String#parameterize on a String; non-strings are returned
# unchanged.
def parameterize(value)
  return value unless value.is_a?(String)

  value.parameterize
end

.phone(value, country_iso: nil) ⇒ Object



221
222
223
224
225
226
# File 'app/lib/heatwave/normalizers.rb', line 221

# Normalize a phone number String: non-ASCII characters are removed, the
# text is stripped and lower-cased, and the result is handed to
# PhoneNumber.parse_and_format together with +country_iso+.
#
# Returns nil for non-strings and blank strings.
def phone(value, country_iso: nil)
  return unless value.is_a?(String) && value.present?

  ascii_only = value.gsub(/\P{ASCII}/u, '')
  PhoneNumber.parse_and_format(ascii_only.strip.downcase, country_iso: country_iso)
end

.resolve_shipping_carrier(carrier, tracking_number: nil) ⇒ String, Object

Chains the name-based shipping_carrier normalizer with the
tracking-number-format fallback shipping_carrier_from_tracking_number.
Use this anywhere you need a best-effort canonical carrier name
from a (possibly garbage) shipments.carrier value plus an
optional tracking number. Mirrors the resolution rule in
WyShipping.class_for_carrier.

Examples:

Manual entry where carrier is a placeholder but the tracking number is recoverable:

resolve_shipping_carrier('Override', tracking_number: '1Z999AA10123456784')
# => 'UPS'

Parameters:

  • carrier (String, nil)

    raw shipments.carrier value

  • tracking_number (String, nil) (defaults to: nil)

    optional fallback used only
    when name-based normalization leaves the input unchanged

Returns:

  • (String, Object)

    canonical carrier name or the original
    input when neither rule matches



433
434
435
436
437
438
439
# File 'app/lib/heatwave/normalizers.rb', line 433

# Best-effort canonical carrier name from a carrier string plus an
# optional tracking number.
#
# The name-based shipping_carrier result wins whenever it differs from
# the input or no tracking number was given. Otherwise the tracking
# number is inspected with shipping_carrier_from_tracking_number, falling
# back to the name-based result when that returns nothing.
def resolve_shipping_carrier(carrier, tracking_number: nil)
  by_name = shipping_carrier(carrier)
  return by_name unless tracking_number.present?
  return by_name unless by_name == carrier

  shipping_carrier_from_tracking_number(tracking_number) || by_name
end

.shipping_carrier(value) ⇒ String, Object

Map a free-form shipments.carrier value to a canonical
WyShipping.class_for_carrier key (matching the Shipping::<Name>
class name). Returns the input unchanged when no rule matches, so
unfamiliar carrier strings keep surfacing NameError instead of
being silently mis-mapped.

Covers UPS / FedEx / Canpar / Purolator / USPS / Canada Post
service-level and case-variant strings observed in production
(e.g. "Ups Standard", "FedEx Ground®", "Canpar Express",
"fedex_international_ground", "PUROLATOR_GROUND",
"Canada Post Expedited Parcel"). Carriers that already
constantize cleanly (FedEx, UPS, USPS, Canpar, Canadapost,
Purolator, AmazonSeller, WalmartSeller, SpeedeeDelivery,
Freightquote, RlCarriers, DPD, GLS, DhlExpress) are returned
untouched.

Amazon Logistics variants — "Amzl", "AMZL", "AMZL_CA_PREMIUM",
"Amazon Ground", "Amazon Shipping Ground" — canonicalize to
"Amazon Shipping", which is connected on the ShipEngine account
as carrier_code amazon_shipping_us (tracking-only,
carrier_id se-6604086). Rates/labels still go through
Shipping::AmazonSeller (SP-API direct), but tracking webhook
subscription is handled by SE just like every other parcel
carrier.

Bare "Amazon" is intentionally left alone — too ambiguous, could
be the parent corp / a marketplace name / a service. Same for
"AmazonSeller" — that's the marketplace flow where the
underlying carrier varies per shipment (USPS, FedEx, UPS, or
Amazon Logistics depending on Buy Shipping's choice). The
tracking-number sniffer chain in resolve_shipping_carrier
recovers the right carrier from the number's format.

Also leaves placeholder/free-text values alone ("Standard",
"Override", "Shipping override, please confirm", "Warehouse
Pickup", "AMJM").

Parameters:

  • value (String, nil)

    raw shipments.carrier string

Returns:

  • (String, Object)

    canonical carrier name, or the input
    value untouched



326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# File 'app/lib/heatwave/normalizers.rb', line 326

# Map a free-form carrier String to its canonical carrier name.
#
# Matching is done on a cleaned copy of the input (®/™ removed, trailing
# asterisks dropped, repeated spaces squeezed, stripped, lower-cased).
# Branch order matters: FedEx Freight is tested before the generic FedEx
# prefix rule. When no rule matches — or the input is not a String or is
# whitespace-only — the ORIGINAL value is returned untouched.
def shipping_carrier(value)
  return value unless value.is_a?(String)

  trimmed = value.strip
  return value if trimmed.empty?

  key = trimmed.gsub(/[®™]/, '').sub(/\*+\z/, '').squeeze(' ').strip.downcase

  case key
  when 'legacyfedex'
    'LegacyFedEx'
  when /\Afed[\s_-]*ex[\s_-]*freight\z/
    'FedExFreight'
  when /\Afed[\s_-]?ex/, 'fdx'
    'FedEx'
  when 'ups', /\Aups[\s_-]/
    'UPS'
  when /\Acanpar\b/
    'Canpar'
  when /\Apurolator/
    'Purolator'
  when /\Ausps/, 'united states postal service'
    'USPS'
  when /\Acanadapost/, /\Acanada\s*post/
    'Canadapost'
  when 'dhl', /\Adhl[\s_-]*express\b/, /\Adhl[\s_-]*express\s+(international|intl)/
    'DhlExpress'
  when 'amzl', /\Aamzl[\s_-]/, /\Aamazon[\s_-]*ground\z/, /\Aamazon[\s_-]+shipping\b/
    'Amazon Shipping'
  else
    value
  end
end

.shipping_carrier_from_tracking_number(value) ⇒ String?

Infer the canonical carrier name from a tracking number's format
(and checksum, when the carrier defines one). Defers to the
tracking_number gem, which encodes the full barcode-spec
families for every carrier we care about — UPS (1Z + mod-10),
FedEx (Express 12-digit, Ground 15-digit, SmartPost/Ground
Economy 20/22-digit, 96-prefix), USPS (13-char international
AA######US, IMpb 22-digit, Express Mail, Certified, etc.),
Canada Post (16-digit domestic, AA######CA international), DHL
Express (10/JJD-prefix), Canpar (D-prefix), Purolator (PIN +
variants), Spee-Dee, R+L Carriers. Mod-10 checksum validation
makes false positives rare on the families that have a check
digit (UPS, USPS IMpb, FedEx Ground, Canada Post).

Used as a fallback by WyShipping.class_for_carrier and
resolve_shipping_carrier when the name-based
shipping_carrier normalizer can't recognize the carrier
string but a tracking number is present — covers placeholder
cases like "Override" / "Shipping override, please confirm" /
"Standard" when the carrier is discoverable from the number.

Two-pass:

  1. TrackingNumber.new(s).courier_code (gem) — preferred,
    validates format + checksum.
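  2. Regex fallback for format-only matches: UPS 1Z numbers and
    USPS international (accepted even with a bad checksum, e.g. a
    typo'd hand-keyed number), plus Canpar (D-prefix), Spee-Dee
    (SP + 18 digits) and Purolator (520-prefix), which the method's
    inline comments say the gem has no spec for.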

Returns nil only when neither the gem nor the fallback regex
matches.

Parameters:

  • value (String, nil)

    tracking number

Returns:

  • (String, nil)

    canonical carrier name or nil when no
    pattern matches



383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
# File 'app/lib/heatwave/normalizers.rb', line 383

# Infer a canonical carrier name from a tracking number.
#
# First asks the tracking_number gem for a courier code and translates
# it through PARCEL_TRACKING_NUMBER_GEM_TO_INTERNAL; if that yields
# nothing, a short list of format regexes is tried.
#
# Returns nil for non-strings, whitespace-only input, or when neither
# pass recognizes the number.
def shipping_carrier_from_tracking_number(value)
  return nil unless value.is_a?(String)

  # Trim, remove all interior whitespace, upper-case.
  candidate = value.strip.gsub(/\s+/, '').upcase
  return nil if candidate.empty?

  # Required here, inside the method, rather than at file load.
  require 'tracking_number'
  tn = TrackingNumber.new(candidate)
  # The gem sets courier_code from the format match (independent of the
  # checksum), so a number with a bad checksum still surfaces its likely
  # courier. That is accepted on purpose: SE re-validates on registration
  # and the carrier-side lookup succeeds or fails on its own merits, so
  # refusing to register here would silently lose coverage on hand-keyed
  # numbers.
  gem_code = tn.courier_code
  from_gem = gem_code && PARCEL_TRACKING_NUMBER_GEM_TO_INTERNAL[gem_code]
  return from_gem if from_gem

  # Regex fallback for carrier families the gem ships no spec for, or
  # where a format-only match is wanted even though the gem rejects the
  # checksum. Deliberately narrow: ambiguous numeric-only formats are left
  # out so packages are not mis-routed.
  case candidate
  when /\A1Z[0-9A-Z]{16}\z/
    # UPS, even with a bad checksum (typo'd hand-keyed number).
    'UPS'
  when /\A[A-Z]{2}\d{9}US\z/
    # USPS international (two letters, nine digits, "US").
    'USPS'
  when /\AD[A-Z0-9]{18,21}\z/
    # Canpar Express: D-prefix, 19-22 characters in total.
    'Canpar'
  when /\ASP\d{18}\z/
    # Spee-Dee: "SP" followed by 18 digits (20 characters in total).
    'SpeedeeDelivery'
  when /\A520\d{9}\z/
    # Purolator domestic CA: 12 digits starting with 520.
    'Purolator'
  end
end

.squish(value) ⇒ Object



100
101
102
# File 'app/lib/heatwave/normalizers.rb', line 100

# Call String#squish on a String; non-strings are returned unchanged.
def squish(value)
  return value unless value.is_a?(String)

  value.squish
end

.strip(value) ⇒ Object

── Built-ins (mirror normalizr core) ──────────────────────────



80
81
82
# File 'app/lib/heatwave/normalizers.rb', line 80

# Remove leading and trailing whitespace from a String; non-strings are
# returned unchanged.
def strip(value)
  return value unless value.is_a?(String)

  value.strip
end

.strip_legal_marks(value) ⇒ Object

Strip legal / IP glyphs (™ ® © ℠ ℗ Ⓒ Ⓡ) and collapse any double space the
removal leaves. Unlike ascii_safe/transliterate, this touches ONLY
those marks — measurement symbols (″ ′ ° ½ × µ …) and accents are preserved.
Used for the OpenAI Ads product feed title/description.



147
148
149
150
151
# File 'app/lib/heatwave/normalizers.rb', line 147

# Remove every character matched by LEGAL_MARK_REGEXP, collapse runs of
# two or more spaces/tabs into a single space, and strip the result.
#
# Non-strings and blank strings are returned unchanged.
def strip_legal_marks(value)
  if value.is_a?(String) && value.present?
    without_marks = value.gsub(LEGAL_MARK_REGEXP, '')
    without_marks.gsub(/[ \t]{2,}/, ' ').strip
  else
    value
  end
end

.tagify(value) ⇒ Object



250
251
252
# File 'app/lib/heatwave/normalizers.rb', line 250

# Build a tag slug from a String: squish, parameterize, dasherize,
# downcase. Returns nil for non-strings or when the slug is blank.
def tagify(value)
  return unless value.is_a?(String)

  slug = value.to_s.squish.parameterize.dasherize.downcase
  slug.presence
end

.titleize(value) ⇒ Object

── Heatwave-specific (from the old initializer) ───────────────



161
162
163
# File 'app/lib/heatwave/normalizers.rb', line 161

# Squish and titleize a String. Returns nil for non-strings or when the
# result is blank.
def titleize(value)
  return unless value.is_a?(String)

  titled = value.to_s.squish.titleize
  titled.presence
end

.truncate(value, length: 30, omission: '...') ⇒ Object



165
166
167
168
169
170
# File 'app/lib/heatwave/normalizers.rb', line 165

# Shorten a String to at most +length+ characters, ending in +omission+
# when anything was cut. Non-strings and strings that already fit are
# returned unchanged.
#
# When +length+ is smaller than the omission itself there is no room for
# any of the original text, so the result is the omission cut down to
# +length+ characters. (Previously the negative cutoff made the slice
# count from the END of the string and the result could be longer than
# the input.)
#
# @param value    [Object]  value to shorten
# @param length   [Integer] maximum length of the result, omission included
# @param omission [String]  marker appended to a shortened string
# @return [Object] the shortened String, or +value+ itself
def truncate(value, length: 30, omission: '...')
  return value unless value.is_a?(String)

  limit = [length, 0].max
  return value if value.length <= limit

  # Characters of the original kept in front of the omission; never negative.
  keep = [limit - omission.length, 0].max
  "#{value[0, keep]}#{omission}"[0, limit]
end

.unit_symbolizer(value) ⇒ Object

Replaces ' and " with prime symbols when adjacent to digits.
4' → 4′ | 6" → 6″



256
257
258
259
260
# File 'app/lib/heatwave/normalizers.rb', line 256

# Replace a straight ' or " that follows digits (optionally separated by
# one whitespace character) with the prime ′ / double-prime ″ symbol.
# Feet marks are rewritten first, then inch marks.
#
# Values that are not strings, or contain no digit, are returned unchanged.
def unit_symbolizer(value)
  return value unless value.is_a?(String)
  return value unless /\d+/.match?(value)

  feet_marked = value.gsub(/(\d+)\s?'/, "\\1′")
  feet_marked.gsub(/(\d+)\s?"/, "\\1″")
end

.unitizer(value) ⇒ Object

Best-effort length normalizer: tolerates fractions, primes, and
the plain feet/inches notation customers paste in.



264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# File 'app/lib/heatwave/normalizers.rb', line 264

# Best-effort conversion of a free-form length String into a feet value
# formatted with two decimals.
#
# Returns nil for non-strings, blank strings, or when RubyUnits cannot
# parse/convert the cleaned-up text (any StandardError is swallowed).
def unitizer(value)
  return unless value.is_a?(String) && value.present?

  # Two single quotes are read as one double quote. gsub returns a new
  # String, so the in-place edits below never touch the caller's object.
  value = value.gsub("''", '"')
  # Drop common decimal/fraction fragments, signs and spaces. The fraction
  # entries are listed a second time on purpose: once '-', '+' and ' ' are
  # gone, a fraction that was split by them (e.g. "1 /2") becomes
  # contiguous and is removed by the second pass. Order matters here.
  ['.5', '.25', '.125', '.0625', '1/2', '1/4', '1/8', '1/16', '-', '+', ' ',
   '1/2', '1/4', '1/8', '1/16'].each { |s| value.gsub!(s, '') }
  # A bare number is taken to be feet.
  value = "#{value} ft" unless value.match(/^\d+(\.\d+)?$/).nil?
  # Feet mark present but no inch mark and no "ft": complete the notation
  # so it reads as feet-and-inches.
  if value.index("'") && value.index('"').nil? && value.index('ft').nil?
    # Text after the feet mark is treated as inches: append the inch mark.
    value = "#{value}\"" unless value.last == "'"
    # Nothing after the feet mark: append explicit zero inches.
    value = "#{value} 0\"" if value.last == "'"
  end
  begin
    # Convert to feet ('>>' with a unit name — presumably ruby-units'
    # conversion operator; confirm) and format with '%0.2f'.
    (RubyUnits::Unit.new(value) >> 'ft').to_s('%0.2f')
  rescue StandardError
    nil
  end
end

.upcase(value) ⇒ Object



96
97
98
# File 'app/lib/heatwave/normalizers.rb', line 96

# Upper-case a non-blank String; blank strings and non-strings are
# returned unchanged.
def upcase(value)
  return value unless value.is_a?(String) && value.present?

  value.upcase
end

.whitespace(value) ⇒ Object



116
117
118
119
120
# File 'app/lib/heatwave/normalizers.rb', line 116

# Tidy whitespace in a String: every run of non-newline whitespace
# becomes a single space, whitespace hugging a newline is removed, and
# the result is stripped. Non-strings are returned unchanged.
def whitespace(value)
  return value unless value.is_a?(String)

  single_spaced = value.gsub(/[^\S\n]+/, ' ')
  tight_breaks = single_spaced.gsub(/\s?\n\s?/, "\n")
  tight_breaks.strip
end

.zip_or_postal_code(value, country_iso: nil) ⇒ Object



228
229
230
231
232
233
234
235
236
237
238
# File 'app/lib/heatwave/normalizers.rb', line 228

# Format a ZIP / postal code with ValidatesZipcode.
#
# With +country_iso+ the value is formatted for that country directly.
# Without it, the value is tested as a Canadian code first and then as a
# US code, and formatted for the first country that validates.
#
# Returns nil for blank input or when no country was given and neither
# CA nor US validates.
def zip_or_postal_code(value, country_iso: nil)
  return if value.blank?
  return ValidatesZipcode.format(value, country_iso) if country_iso

  detected = %w[CA US].find { |iso| ValidatesZipcode.valid?(value, iso) }
  ValidatesZipcode.format(value, detected) if detected
end