Class: Seo::DeparameterizeLinks

Inherits:
BaseService show all
Defined in:
app/services/seo/deparameterize_links.rb

Defined Under Namespace

Classes: Result

Instance Method Summary collapse

Methods inherited from BaseService

#initialize, #log_debug, #log_error, #log_info, #log_warning, #logger, #options, #tagged_logger

Constructor Details

This class inherits a constructor from BaseService

Instance Method Details

#process(html_fragment) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'app/services/seo/deparameterize_links.rb', line 6

# Strips the query string from anchors in an HTML fragment that point at
# www.warmlyyours.com or that carry no host at all (relative links).
#
# Anchors are left untouched when the href is blank, is an in-page anchor
# (starts with '#'), cannot be parsed, uses a scheme that does not start
# with 'http' (e.g. mailto), has a path under /media/, has no query string,
# or targets a host other than www.warmlyyours.com.
#
# @param html_fragment [String] HTML markup to scan for <a> elements
# @return [Result] built with +html_out+ (inner HTML of the parsed <body>,
#   nil when the parsed document has no body) and +links_modified+ (a
#   de-duplicated, sorted array of { in: original_href, out: rewritten_href })
def process(html_fragment)
  logger.tagged('Seo::DeparameterizeLinks') do
    links_modified = []
    html_doc = Nokogiri::HTML(html_fragment)
    html_doc.css('a').each do |link|
      href = link['href'].to_s
      next unless href.present?
      next if href.start_with?('#')

      logger.info "Processing #{href}"
      # Best effort: a single unparsable href must not abort the whole fragment.
      uri = begin
        Addressable::URI.parse(href)
      rescue StandardError
        nil
      end
      unless uri
        logger.error "Cannot parse #{href}, skipping"
        next
      end
      # Ignore anything non http, such as mailto. A missing scheme (relative or
      # protocol-relative link) is still eligible.
      next unless uri.scheme.nil? || uri.scheme.start_with?('http')
      # Ignore media for now
      next if uri.path.start_with?('/media/')
      # If no param next
      next if uri.query.nil?
      # Ignore non warmlyyours link
      next if uri.host.present? && uri.host != 'www.warmlyyours.com'

      uri.query = nil
      # Protocol-relative link (//host/path): default to https. The scheme is
      # deliberately NOT forced onto host-less relative links: doing so would
      # serialize '/path' as 'https:/path', and Addressable raises
      # InvalidURIError when a scheme is set on a URI that has neither an
      # authority nor a path (e.g. href="?page=2").
      uri.scheme = 'https' if uri.scheme.nil? && uri.host.present?
      new_url = uri.to_s
      next if new_url == href

      links_modified << { in: href, out: new_url }
      link['href'] = new_url
      logger.info "New URL: #{new_url}"
    end
    # The same rewrite may occur for several anchors; report each pair once,
    # in a stable order.
    links_modified.uniq!(&:inspect)
    links_modified.sort_by!(&:inspect)
    html = html_doc.at('body')&.inner_html
    logger.error "!!! HTML empty" unless html.present?
    Result.new(html_out: html,
               links_modified: links_modified)
  end
end