Class: Seo::HtmlLinkSanitizer

Inherits:
BaseService show all
Defined in:
app/services/seo/html_link_sanitizer.rb

Defined Under Namespace

Classes: Result

Instance Method Summary collapse

Methods inherited from BaseService

#initialize, #log_debug, #log_error, #log_info, #log_warning, #logger, #options, #tagged_logger

Constructor Details

This class inherits a constructor from BaseService

Instance Method Details

#process(html_fragment, relative: false) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'app/services/seo/html_link_sanitizer.rb', line 6

# Sanitizes the href of every <a> element found in an HTML fragment.
#
# Each non-blank, parsable href is passed through Seo::LinkSanitizer; when the
# sanitizer returns a different, non-blank URL the link is rewritten in place.
# Links whose host is present and is not www.warmlyyours.com, that have
# target="_blank" and no (or an empty) rel value, get rel="noopener".
#
# @param html_fragment [String] HTML markup to scan
# @param relative [Boolean] forwarded unchanged to Seo::LinkSanitizer#process
# @return [Result] built with +html_out+ (inner HTML of the parsed <body>, or
#   an empty string when the fragment produces no body), +links_modified+
#   (de-duplicated, sorted +{ in:, out: }+ hashes) and +errors+
#   (+{ href:, error: }+ hashes for links the sanitizer reported as :unparsable)
def process(html_fragment, relative: false)
  logger.tagged('Seo::HtmlLinkSanitizer') do
    errors = []
    links_modified = []
    html_doc = Nokogiri::HTML(html_fragment)

    html_doc.css('a').each do |link|
      href = link['href'].to_s
      next unless href.present?

      # Only the parser's own "invalid URI" failure is expected here; anything
      # else is a genuine bug and should not be silently swallowed.
      uri = begin
        Addressable::URI.parse(href)
      rescue Addressable::URI::InvalidURIError
        nil
      end
      unless uri
        logger.error "Cannot parse #{href}, skipping"
        next
      end

      ls_result = Seo::LinkSanitizer.new.process(href, relative: relative)
      new_url = ls_result.out
      if new_url.present? && new_url != href
        links_modified << { in: href, out: new_url }
        link['href'] = new_url
        logger.info "New URL: #{new_url}"
      end
      if ls_result.status == :unparsable
        errors << { href: href, error: ls_result.error }
        logger.error "Error processing #{href}: #{ls_result.error}"
      end

      # Add noopener if target is _blank
      # see https://web.dev/external-anchors-use-rel-noopener
      # The rel check reads the attribute *value* (a String or nil). Calling
      # blank? on the attribute node itself would hit Nokogiri's Node#blank?,
      # which is false for any attribute node, so rel="" would look "present".
      external = uri.host.present? && uri.host != 'www.warmlyyours.com'
      if external && link['target'] == '_blank' && link['rel'].blank?
        link.set_attribute('rel', 'noopener')
      end
    end

    # inspect-based keys keep the de-duplication and ordering deterministic.
    links_modified.uniq!(&:inspect)
    links_modified.sort_by!(&:inspect)

    # An empty or nil fragment parses to a document without a <body>; return
    # an empty string instead of raising NoMethodError on nil.
    body = html_doc.at('body')
    Result.new(html_out: body ? body.inner_html : '',
               links_modified: links_modified,
               errors: errors)
  end
end