Class: LinkCheck

Inherits:
ApplicationRecord show all
Defined in:
app/models/link_check.rb

Overview

== Schema Information

Table name: link_checks
Database name: primary

id :integer not null, primary key
href :string
redirection :string
status :string not null
created_at :datetime not null
updated_at :datetime not null

Indexes

index_link_checks_on_href (href) UNIQUE
index_link_checks_on_status (status)

Constant Summary

Constants included from Schedulable

Schedulable::SIMPLE_FORM_OPTIONS

Has and belongs to many collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from ApplicationRecord

ransackable_associations, ransackable_attributes, ransackable_scopes, ransortable_attributes, #to_relation

Methods included from Schedulable

config

Methods included from Models::AfterCommittable

#after_commit

Methods included from Models::EventPublishable

#publish_event

Class Method Details

.blacklisted?(href) ⇒ Boolean

URLs under these prefixes are excluded from link checking.

Returns:

  • (Boolean)


47
48
49
50
51
52
# File 'app/models/link_check.rb', line 47

# Tells whether +href+ points at a site that is deliberately excluded from
# link checking.
#
# @param href [String] URL to test
# @return [Boolean] true when +href+ begins with one of the excluded prefixes
def self.blacklisted?(href)
  # Core String#start_with? accepts several prefixes at once, so a single call
  # replaces the chain of ActiveSupport `starts_with?` aliases.
  href.start_with?(
    'https://warmlyyours.basecamphq.com',
    'https://www.pinterest.com',
    'https://crm.warmlyyours.com',
    'https://ik.warmlyyours.com'
  )
end

.process(article_scope = nil) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'app/models/link_check.rb', line 22

# Rebuilds the link-check records for a set of articles and enqueues the
# background check of all pending links.
#
# Non-whitelisted records joined to the articles in scope are deleted first,
# then every link extracted from each article's solution is recorded as
# 'pending' (unless blacklisted or already whitelisted) and attached to the
# article.
#
# @param article_scope [ActiveRecord::Relation, nil] articles to scan; when nil,
#   published Post / ArticleFaq / ArticleTraining articles with a non-nil solution
# @return [Array] the distinct, non-nil links discovered (blacklisted ones included)
def self.process(article_scope = nil)
  la = Seo::LinkAnalyzer.new
  all_links = []
  article_scope ||= Article.where(type: %w[Post ArticleFaq ArticleTraining]).where.not(solution: nil).published
  # NOTE(review): delete_all bypasses callbacks; confirm the articles join rows
  # of the deleted records are cleaned up elsewhere.
  LinkCheck.joins(:articles).merge(article_scope).where.not(status: 'whitelisted').delete_all
  LinkCheck.transaction do # Speeds up inserts
    article_scope.find_each do |article|
      discovered_links = la.extract_links(article.solution)
      # Skip nil entries (they would raise inside blacklisted?) and handle each
      # link once per article so the article is not attached repeatedly.
      discovered_links.compact.uniq.each do |href|
        next if blacklisted?(href)

        lr = where(href: href).first_or_create(status: 'pending')
        next if lr.status == 'whitelisted'

        lr.articles << article
      end
      # Append in place instead of building a new array for every article.
      all_links.concat(discovered_links)
    end
  end
  # Now we will enqueue our links check, all pending
  ArticleLinkCheckerWorker.perform_async
  all_links.uniq.compact
end

Checks an array of links and records the HTTP status code returned for each.
Existing link records are updated, or destroyed when the link returns 200; links without a record are skipped.



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'app/models/link_check.rb', line 56

# Runs the analyzer over +all_links+ and applies each outcome to the matching
# link record, if one exists.
#
# A record whose status comes back as 200 is destroyed; any other record gets
# the reported status and redirect location and is saved. Links without a
# record are skipped.
#
# @param all_links [Array] links to check
# @param link_analyzer [#check_links, nil] analyzer to use; a new
#   Seo::LinkAnalyzer is built when nil
# @return [Array] the records that were touched (destroyed ones included)
def self.process_links(all_links, link_analyzer: nil)
  analyzer = link_analyzer || Seo::LinkAnalyzer.new
  analyzer.check_links(all_links).each_with_object([]) do |(url, outcome), touched|
    record = where(href: url).first
    next unless record

    record.status = outcome[:result]
    if record.status.to_s == '200'
      record.destroy # healthy links are not worth keeping
    else
      record.redirection = outcome[:location]
      record.save
    end
    touched << record
  end
end

Instance Method Details

#articles ⇒ ActiveRecord::Relation<Article>

Returns:

  • (ActiveRecord::Relation<Article>)

See Also:



20
# File 'app/models/link_check.rb', line 20

# Articles this link was found in.
# NOTE(review): the `uniq:` association option was superseded by a scope lambda
# (`-> { distinct }`) in Rails 4 — confirm it still has an effect on the Rails
# version this app runs; otherwise duplicate articles are not filtered here.
has_and_belongs_to_many :articles, uniq: true

#check ⇒ Object



75
76
77
# File 'app/models/link_check.rb', line 75

# Re-checks just this record's link by delegating to the class-level
# process_links with a one-element list.
#
# @return [Array] whatever process_links returns for this single href
def check
  single_link = [href]
  self.class.process_links(single_link)
end

#replace_url ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'app/models/link_check.rb', line 83

# Points every occurrence of this link at +redirection+ in all associated
# articles, then destroys this record and queues a check of the new target.
#
# Anchor +href+ and image +src+ attributes are matched against the stored href
# or its '{{locale}}' token equivalent. Nothing happens when +redirection+ is
# blank.
#
# @return [Object, nil] result of ArticleLinkCheckerWorker.perform_async, or
#   nil when there is no redirection
def replace_url
  return if redirection.blank?

  # Loop-invariant: the original link and its locale token equivalent.
  targets = [href, href.gsub(%r{/[a-z]{2}-[A-Z]{2}}, '/{{locale}}')]
  new_links = []
  articles.each do |article|
    html_doc = Nokogiri::HTML(article.solution)
    html_doc.css('a').each do |link|
      next unless targets.include?(link.attribute('href').to_s)

      new_links << redirection
      link.attributes["href"].value = redirection
    end
    html_doc.css('img').each do |img|
      next unless targets.include?(img.attribute('src').to_s)

      img.attributes["src"].value = redirection
    end
    # No <body> means there is no markup to write back; skip instead of
    # raising on nil.
    body = html_doc.at('body')
    next unless body

    article.auto_sanitize_urls = true
    article.solution = body.inner_html
    article.save
  end
  # Delete link entries replaced
  destroy
  # Rerun check for our new links; the same target is queued only once
  ArticleLinkCheckerWorker.perform_async(new_links.uniq)
end

Converts the link to a plain text representation, useful for 404 replacement when no target found



113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'app/models/link_check.rb', line 113

# Replaces every anchor pointing at this link with its plain text content in
# all associated articles, then destroys this record.
#
# Anchors are matched against the stored href or its '{{locale}}' token
# equivalent.
#
# @return [Object] result of +destroy+
def unlink_url
  # Loop-invariant: the original link and its locale token equivalent.
  targets = [href, href.gsub(%r{/[a-z]{2}-[A-Z]{2}/}, '/{{locale}}/')]
  articles.each do |article|
    html_doc = Nokogiri::HTML(article.solution)
    html_doc.css('a').each do |link|
      next unless targets.include?(link.attribute('href').to_s)

      # Node#replace parses a String argument as markup, so escaped link text
      # (e.g. "&lt;b&gt;") would come back as live tags. A text node keeps it
      # as literal text.
      link.replace(html_doc.create_text_node(link.content))
    end
    # No <body> means there is no markup to write back; skip instead of
    # raising on nil.
    body = html_doc.at('body')
    next unless body

    article.solution = body.inner_html
    article.save
  end
  # Delete self
  destroy
end

#whitelist ⇒ Object



79
80
81
# File 'app/models/link_check.rb', line 79

# Marks this link as whitelisted by writing the status column directly.
#
# @return [Object] result of the column update
def whitelist
  update_columns(status: 'whitelisted')
end