Class: LinkCheck

Inherits:
ApplicationRecord show all
Defined in:
app/models/link_check.rb

Overview

== Schema Information

Table name: link_checks
Database name: primary

id :integer not null, primary key
href :string
redirection :string
status :string not null
created_at :datetime not null
updated_at :datetime not null

Indexes

index_link_checks_on_href (href) UNIQUE
index_link_checks_on_status (status)

Has and belongs to many collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from ApplicationRecord

ransackable_associations, ransackable_attributes, ransackable_scopes, ransortable_attributes, #to_relation

Methods included from Models::EventPublishable

#publish_event

Class Method Details

.blacklisted?(href) ⇒ Boolean

URLs that are excluded from link checking.

Returns:

  • (Boolean)


45
46
47
48
49
50
# File 'app/models/link_check.rb', line 45

# Reports whether a URL falls under one of the prefixes that are deliberately
# excluded from link checking.
#
# @param href [String, nil] the URL to test; nil is treated as not blacklisted
# @return [Boolean] true when +href+ starts with one of the excluded prefixes
def self.blacklisted?(href)
  # to_s keeps a nil href from raising NoMethodError.
  href.to_s.start_with?(
    'https://warmlyyours.basecamphq.com',
    'https://www.pinterest.com',
    'https://crm.warmlyyours.com',
    'https://ik.warmlyyours.com'
  )
end

.process(article_scope = nil) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'app/models/link_check.rb', line 22

# Rebuilds the link-check records for a set of articles and enqueues the
# link checker worker.
#
# Link checks joined to the scoped articles (except whitelisted ones) are
# deleted first. Every link extracted from each article's solution is then
# found or created with status 'pending' and associated with the article.
# Blacklisted links are skipped, and whitelisted records are left untouched.
#
# @param article_scope [ActiveRecord::Relation, nil] articles to scan; when nil,
#   published Post/ArticleFaq/ArticleTraining articles with a non-nil solution
# @return [Array] the unique, non-nil links discovered across all articles
def self.process(article_scope = nil)
  la = Seo::LinkAnalyzer.new
  all_links = []
  article_scope ||= Article.where(type: %w[Post ArticleFaq ArticleTraining]).where.not(solution: nil).published
  # NOTE(review): delete_all issues a bulk DELETE; presumably rows in the
  # articles join table are not cleaned up by it - confirm.
  LinkCheck.joins(:articles).merge(article_scope).where.not(status: 'whitelisted').delete_all
  LinkCheck.transaction do # Speeds up inserts
    article_scope.find_each do |article|
      discovered_links = la.extract_links(article.solution)
      # A link repeated within one article only needs to be attached once.
      discovered_links.uniq.each do |href|
        # nil entries cannot be looked up or checked (they are compacted out
        # of the return value below as well).
        next if href.nil? || blacklisted?(href)

        lr = where(href: href).first_or_create(status: 'pending')
        next if lr.status == 'whitelisted'

        lr.articles << article
      end
      # concat appends in place instead of reallocating the array per article.
      all_links.concat(discovered_links)
    end
  end
  # Now we will enqueue our links check, all pending
  ArticleLinkCheckerWorker.perform_async
  all_links.uniq.compact
end

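.process_links(all_links, link_analyzer: nil) ⇒ Object

Analyzes an array of links and reports back the HTTP status code returned for each.
Matching link records are updated, or destroyed when the status is 200.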



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'app/models/link_check.rb', line 54

# Runs the analyzer over the given links and applies each result to the
# stored link check that has the same href.
#
# A record whose result is 200 is destroyed; any other record receives the
# new status and redirect location and is saved. Links without a stored
# record are ignored.
#
# @param all_links [Array] hrefs to check
# @param link_analyzer [#check_links, nil] analyzer to use; a new
#   Seo::LinkAnalyzer is built when none is given
# @return [Array] every record that was saved or destroyed, in result order
def self.process_links(all_links, link_analyzer: nil)
  analyzer = link_analyzer || Seo::LinkAnalyzer.new
  touched = []
  analyzer.check_links(all_links).each do |href, outcome|
    record = where(href: href).first
    next if record.nil?

    record.status = outcome[:result]
    if record.status.to_s == '200'
      # Healthy links are not kept around.
      record.destroy
    else
      record.redirection = outcome[:location]
      record.save
    end
    touched << record
  end
  touched
end

Instance Method Details

#articles ⇒ ActiveRecord::Relation<Article>

Returns:

  • (ActiveRecord::Relation<Article>)

See Also:



20
# File 'app/models/link_check.rb', line 20

# Articles associated with this link check (.process attaches each article
# whose solution yielded this href).
# NOTE(review): `uniq: true` looks like a legacy association option - confirm
# the Rails version in use still honours it; newer versions express this as a
# `-> { distinct }` scope instead.
has_and_belongs_to_many :articles, uniq: true

#check ⇒ Object



72
73
74
# File 'app/models/link_check.rb', line 72

# Re-checks this record's own href through the class-level link processor.
#
# @return [Object] whatever process_links returns for the single-element list
def check
  own_link = [href]
  self.class.process_links(own_link)
end

#replace_url ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'app/models/link_check.rb', line 80

# Rewrites every occurrence of this link in the associated articles so it
# points at the recorded redirection, then destroys this record and enqueues
# a check of the new target.
#
# Both <a href> and <img src> values are rewritten. A value matches when it
# equals +href+ or +href+ with its locale segment replaced by the
# '{{locale}}' token. Does nothing when no redirection is recorded.
#
# @return [Object, nil] the result of enqueuing the worker, or nil when there
#   is no redirection
def replace_url
  return unless redirection.present?

  # Computed once: the original link and its locale-token equivalent.
  candidates = [href, href.gsub(/\/[a-z]{2}\-[A-Z]{2}/, '/{{locale}}')]
  new_links = []
  articles.each do |article|
    html_doc = Nokogiri::HTML(article.solution)
    body = html_doc.at('body')
    # An empty solution parses to a document without a body; there is
    # nothing to rewrite and inner_html would raise on nil.
    next unless body

    html_doc.css('a').each do |link|
      next unless candidates.include?(link.attribute('href').to_s)

      new_links << redirection
      link['href'] = redirection
    end
    html_doc.css('img').each do |img|
      next unless candidates.include?(img.attribute('src').to_s)

      img['src'] = redirection
    end
    article.auto_sanitize_urls = true
    article.solution = body.inner_html
    article.save
  end
  # Delete link entries replaced
  destroy
  # Rerun check for our new links; the same redirection is collected once per
  # matching anchor, so de-duplicate before enqueuing.
  ArticleLinkCheckerWorker.perform_async(new_links.uniq)
end

#unlink_url ⇒ Object

Converts the link to a plain-text representation; useful for replacing a 404 link when no replacement target is found.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'app/models/link_check.rb', line 111

# Replaces every anchor pointing at this link, in all associated articles,
# with the anchor's plain text, then destroys this record.
#
# An anchor matches when its href equals +href+ or +href+ with its locale
# segment replaced by the '{{locale}}' token.
#
# @return [Object] the result of destroying this record
def unlink_url
  # Computed once: the original link and its locale-token equivalent.
  candidates = [href, href.gsub(/\/[a-z]{2}\-[A-Z]{2}\//, '/{{locale}}/')]
  articles.each do |article|
    html_doc = Nokogiri::HTML(article.solution)
    body = html_doc.at('body')
    # An empty solution parses to a document without a body; nothing to do.
    next unless body

    html_doc.css('a').each do |link|
      next unless candidates.include?(link.attribute('href').to_s)

      # Swap in a real text node. Handing a String to #replace makes Nokogiri
      # parse it as markup, so link text containing '<' or '&' would be
      # re-interpreted as HTML instead of staying plain text.
      link.replace(html_doc.create_text_node(link.content))
    end
    article.solution = body.inner_html
    article.save
  end
  # Delete self
  destroy
end

#whitelist ⇒ Object



76
77
78
# File 'app/models/link_check.rb', line 76

# Marks this link check as whitelisted by writing the status column directly.
#
# @return [Object] the result of the column update
def whitelist
  update_columns(status: 'whitelisted')
end