Class: Image::ImageUrlScrubber

Inherits:
Object
  • Object
show all
Defined in:
app/services/image/image_url_scrubber.rb

Class Method Summary collapse

Class Method Details

.clean_url(url) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
# File 'app/services/image/image_url_scrubber.rb', line 41

# Cleans up a URL that points at the ik.warmlyyours.com host.
#
# For URLs on that host the query string is removed and a trailing
# ".jpg"/".jpeg" extension (in any letter case) is rewritten to ".jpeg".
# Blank values, values that cannot be parsed, URLs without a path and URLs
# on any other host are returned exactly as given.
#
# @param url [String, nil] the URL to clean
# @return [String, nil] the cleaned URL, or +url+ itself when it is not eligible
def self.clean_url(url)
  return url unless url.present?

  # Parsing is best effort: anything that cannot be parsed is passed through.
  uri = begin
    Addressable::URI.parse(url)
  rescue StandardError
    nil
  end
  return url unless uri
  return url unless uri.path.present?
  # Host names are case-insensitive, so compare without regard to case.
  return url unless uri.host&.casecmp?('ik.warmlyyours.com')

  uri.query = nil # alternative value left disabled: "ik-sdk-version=ruby-1.0.10"
  # Fix the file extension: JPG, JPEG -> jpeg. \z anchors at the very end of
  # the path ($ would also match before an embedded newline).
  uri.path = uri.path.sub(/\.jpe?g\z/i, '.jpeg')

  uri.to_s
end

.process(articles: nil, limit: nil) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'app/services/image/image_url_scrubber.rb', line 3

# Scrubs ik.warmlyyours.com URLs embedded in article solutions.
#
# Each article's +solution+ is parsed as HTML and the +href+ of every <a>
# and the +src+ of every <img> is passed through +clean_url+. When at least
# one URL was rewritten, the body's inner HTML is written back to the
# article with +update_columns+ together with a fresh +updated_at+.
# Articles in which nothing changed are left untouched. Progress is printed
# to stdout, one line per article.
#
# @param articles [#limit, #size, #each_with_index, nil] articles to scan;
#   defaults to every Article whose solution contains "ik.warmlyyours.com"
# @param limit [Integer, nil] optional cap, applied through +articles.limit+
# @return [Array<Hash>] one entry per rewritten URL, shaped as
#   +{ article_id:, in:, out: }+
def self.process(articles: nil, limit: nil)
  articles ||= Article.where("solution like '%ik.warmlyyours.com%'")
  articles = articles.limit(limit) if limit

  # CSS selector => attribute that carries the URL. Links are handled before
  # images so the order of the returned entries stays stable.
  url_attributes = { 'a' => 'href', 'img' => 'src' }
  results = []
  total = articles.size

  articles.each_with_index do |article, position|
    puts "[#{position + 1}/#{total}] Evaluating article #{article.id} #{article.slug}"
    html_doc = Nokogiri::HTML(article.solution.to_s)
    changed = false

    url_attributes.each do |selector, attribute|
      html_doc.css(selector).each do |node|
        current = node[attribute].to_s
        cleaned = clean_url(current)
        next if cleaned == current

        results << { article_id: article.id, in: current, out: cleaned }
        node[attribute] = cleaned
        changed = true
      end
    end

    # Only persist when a URL was actually rewritten: this avoids bumping
    # updated_at and replacing the stored markup with re-serialized HTML for
    # articles that needed no change. A rewritten node also guarantees the
    # parsed document has a <body> to serialize.
    next unless changed

    article.update_columns(solution: html_doc.at('body').inner_html, updated_at: Time.current)
  end

  results
end