Class: Image::ImageUrlScrubber

Inherits:
Object
  • Object
show all
Defined in:
app/services/image/image_url_scrubber.rb

Overview

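Service object that cleans image URLs hosted on ik.warmlyyours.com inside Article solution HTML: it drops the query string and normalizes .jpg/.jpeg file extensions (any letter case) to .jpeg.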

Class Method Summary collapse

Class Method Details

.clean_url(url) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'app/services/image/image_url_scrubber.rb', line 40

# Normalizes a URL that points at ik.warmlyyours.com.
#
# For such URLs the query string is removed and a trailing `.jpg`/`.jpeg`
# extension (any letter case) on the path is rewritten to `.jpeg`.
#
# @param url [String, nil] the URL to clean
# @return [String, nil] the cleaned URL, or +url+ itself, untouched, when it
#   is blank, cannot be parsed, has no path, or points at a different host
def self.clean_url(url)
  return url if url.blank?

  uri = begin
    Addressable::URI.parse(url)
  rescue StandardError
    nil # best effort: input that cannot be parsed is handed back unchanged
  end
  return url if uri.nil? || uri.path.blank?
  return url unless uri.host == 'ik.warmlyyours.com'

  uri.query = nil # e.g. "ik-sdk-version=ruby-1.0.10"
  # Normalize the extension (JPG, JPEG, jpg, ...) to "jpeg". `\z` anchors at
  # the true end of the path; `$` would also match before an embedded newline.
  uri.path = uri.path.sub(/\.jpe?g\z/i, '.jpeg')

  uri.to_s
end

.process(articles: nil, limit: nil) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'app/services/image/image_url_scrubber.rb', line 4

# Cleans ik.warmlyyours.com URLs found in the `href` of `<a>` tags and the
# `src` of `<img>` tags inside each article's `solution` HTML, using
# `clean_url`, and saves the rewritten HTML.
#
# An article is only written back when at least one URL actually changed, so
# repeated runs do not bump `updated_at` or re-serialize untouched HTML.
# Persistence uses `update_columns`.
#
# @param articles [#limit, #size, #each, nil] articles to process; defaults to
#   every Article whose solution contains "ik.warmlyyours.com". Must respond
#   to `limit` when +limit+ is given.
# @param limit [Integer, nil] optional cap applied via `articles.limit`
# @return [Array<Hash>] one `{ article_id:, in:, out: }` entry per rewritten URL
def self.process(articles: nil, limit: nil)
  articles ||= Article.where("solution like '%ik.warmlyyours.com%'")
  articles = articles.limit(limit) if limit
  results = []
  total = articles.size
  # CSS selector => attribute holding the URL to clean
  url_attributes = { 'a' => 'href', 'img' => 'src' }

  articles.each.with_index(1) do |article, index|
    puts "[#{index}/#{total}] Evaluating article #{article.id} #{article.slug}"
    # Nothing to scrub, and a blank document may not have a <body> to read back.
    next if article.solution.blank?

    html_doc = Nokogiri::HTML(article.solution)
    changed = false

    url_attributes.each do |selector, attr_name|
      html_doc.css(selector).each do |node|
        old_url = node.attribute(attr_name).to_s
        new_url = clean_url(old_url)
        next if new_url == old_url

        results << { article_id: article.id, in: old_url, out: new_url }
        node.attributes[attr_name].value = new_url
        changed = true
      end
    end

    # Skip the write entirely when no URL was rewritten.
    next unless changed

    html_out = html_doc.at('body').inner_html
    article.update_columns(solution: html_out, updated_at: Time.current)
  end
  results
end