Module: PageAssetExtractor

Defined in:
lib/page_asset_extractor.rb

Overview

Utility module that extracts statically referenced assets (post IDs, video identifiers, and publication SKUs) from CMS page templates.

Class Method Summary

Class Method Details

.extract(template) ⇒ Object



7
8
9
10
11
12
13
# File 'lib/page_asset_extractor.rb', line 7

# Collects every kind of asset reference found in a page template.
#
# @param template [String] raw template source to inspect
# @return [Hash] with keys +:posts+, +:videos+ and +:publications+, each
#   holding whatever the matching +extract_*+ helper returned
def extract(template)
  # Helpers run in the same order as the keys appear in the result.
  post_refs = extract_post_ids(template)
  video_refs = extract_video_identifiers(template)
  publication_refs = extract_publication_skus(template)

  { posts: post_refs, videos: video_refs, publications: publication_refs }
end

.extract_integers(str) ⇒ Object



63
64
65
# File 'lib/page_asset_extractor.rb', line 63

# Pulls every run of decimal digits out of the given value.
#
# @param str [#to_s] text to search; +nil+ is treated as an empty string
# @return [Array<Integer>] the digit runs converted to integers, in the
#   order they appear (duplicates are kept)
def extract_integers(str)
  text = str.to_s
  text.scan(/\d+/).map { |digits| digits.to_i }
end

.extract_post_ids(template) ⇒ Object



15
16
17
18
19
20
# File 'lib/page_asset_extractor.rb', line 15

# Gathers the integer IDs listed in +blog_post_ids: [...]+ and
# +post_ids: [...]+ array literals inside the template.
#
# The +blog_post_ids+ matches are collected first, then the +post_ids+
# matches. The second pattern is not anchored on the left, so it also
# matches the tail of a +blog_post_ids:+ key; the final +uniq+ removes the
# resulting duplicates.
#
# @param template [String] raw template source
# @return [Array<Integer>] unique IDs in first-seen order
def extract_post_ids(template)
  patterns = [
    /blog_post_ids:\s*\[([^\]]*)\]/m,
    /post_ids:\s*\[([^\]]*)\]/m
  ]

  found = patterns.flat_map do |pattern|
    template.scan(pattern).flat_map { |(list_body)| extract_integers(list_body) }
  end

  found.uniq
end

.extract_publication_skus(template) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/page_asset_extractor.rb', line 34

# Finds publication SKUs referenced by a template, either as string
# arguments to +find_publication_url(...)+ or as a quoted value after a
# +publication_code:+ key.
#
# A literal that contains +#{+ is treated as a wildcard: only the text
# before the first +#{+ is kept, as a prefix. Literals that start with an
# interpolation (blank prefix) are dropped.
#
# @param template [#to_s] raw template source; +nil+ yields empty results
# @return [Hash{Symbol => Array<String>}] +:exact_skus+ and
#   +:wildcard_prefixes+, each de-duplicated in first-seen order
def extract_publication_skus(template)
  source = template.to_s
  exact_skus = []
  wildcard_prefixes = []

  # Shared classification for both reference styles.
  classify = lambda do |literal|
    if literal.include?('#{')
      prefix = literal.split('#{').first.to_s
      wildcard_prefixes << prefix if prefix.present?
    else
      exact_skus << literal
    end
  end

  # The argument list is a run of complete quoted strings and non-paren,
  # non-quote characters, so the match ends at this call's own ")". The
  # previous pattern stopped at double quotes instead: it never matched
  # double-quoted arguments and could run on into later, unrelated calls.
  source.scan(/find_publication_url\(((?:'[^']*'|"[^"]*"|[^()'"])*)\)/) do |(args)|
    # Pair each opening quote with the same closing quote so quotes nested
    # inside an interpolation do not split the literal.
    args.scan(/'([^']+)'|"([^"]+)"/).flatten.compact.each(&classify)
  end

  source.scan(/publication_code:\s*[\'\"]([^\'\"]+)[\'\"]/m) do |(value)|
    classify.call(value)
  end

  { exact_skus: exact_skus.uniq, wildcard_prefixes: wildcard_prefixes.uniq }
end

.extract_strings(str) ⇒ Object



67
68
69
# File 'lib/page_asset_extractor.rb', line 67

def extract_strings(str)
  str.to_s.scan(/[\'\"]([^\'\"]+)[\'\"]/).flatten.map(&:strip)
end

.extract_video_identifiers(template) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
# File 'lib/page_asset_extractor.rb', line 22

# Collects the video references listed in a template.
#
# Recognized forms:
# * +slugs: %w[...]+ and +slugs: [...]+ (quoted strings)
# * +cloudflare_uids: %w[...]+ and +cloudflare_uids: [...]+ (quoted strings)
# * +ids: [...]+ (integers)
#
# @param template [#to_s] raw template source; +nil+ yields empty results
# @return [Hash{Symbol => Array}] +:slugs+ and +:uids+ (strings) and +:ids+
#   (integers), each de-duplicated in first-seen order
def extract_video_identifiers(template)
  source = template.to_s
  slugs = []
  uids = []
  ids = []

  # Splits the body of a %w[...] literal into its whitespace-separated words.
  words = ->(body) { body.to_s.split(/\s+/).map(&:strip).reject(&:blank?) }

  source.scan(/slugs:\s*%w\[([^\]]*)\]/m) { |(body)| slugs |= words.call(body) }
  source.scan(/slugs:\s*\[([^\]]*)\]/m) { |(body)| slugs |= extract_strings(body) }
  source.scan(/cloudflare_uids:\s*%w\[([^\]]*)\]/m) { |(body)| uids |= words.call(body) }
  source.scan(/cloudflare_uids:\s*\[([^\]]*)\]/m) { |(body)| uids |= extract_strings(body) }

  # An unanchored "ids:" also matches the tail of "post_ids:",
  # "blog_post_ids:" and "cloudflare_uids:", which would report post IDs and
  # digit runs inside UID strings as video IDs. The lookbehinds exclude
  # exactly those sibling keys; any other "...ids:" key still matches.
  source.scan(/(?<!post_)(?<!cloudflare_u)ids:\s*\[([^\]]*)\]/m) { |(body)| ids |= extract_integers(body) }

  { slugs: slugs.uniq, uids: uids.uniq, ids: ids.uniq }
end