Class: CatalogItemUrlWorker
- Inherits:
-
Object
- Object
- CatalogItemUrlWorker
- Includes:
- Sidekiq::Job
- Defined in:
- app/workers/catalog_item_url_worker.rb
Constant Summary collapse
- HEADERS =
The following headers were extracted from the Chrome inspector by copying a request as a curl command.
{ 'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' }
Instance Method Summary collapse
-
#http_get(url) ⇒ Object
Using net/http instead of curb for better WebMock compatibility.
- #perform(catalog_item_id) ⇒ Object
-
#test_costco ⇒ Object
If your http_get works against Costco, it will work against almost any site.
- #test_walmart ⇒ Object
Instance Method Details
#http_get(url) ⇒ Object
Using net/http instead of curb for better WebMock compatibility
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'app/workers/catalog_item_url_worker.rb', line 14
#
# Performs a single HTTP GET against +url+ using Net::HTTP (chosen over curb
# for better WebMock compatibility) and sends every entry of HEADERS with it.
#
# TLS is switched on when the URL scheme is "https". Both the open and the
# read timeout are 10 seconds. Only one request is made; redirects are not
# followed here.
#
# @param url [String] the URL to fetch
# @return [Object] whatever Net::HTTP#request returns for the GET request
def http_get(url)
  require 'net/http'
  require 'uri'

  target = URI(url)

  client = Net::HTTP.new(target.host, target.port).tap do |conn|
    conn.use_ssl = (target.scheme == 'https')
    conn.open_timeout = 10
    conn.read_timeout = 10
  end

  get = Net::HTTP::Get.new(target)
  HEADERS.each_pair { |name, value| get[name] = value }

  client.request(get)
end
#perform(catalog_item_id) ⇒ Object
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'app/workers/catalog_item_url_worker.rb', line 39
#
# Checks whether a catalog item's URL answers with a 200-series status code
# string starting with "200" and records the outcome on the item.
#
# The item is looked up among records whose skip_url_checks is not true; the
# lookup uses +find+, so a missing or excluded id raises. Nothing is done when
# the item has no URL, or when a successful retailer probe was created within
# the last 7 days.
#
# @param catalog_item_id [Integer] id of the CatalogItem to check
# @return [Boolean, nil] the stored url_valid value, or nil when the check
#   was skipped
def perform(catalog_item_id)
  item = CatalogItem.where.not(skip_url_checks: true).find(catalog_item_id)
  return if item.url.blank?

  # Skip items that have a recent successful Oxylabs probe (within 7 days).
  # Probes use JS rendering and geo-location, making their url_valid result
  # authoritative. A naive HTTP GET overwrites it with false negatives on
  # React/SPA sites (e.g. Costco) that block simple HTTP requests.
  recently_probed = item.retailer_probes
                        .where(status: 'success')
                        .where('created_at > ?', 7.days.ago)
                        .exists?
  if recently_probed
    logger.info "Skipping URL check for #{item.id} — recent successful probe exists"
    return
  end

  # Any failure while fetching is logged and reported, then treated as an
  # invalid URL below (response stays nil).
  response = nil
  begin
    response = http_get(item.url)
    logger.info "#{item.url} result was #{response.code}"
  rescue StandardError => error
    logger.warn "Exception while retrieving url #{item.url}. #{error}"
    ErrorReporting.warning(error, source: :background, catalog_item_id: item.id, url: item.url)
  end

  reachable = response&.code&.start_with?('200') ? true : false
  # update_columns writes straight to the row with the two values given.
  item.update_columns(url_valid: reachable, url_last_checked: Time.current)
  reachable
end
#test_costco ⇒ Object
If your http_get works against Costco, it will work against almost any site
31 32 33 |
# File 'app/workers/catalog_item_url_worker.rb', line 31
#
# Manual smoke check: fetches a fixed Costco product page through http_get
# and returns the response's status code.
#
# @return [Object] the +code+ of the response returned by http_get
def test_costco
  response = http_get('https://www.costco.ca/warmlyyours-riviera-towel-warmer.product.100802733.html')
  response.code
end
#test_walmart ⇒ Object
35 36 37 |
# File 'app/workers/catalog_item_url_worker.rb', line 35
#
# Manual smoke check: fetches a fixed Walmart product page through http_get
# and returns the response's status code.
#
# @return [Object] the +code+ of the response returned by http_get
def test_walmart
  response = http_get('https://www.walmart.com/ip/Grande-10-Towel-Warmer-Black-Hardwired-10-Bars/595189227')
  response.code
end