From cd675c0d6a433c1f46a32107d1d45a97b69d1ce2 Mon Sep 17 00:00:00 2001 From: christian Date: Wed, 19 Jun 2024 20:26:36 +0200 Subject: [PATCH] moved cache to details collector --- main.go | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/main.go b/main.go index 9edba70..a3b703a 100644 --- a/main.go +++ b/main.go @@ -91,7 +91,7 @@ func scrapeHub() { jobs []job jobCount int fName = "thehub.json" - maxJobs = 30 + maxJobs = 20 baseUrl = "https://thehub.io" searchString = "https://thehub.io/jobs?roles=frontenddeveloper&roles=fullstackdeveloper&roles=backenddeveloper&roles=devops&paid=true&countryCode=DK&sorting=newJobs" file, err = os.Create(fName) @@ -106,13 +106,13 @@ func scrapeHub() { c := colly.NewCollector( // visit only the hub colly.AllowedDomains("www.thehub.io", "thehub.io"), - - // Cache responses to prevent multiple requests - colly.CacheDir("./thehub_cache"), ) // Instantiate a new collector to visit the job details page - detailsCollector := c.Clone() + detailsCollector := colly.NewCollector( + colly.AllowedDomains("www.thehub.io", "thehub.io"), + colly.CacheDir("./thehub_cache"), + ) // On every
<div> element with class "card__content attribute call callback c.OnHTML("div[class=card__content]", func(e *colly.HTMLElement) { if jobCount >= maxJobs { @@ -192,7 +192,7 @@ func scrapeItJobBank() { jobs []job jobCount int fName = "it-jobbank.json" - maxJobs = 30 + maxJobs = 20 baseUrl = "https://www.it-jobbank.dk" searchString = "https://www.it-jobbank.dk/jobsoegning/udvikling" file, err = os.Create(fName) @@ -208,11 +208,15 @@ func scrapeItJobBank() { colly.AllowedDomains("www.it-jobbank.dk", "it-jobbank.dk"), // Cache responses to prevent multiple requests - colly.CacheDir("./itjobbank_cache"), + // colly.CacheDir("./itjobbank_cache"), ) // Instantiate a new collector to visit the job details page - detailsCollector := c.Clone() + detailsCollector := colly.NewCollector( + colly.AllowedDomains("www.it-jobbank.dk", "it-jobbank.dk"), + // Cache responses to prevent multiple requests + colly.CacheDir("./itjobbank_cache")) + // On every
<div> element with class "card__content attribute call callback c.OnHTML("div[class=result]", func(e *colly.HTMLElement) { if jobCount >= maxJobs {