From 20511db91ffb2c2e4735696dfbec01a98d60c502 Mon Sep 17 00:00:00 2001 From: christian Date: Sat, 8 Jun 2024 16:56:35 +0200 Subject: [PATCH] Add in-memory caching (5-minute TTL) and update search URL (drop roles=devops, add search=developer) --- scraper.go | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/scraper.go b/scraper.go index f64bf31..6bc9922 100644 --- a/scraper.go +++ b/scraper.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "time" "github.com/aws/aws-lambda-go/lambda" "github.com/gocolly/colly" @@ -29,6 +30,12 @@ type skills struct { Typescript bool `json:"typescript"` } +var ( + jobs []job + lastFetch time.Time + cacheTTL = time.Minute * 5 +) + func skillChecker(description string) skills { return skills{ React: strings.Contains(description, "React"), @@ -40,13 +47,9 @@ func skillChecker(description string) skills { } } -// Slice to store job details -var jobs []job - -func handler(ctx context.Context) ([]job, error) { +func fetchData() error { baseUrl := "https://thehub.io" - searchString := "https://thehub.io/jobs?roles=frontenddeveloper&roles=fullstackdeveloper&roles=backenddeveloper&roles=devops&paid=true&countryCode=DK&sorting=newJobs" // Instantiate default collector c := colly.NewCollector( // visit only the hub @@ -109,8 +112,29 @@ func handler(ctx context.Context) ([]job, error) { e.Request.Visit(fullNextPage) } }) + // Visit the initial URL to start scraping + err := c.Visit("https://thehub.io/jobs?roles=frontenddeveloper&roles=fullstackdeveloper&roles=backenddeveloper&search=developer&paid=true&countryCode=DK&sorting=newJobs") + if err != nil { + return err + } + return nil +} + +func handler(ctx context.Context) ([]job, error) { + // Check if cache is valid + if time.Since(lastFetch) < cacheTTL && len(jobs) > 0 { + return jobs, nil + } + + // Fetch new data + err := fetchData() + if err != nil { + return nil, err + } + + // Update cache timestamp + lastFetch = time.Now() - c.Visit(searchString) return jobs, nil }