diff --git a/scraper.go b/scraper.go
index 00a6023..8a2b32d 100644
--- a/scraper.go
+++ b/scraper.go
@@ -40,13 +40,27 @@ func skillChecker(description string) skills {
Typescript: strings.Contains(description, "TypeScript"),
}
}
+// jobsToJson pretty-prints the package-level jobs slice as JSON into file.
+// If encoding fails it reports the error through log.Fatalf, ending the run.
+func jobsToJson(file *os.File) {
+	enc := json.NewEncoder(file)
+	enc.SetIndent("", " ")
+	if encodeErr := enc.Encode(jobs); encodeErr != nil {
+		log.Fatalf("Cannot write to file %q: %s", fName, encodeErr)
+	}
+	fmt.Println("Job details successfully written to", fName)
+}
// Slice to store job details
-var jobs []job
+var (
+	jobs     []job         // job details collected so far
+	jobCount int           // number of entries appended to jobs
+	maxJobs  = 30          // jobs are written out and the run ends once jobCount reaches this
+	fName    = "jobs.json" // path of the output file the jobs are written to
+)
func main() {
- fName := "jobs.json"
file, err := os.Create(fName)
if err != nil {
log.Fatalf("Cannot create file %q: %s", fName, err)
@@ -69,6 +83,8 @@ func main() {
detailsCollector := c.Clone()
// On every <div> element with class "card__content attribute call callback
c.OnHTML("div[class=card__content]", func(e *colly.HTMLElement) {
+ // Ensure we only scrape the number of jobs specified by maxJobs
+
// Get the title and ensure it doesn't contain any excluded words
title := e.ChildText("span.card-job-find-list__position")
for _, excludedWord := range excluded {
@@ -80,15 +96,16 @@ func main() {
fullLink := baseUrl + link
detailsCollector.Visit(fullLink)
+
})
detailsCollector.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting", r.URL.String())
})
- detailsCollector.OnHTML("div.view-job-details", func(e *colly.HTMLElement) {
- // Get logo and trim the url
+ detailsCollector.OnHTML("div[class='view-job-details']", func(e *colly.HTMLElement) {
+ // Get logo and trim the url
logo := e.ChildAttr("div.media-item__image", "style")
cutLeft := "background-image:url("
cutRight := ");"
@@ -106,6 +123,12 @@ func main() {
Skills: skillChecker(e.ChildText("content.text-block__content > span")),
}
jobs = append(jobs, jobDetails)
+ jobCount++
+ fmt.Println("Scraped job", jobCount)
+ if jobCount == maxJobs {
+ jobsToJson(file)
+ os.Exit(0)
+ }
})
// Handle pagination
c.OnHTML("a.page-link", func(e *colly.HTMLElement) {
@@ -119,12 +142,8 @@ func main() {
c.Visit(searchString)
- // Encode jobs slice to JSON
- encoder := json.NewEncoder(file)
- encoder.SetIndent("", " ") // Pretty-print with indentation
- if err := encoder.Encode(jobs); err != nil {
- log.Fatalf("Cannot write to file %q: %s", fName, err)
- }
-
- fmt.Println("Job details successfully written to", fName)
+	// The details callback only calls jobsToJson when jobCount reaches
+	// maxJobs (and then exits). If execution gets here, that limit was not
+	// hit, so write the collected jobs now, as the pre-change code did.
+	jobsToJson(file)
}