Fixed an issue where the jobs would be written twice
This commit is contained in:
		
							parent
							
								
									9fe9efbd18
								
							
						
					
					
						commit
						38023c1aa5
					
				
							
								
								
									
										39
									
								
								scraper.go
									
									
									
									
									
								
							
							
						
						
									
										39
									
								
								scraper.go
									
									
									
									
									
								
							@ -40,13 +40,27 @@ func skillChecker(description string) skills {
 | 
			
		||||
		Typescript: strings.Contains(description, "TypeScript"),
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
// jobsToJson writes the package-level jobs slice to file as indented JSON.
//
// If encoding fails it calls log.Fatalf, which terminates the program;
// otherwise it prints a confirmation message that names fName. The file is
// not closed by this function.
func jobsToJson(file *os.File) {
 | 
			
		||||
	// Encode jobs slice to JSON
 | 
			
		||||
	encoder := json.NewEncoder(file)
 | 
			
		||||
	encoder.SetIndent("", "  ") // Pretty-print with indentation
 | 
			
		||||
	if err := encoder.Encode(jobs); err != nil {
 | 
			
		||||
		log.Fatalf("Cannot write to file %q: %s", fName, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	fmt.Println("Job details successfully written to", fName)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Slice to store job details
 | 
			
		||||
var jobs []job
 | 
			
		||||
// Package-level scraper state used by main and jobsToJson.
var (
 | 
			
		||||
	// jobs collects the scraped job details; jobsToJson encodes it to JSON.
	jobs     []job
 | 
			
		||||
	// jobCount is incremented once for each job appended to jobs.
	jobCount int
 | 
			
		||||
	// maxJobs is the job count at which the results are written and the program exits.
	maxJobs  int    = 30
 | 
			
		||||
	// fName is the output file name reported in jobsToJson's log messages.
	fName    string = "jobs.json"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func main() {
 | 
			
		||||
 | 
			
		||||
	fName := "jobs.json"
 | 
			
		||||
	file, err := os.Create(fName)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		log.Fatalf("Cannot create file %q: %s", fName, err)
 | 
			
		||||
@ -69,6 +83,8 @@ func main() {
 | 
			
		||||
	detailsCollector := c.Clone()
 | 
			
		||||
	// On every <div> element whose class attribute is "card__content", call the callback
 | 
			
		||||
	c.OnHTML("div[class=card__content]", func(e *colly.HTMLElement) {
 | 
			
		||||
		//ensure only scrape the amount of jobs specified
 | 
			
		||||
 | 
			
		||||
		// Get the title and ensure it doesn't contain any excluded words
 | 
			
		||||
		title := e.ChildText("span.card-job-find-list__position")
 | 
			
		||||
		for _, excludedWord := range excluded {
 | 
			
		||||
@ -80,15 +96,16 @@ func main() {
 | 
			
		||||
		fullLink := baseUrl + link
 | 
			
		||||
 | 
			
		||||
		detailsCollector.Visit(fullLink)
 | 
			
		||||
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	detailsCollector.OnRequest(func(r *colly.Request) {
 | 
			
		||||
		fmt.Println("Visiting", r.URL.String())
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	detailsCollector.OnHTML("div.view-job-details", func(e *colly.HTMLElement) {
 | 
			
		||||
		// Get logo and trim the url
 | 
			
		||||
	detailsCollector.OnHTML("div[class='view-job-details']", func(e *colly.HTMLElement) {
 | 
			
		||||
 | 
			
		||||
		// Get logo and trim the url
 | 
			
		||||
		logo := e.ChildAttr("div.media-item__image", "style")
 | 
			
		||||
		cutLeft := "background-image:url("
 | 
			
		||||
		cutRight := ");"
 | 
			
		||||
@ -106,6 +123,12 @@ func main() {
 | 
			
		||||
			Skills:      skillChecker(e.ChildText("content.text-block__content > span")),
 | 
			
		||||
		}
 | 
			
		||||
		jobs = append(jobs, jobDetails)
 | 
			
		||||
		jobCount++
 | 
			
		||||
		fmt.Println("Scraped job", jobCount)
 | 
			
		||||
		if jobCount == maxJobs {
 | 
			
		||||
			jobsToJson(file)
 | 
			
		||||
			os.Exit(0)
 | 
			
		||||
		}
 | 
			
		||||
	})
 | 
			
		||||
	// Handle pagination
 | 
			
		||||
	c.OnHTML("a.page-link", func(e *colly.HTMLElement) {
 | 
			
		||||
@ -119,12 +142,4 @@ func main() {
 | 
			
		||||
 | 
			
		||||
	c.Visit(searchString)
 | 
			
		||||
 | 
			
		||||
	// Encode jobs slice to JSON
 | 
			
		||||
	encoder := json.NewEncoder(file)
 | 
			
		||||
	encoder.SetIndent("", "  ") // Pretty-print with indentation
 | 
			
		||||
	if err := encoder.Encode(jobs); err != nil {
 | 
			
		||||
		log.Fatalf("Cannot write to file %q: %s", fName, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	fmt.Println("Job details successfully written to", fName)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user