initial commit
This commit is contained in:
		
						commit
						9fe9efbd18
					
				
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@ -0,0 +1,2 @@
 | 
			
		||||
/thehub_cache
 | 
			
		||||
/jobs.json
 | 
			
		||||
							
								
								
									
										22
									
								
								go.mod
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								go.mod
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,22 @@
 | 
			
		||||
module go-scraper
 | 
			
		||||
 | 
			
		||||
go 1.22.3
 | 
			
		||||
 | 
			
		||||
require (
 | 
			
		||||
	github.com/PuerkitoBio/goquery v1.9.2 // indirect
 | 
			
		||||
	github.com/andybalholm/cascadia v1.3.2 // indirect
 | 
			
		||||
	github.com/antchfx/htmlquery v1.3.1 // indirect
 | 
			
		||||
	github.com/antchfx/xmlquery v1.4.0 // indirect
 | 
			
		||||
	github.com/antchfx/xpath v1.3.0 // indirect
 | 
			
		||||
	github.com/gobwas/glob v0.2.3 // indirect
 | 
			
		||||
	github.com/gocolly/colly v1.2.0
 | 
			
		||||
	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
 | 
			
		||||
	github.com/golang/protobuf v1.5.2 // indirect
 | 
			
		||||
	github.com/kennygrant/sanitize v1.2.4 // indirect
 | 
			
		||||
	github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
 | 
			
		||||
	github.com/temoto/robotstxt v1.1.2 // indirect
 | 
			
		||||
	golang.org/x/net v0.26.0 // indirect
 | 
			
		||||
	golang.org/x/text v0.16.0 // indirect
 | 
			
		||||
	google.golang.org/appengine v1.6.8 // indirect
 | 
			
		||||
	google.golang.org/protobuf v1.26.0 // indirect
 | 
			
		||||
)
 | 
			
		||||
							
								
								
									
										76
									
								
								go.sum
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								go.sum
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,76 @@
 | 
			
		||||
github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
 | 
			
		||||
github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
 | 
			
		||||
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
 | 
			
		||||
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
 | 
			
		||||
github.com/antchfx/htmlquery v1.3.1 h1:wm0LxjLMsZhRHfQKKZscDf2COyH4vDYA3wyH+qZ+Ylc=
 | 
			
		||||
github.com/antchfx/htmlquery v1.3.1/go.mod h1:PTj+f1V2zksPlwNt7uVvZPsxpKNa7mlVliCRxLX6Nx8=
 | 
			
		||||
github.com/antchfx/xmlquery v1.4.0 h1:xg2HkfcRK2TeTbdb0m1jxCYnvsPaGY/oeZWTGqX/0hA=
 | 
			
		||||
github.com/antchfx/xmlquery v1.4.0/go.mod h1:Ax2aeaeDjfIw3CwXKDQ0GkwZ6QlxoChlIBP+mGnDFjI=
 | 
			
		||||
github.com/antchfx/xpath v1.3.0 h1:nTMlzGAK3IJ0bPpME2urTuFL76o4A96iYvoKFHRXJgc=
 | 
			
		||||
github.com/antchfx/xpath v1.3.0/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
 | 
			
		||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 | 
			
		||||
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
 | 
			
		||||
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
 | 
			
		||||
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
 | 
			
		||||
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
 | 
			
		||||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
 | 
			
		||||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
 | 
			
		||||
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
 | 
			
		||||
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
 | 
			
		||||
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
 | 
			
		||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 | 
			
		||||
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
 | 
			
		||||
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
 | 
			
		||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 | 
			
		||||
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
 | 
			
		||||
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
 | 
			
		||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 | 
			
		||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 | 
			
		||||
github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg=
 | 
			
		||||
github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
 | 
			
		||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 | 
			
		||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 | 
			
		||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 | 
			
		||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
 | 
			
		||||
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
 | 
			
		||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 | 
			
		||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 | 
			
		||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
 | 
			
		||||
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
 | 
			
		||||
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
 | 
			
		||||
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
 | 
			
		||||
golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
 | 
			
		||||
golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
 | 
			
		||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 | 
			
		||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 | 
			
		||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 | 
			
		||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 | 
			
		||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 | 
			
		||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 | 
			
		||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 | 
			
		||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 | 
			
		||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 | 
			
		||||
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 | 
			
		||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 | 
			
		||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 | 
			
		||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
 | 
			
		||||
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
 | 
			
		||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 | 
			
		||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 | 
			
		||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
 | 
			
		||||
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
 | 
			
		||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 | 
			
		||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 | 
			
		||||
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
 | 
			
		||||
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
 | 
			
		||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 | 
			
		||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 | 
			
		||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 | 
			
		||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
 | 
			
		||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 | 
			
		||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 | 
			
		||||
google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
 | 
			
		||||
google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
 | 
			
		||||
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 | 
			
		||||
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
 | 
			
		||||
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
 | 
			
		||||
							
								
								
									
										13
									
								
								readme.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								readme.md
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,13 @@
 | 
			
		||||
# The Hub Scraper
 | 
			
		||||
 | 
			
		||||
This is a simple scraper that extracts job details from [The Hub](https://thehub.io).
 | 
			
		||||
 | 
			
		||||
## Usage
 | 
			
		||||
 | 
			
		||||
To run the scraper, simply execute the following command:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
go run scraper.go
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The scraper will create a `jobs.json` file in the current directory, which contains a list of job details in JSON format.
 | 
			
		||||
							
								
								
									
										130
									
								
								scraper.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										130
									
								
								scraper.go
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,130 @@
 | 
			
		||||
package main
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"encoding/json"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"log"
 | 
			
		||||
	"os"
 | 
			
		||||
	"strings"
 | 
			
		||||
 | 
			
		||||
	"github.com/gocolly/colly"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type job struct {
 | 
			
		||||
	Title       string `json:"title"`
 | 
			
		||||
	Logo        string `json:"logo"`
 | 
			
		||||
	Company     string `json:"company"`
 | 
			
		||||
	Location    string `json:"location"`
 | 
			
		||||
	Type        string `json:"type"`
 | 
			
		||||
	Description string `json:"description"`
 | 
			
		||||
	Link        string `json:"link"`
 | 
			
		||||
	Skills      skills `json:"skills"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// skills flags which technologies are mentioned in a job description.
type skills struct {
	React      bool `json:"react"`
	Python     bool `json:"python"`
	Golang     bool `json:"golang"`
	Svelte     bool `json:"svelte"`
	Nextjs     bool `json:"nextjs"`
	Typescript bool `json:"typescript"`
}

// skillChecker reports which of the tracked technologies appear in
// description.
//
// Most skills are detected by a case-sensitive substring match on their
// usual spelling. Go is the exception: a plain substring search for "Go"
// also fires on words such as "Google" or "Good", so Golang is set only
// when "Go" occurs as a standalone word or when "golang" occurs in any
// letter case. A capitalised English "Go" (for example at the start of a
// sentence) is still counted; that ambiguity cannot be resolved from the
// text alone.
func skillChecker(description string) skills {
	mentionsGo := containsWord(description, "Go") ||
		strings.Contains(strings.ToLower(description), "golang")

	return skills{
		React:      strings.Contains(description, "React"),
		Python:     strings.Contains(description, "Python"),
		Golang:     mentionsGo,
		Svelte:     strings.Contains(description, "Svelte"),
		Nextjs:     strings.Contains(description, "Next.js"),
		Typescript: strings.Contains(description, "TypeScript"),
	}
}

// containsWord reports whether word occurs in text with no letter, digit or
// underscore directly before or after it. The match is case-sensitive.
func containsWord(text, word string) bool {
	if word == "" {
		return false
	}
	for from := 0; from < len(text); {
		idx := strings.Index(text[from:], word)
		if idx < 0 {
			return false
		}
		begin := from + idx
		end := begin + len(word)
		leftOK := begin == 0 || !isWordByte(text[begin-1])
		rightOK := end == len(text) || !isWordByte(text[end])
		if leftOK && rightOK {
			return true
		}
		// Resume just past the start of this occurrence so overlapping
		// candidates are still examined.
		from = begin + 1
	}
	return false
}

// isWordByte reports whether b can be part of a word. Bytes outside ASCII
// are treated as word characters so that a match never ends in the middle
// of a multi-byte UTF-8 letter.
func isWordByte(b byte) bool {
	switch {
	case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z', b >= '0' && b <= '9':
		return true
	case b == '_', b >= 0x80:
		return true
	}
	return false
}
 | 
			
		||||
 | 
			
		||||
// Slice to store job details
 | 
			
		||||
var jobs []job
 | 
			
		||||
 | 
			
		||||
func main() {
 | 
			
		||||
 | 
			
		||||
	fName := "jobs.json"
 | 
			
		||||
	file, err := os.Create(fName)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		log.Fatalf("Cannot create file %q: %s", fName, err)
 | 
			
		||||
	}
 | 
			
		||||
	defer file.Close()
 | 
			
		||||
	baseUrl := "https://thehub.io"
 | 
			
		||||
	searchString := "https://thehub.io/jobs?roles=frontenddeveloper&roles=fullstackdeveloper&roles=backenddeveloper&roles=devops&paid=true&countryCode=DK&sorting=newJobs"
 | 
			
		||||
	// Instantiate default collector
 | 
			
		||||
	c := colly.NewCollector(
 | 
			
		||||
		// visit only the hub
 | 
			
		||||
		colly.AllowedDomains("www.thehub.io", "thehub.io"),
 | 
			
		||||
 | 
			
		||||
		// Cache responses to prevent multiple requests
 | 
			
		||||
		colly.CacheDir("./thehub_cache"),
 | 
			
		||||
	)
 | 
			
		||||
 | 
			
		||||
	// Slice of excluded words in the job titles
 | 
			
		||||
	excluded := []string{"senior", "lead"}
 | 
			
		||||
	// Instantiate a new collector to visit the job details page
 | 
			
		||||
	detailsCollector := c.Clone()
 | 
			
		||||
	// On every <div> element with class "card__content attribute call callback
 | 
			
		||||
	c.OnHTML("div[class=card__content]", func(e *colly.HTMLElement) {
 | 
			
		||||
		// Get the title and ensure it doesn't contain any excluded words
 | 
			
		||||
		title := e.ChildText("span.card-job-find-list__position")
 | 
			
		||||
		for _, excludedWord := range excluded {
 | 
			
		||||
			if strings.Contains(strings.ToLower(title), excludedWord) {
 | 
			
		||||
				return
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		link := e.ChildAttr("a", "href")
 | 
			
		||||
		fullLink := baseUrl + link
 | 
			
		||||
 | 
			
		||||
		detailsCollector.Visit(fullLink)
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	detailsCollector.OnRequest(func(r *colly.Request) {
 | 
			
		||||
		fmt.Println("Visiting", r.URL.String())
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	detailsCollector.OnHTML("div.view-job-details", func(e *colly.HTMLElement) {
 | 
			
		||||
		// Get logo and trim the url
 | 
			
		||||
 | 
			
		||||
		logo := e.ChildAttr("div.media-item__image", "style")
 | 
			
		||||
		cutLeft := "background-image:url("
 | 
			
		||||
		cutRight := ");"
 | 
			
		||||
		trimmedLogo := strings.Trim(logo, cutLeft+cutRight)
 | 
			
		||||
 | 
			
		||||
		// Get company name
 | 
			
		||||
		jobDetails := job{
 | 
			
		||||
			Title:       e.ChildText("h2[class=view-job-details__title]"),
 | 
			
		||||
			Logo:        trimmedLogo,
 | 
			
		||||
			Company:     e.ChildText(".bullet-inline-list > a:first-child"),
 | 
			
		||||
			Location:    e.ChildText(".bullet-inline-list > a:nth-child(2)"),
 | 
			
		||||
			Type:        e.ChildText(".bullet-inline-list > a:nth-child(3)"),
 | 
			
		||||
			Description: e.ChildText("content.text-block__content > span"),
 | 
			
		||||
			Link:        e.Request.URL.String(),
 | 
			
		||||
			Skills:      skillChecker(e.ChildText("content.text-block__content > span")),
 | 
			
		||||
		}
 | 
			
		||||
		jobs = append(jobs, jobDetails)
 | 
			
		||||
	})
 | 
			
		||||
	// Handle pagination
 | 
			
		||||
	c.OnHTML("a.page-link", func(e *colly.HTMLElement) {
 | 
			
		||||
		nextPage := e.Attr("href")
 | 
			
		||||
		if nextPage != "" {
 | 
			
		||||
			fullNextPage := baseUrl + nextPage
 | 
			
		||||
			fmt.Println("Visiting next page:", fullNextPage)
 | 
			
		||||
			e.Request.Visit(fullNextPage)
 | 
			
		||||
		}
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	c.Visit(searchString)
 | 
			
		||||
 | 
			
		||||
	// Encode jobs slice to JSON
 | 
			
		||||
	encoder := json.NewEncoder(file)
 | 
			
		||||
	encoder.SetIndent("", "  ") // Pretty-print with indentation
 | 
			
		||||
	if err := encoder.Encode(jobs); err != nil {
 | 
			
		||||
		log.Fatalf("Cannot write to file %q: %s", fName, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	fmt.Println("Job details successfully written to", fName)
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user