Commit 7049dcc992

.gitea/workflows/build-all-prod.yaml (Normal file, 50 lines)
@@ -0,0 +1,50 @@
name: Build and Push Docker Images

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    services:
      docker:
        image: docker:19.03.12
        options: --privileged

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Gitea
        uses: docker/login-action@v3
        with:
          registry: gitea.rannes.dev
          username: christian
          password: ${{ secrets.REGISTRY_TOKEN }}

      - name: Build and push Flask API image
        uses: docker/build-push-action@v5
        with:
          context: ./api
          push: true
          tags: gitea.rannes.dev/rannes.dev/sw-jobs-api:latest

      - name: Build and push Svelte client image
        uses: docker/build-push-action@v5
        with:
          context: ./client
          push: true
          tags: gitea.rannes.dev/rannes.dev/sw-jobs-client:latest

      - name: Build and push scraper image
        uses: docker/build-push-action@v5
        with:
          context: ./scrapers
          push: true
          tags: gitea.rannes.dev/rannes.dev/sw-jobs-scraper:latest
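
Once a push to main has gone through this workflow, the three images can be pulled back from the registry as a quick check that the build-and-push steps worked. A minimal sketch, assuming the account used has pull access to the rannes.dev organisation on gitea.rannes.dev:

```bash
# Log in to the same registry the workflow pushes to
docker login gitea.rannes.dev -u christian

# Pull the three images published by the workflow
docker pull gitea.rannes.dev/rannes.dev/sw-jobs-api:latest
docker pull gitea.rannes.dev/rannes.dev/sw-jobs-client:latest
docker pull gitea.rannes.dev/rannes.dev/sw-jobs-scraper:latest
```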

api/Dockerfile (Normal file, 17 lines)
@@ -0,0 +1,17 @@
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the current directory contents into the container at /app
COPY . /app

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Make port 5000 available to the world outside this container
EXPOSE 5000

# Run gunicorn server (the Flask app lives in api.py, so the module is "api")
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "api:app"]

api/__pycache__/api.cpython-312.pyc (Normal file, binary; not shown)

api/api.py (Normal file, 20 lines)
@@ -0,0 +1,20 @@
from flask import Flask, jsonify
import json
import os

app = Flask(__name__)

# Path to your JSON file
db_file = os.path.join('scrapers', 'jobs_db.json')

@app.route('/api/jobs', methods=['GET'])
def get_jobs():
    if os.path.exists(db_file):
        with open(db_file, 'r') as file:
            jobs = json.load(file)
    else:
        jobs = []
    return jsonify(jobs)

if __name__ == '__main__':
    app.run(debug=False, host='0.0.0.0')
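
With the API running (via the container above or plain `python api.py`), the single endpoint can be checked with curl; it returns the contents of scrapers/jobs_db.json, or an empty array if the file does not exist yet:

```bash
curl http://localhost:5000/api/jobs
```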

client/.gitignore (Normal file, vendored, 10 lines)
@@ -0,0 +1,10 @@
.DS_Store
node_modules
/build
/.svelte-kit
/package
.env
.env.*
!.env.example
vite.config.js.timestamp-*
vite.config.ts.timestamp-*

client/.npmrc (Normal file, 1 line)
@@ -0,0 +1 @@
engine-strict=true

client/.prettierignore (Normal file, 4 lines)
@@ -0,0 +1,4 @@
# Ignore files for PNPM, NPM and YARN
pnpm-lock.yaml
package-lock.json
yarn.lock

client/.prettierrc (Normal file, 8 lines)
@@ -0,0 +1,8 @@
{
	"useTabs": true,
	"singleQuote": true,
	"trailingComma": "none",
	"printWidth": 100,
	"plugins": ["prettier-plugin-svelte"],
	"overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }]
}

client/Dockerfile (Normal file, 21 lines)
@@ -0,0 +1,21 @@
# Use an official node runtime as a parent image
FROM node:20

# Set the working directory to /client
WORKDIR /client

# Copy package.json and package-lock.json to /client
COPY package*.json ./

# Install any needed packages specified in package.json
RUN npm install

# Copy the current directory contents into the container at /client
COPY . .

# Build the app
RUN npm run build

# Serve the app
RUN npm install -g serve
CMD ["serve", "-s", "public", "-l", "3000"]

client/README.md (Normal file, 38 lines)
@@ -0,0 +1,38 @@
# create-svelte

Everything you need to build a Svelte project, powered by [`create-svelte`](https://github.com/sveltejs/kit/tree/main/packages/create-svelte).

## Creating a project

If you're seeing this, you've probably already done this step. Congrats!

```bash
# create a new project in the current directory
npm create svelte@latest

# create a new project in my-app
npm create svelte@latest my-app
```

## Developing

Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:

```bash
npm run dev

# or start the server and open the app in a new browser tab
npm run dev -- --open
```

## Building

To create a production version of your app:

```bash
npm run build
```

You can preview the production build with `npm run preview`.

> To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment.

client/eslint.config.js (Normal file, 33 lines)
@@ -0,0 +1,33 @@
import js from '@eslint/js';
import ts from 'typescript-eslint';
import svelte from 'eslint-plugin-svelte';
import prettier from 'eslint-config-prettier';
import globals from 'globals';

/** @type {import('eslint').Linter.FlatConfig[]} */
export default [
	js.configs.recommended,
	...ts.configs.recommended,
	...svelte.configs['flat/recommended'],
	prettier,
	...svelte.configs['flat/prettier'],
	{
		languageOptions: {
			globals: {
				...globals.browser,
				...globals.node
			}
		}
	},
	{
		files: ['**/*.svelte'],
		languageOptions: {
			parserOptions: {
				parser: ts.parser
			}
		}
	},
	{
		ignores: ['build/', '.svelte-kit/', 'dist/']
	}
];

client/package.json (Normal file, 33 lines)
@@ -0,0 +1,33 @@
{
	"name": "client",
	"version": "0.0.1",
	"private": true,
	"scripts": {
		"dev": "vite dev",
		"build": "vite build",
		"preview": "vite preview",
		"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
		"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
		"lint": "prettier --check . && eslint .",
		"format": "prettier --write ."
	},
	"devDependencies": {
		"@sveltejs/adapter-auto": "^3.0.0",
		"@sveltejs/kit": "^2.0.0",
		"@sveltejs/vite-plugin-svelte": "^3.0.0",
		"@types/eslint": "^8.56.7",
		"eslint": "^9.0.0",
		"eslint-config-prettier": "^9.1.0",
		"eslint-plugin-svelte": "^2.36.0",
		"globals": "^15.0.0",
		"prettier": "^3.1.1",
		"prettier-plugin-svelte": "^3.1.2",
		"svelte": "^5.0.0-next.1",
		"svelte-check": "^3.6.0",
		"tslib": "^2.4.1",
		"typescript": "^5.0.0",
		"typescript-eslint": "^8.0.0-alpha.20",
		"vite": "^5.0.3"
	},
	"type": "module"
}

client/src/app.d.ts (Normal file, vendored, 13 lines)
@@ -0,0 +1,13 @@
// See https://kit.svelte.dev/docs/types#app
// for information about these interfaces
declare global {
	namespace App {
		// interface Error {}
		// interface Locals {}
		// interface PageData {}
		// interface PageState {}
		// interface Platform {}
	}
}

export {};

client/src/app.html (Normal file, 12 lines)
@@ -0,0 +1,12 @@
<!doctype html>
<html lang="en">
	<head>
		<meta charset="utf-8" />
		<link rel="icon" href="%sveltekit.assets%/favicon.png" />
		<meta name="viewport" content="width=device-width, initial-scale=1" />
		%sveltekit.head%
	</head>
	<body data-sveltekit-preload-data="hover">
		<div style="display: contents">%sveltekit.body%</div>
	</body>
</html>

client/src/lib/index.ts (Normal file, 1 line)
@@ -0,0 +1 @@
// place files you want to import through the `$lib` alias in this folder.

client/src/routes/+page.svelte (Normal file, 2 lines)
@@ -0,0 +1,2 @@
<h1>Welcome to SvelteKit</h1>
<p>Visit <a href="https://kit.svelte.dev">kit.svelte.dev</a> to read the documentation</p>

client/static/favicon.png (Normal file, binary, 1.5 KiB; not shown)

client/svelte.config.js (Normal file, 18 lines)
@@ -0,0 +1,18 @@
import adapter from '@sveltejs/adapter-auto';
import { vitePreprocess } from '@sveltejs/vite-plugin-svelte';

/** @type {import('@sveltejs/kit').Config} */
const config = {
	// Consult https://kit.svelte.dev/docs/integrations#preprocessors
	// for more information about preprocessors
	preprocess: vitePreprocess(),

	kit: {
		// adapter-auto only supports some environments, see https://kit.svelte.dev/docs/adapter-auto for a list.
		// If your environment is not supported, or you settled on a specific environment, switch out the adapter.
		// See https://kit.svelte.dev/docs/adapters for more information about adapters.
		adapter: adapter()
	}
};

export default config;

client/tsconfig.json (Normal file, 19 lines)
@@ -0,0 +1,19 @@
{
	"extends": "./.svelte-kit/tsconfig.json",
	"compilerOptions": {
		"allowJs": true,
		"checkJs": true,
		"esModuleInterop": true,
		"forceConsistentCasingInFileNames": true,
		"resolveJsonModule": true,
		"skipLibCheck": true,
		"sourceMap": true,
		"strict": true,
		"moduleResolution": "bundler"
	}
	// Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias
	// except $lib which is handled by https://kit.svelte.dev/docs/configuration#files
	//
	// If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes
	// from the referenced tsconfig.json - TypeScript does not merge them in
}

client/vite.config.ts (Normal file, 6 lines)
@@ -0,0 +1,6 @@
import { sveltekit } from '@sveltejs/kit/vite';
import { defineConfig } from 'vite';

export default defineConfig({
	plugins: [sveltekit()]
});

docker-compose.yaml (Normal file, 27 lines)
@@ -0,0 +1,27 @@
version: "3.8"

services:
  api:
    build:
      context: ./api
      dockerfile: Dockerfile
    ports:
      - "5000:5000"
    volumes:
      - ./scrapers:/app/scrapers
    environment:
      - FLASK_ENV=production

  client:
    build:
      context: ./client
      dockerfile: Dockerfile
    ports:
      - "3000:3000"

  scraper:
    build:
      context: ./scrapers
      dockerfile: Dockerfile
    volumes:
      - ./scrapers:/app
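
The whole stack can be brought up from the repository root with Compose, which builds all three images locally instead of pulling them from the registry:

```bash
# Build and start the api (port 5000), client (port 3000) and scraper services
docker compose up --build -d

# Follow the scraper container's output
docker compose logs -f scraper
```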

requirements.txt (Normal file, binary; not shown)

scrapers/Dockerfile (Normal file, 29 lines)
@@ -0,0 +1,29 @@
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the current directory contents into the container at /app
COPY . /app

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the crontab file to the cron.d directory
COPY crontab /etc/cron.d/scraper-cron

# Give execution rights on the cron job
RUN chmod 0644 /etc/cron.d/scraper-cron

# Create the log file to be able to run tail
RUN touch /var/log/cron.log

# Copy the script to run the scraper
COPY run_scraper.sh /usr/local/bin/run_scraper.sh

# Grant execution rights to the script
RUN chmod +x /usr/local/bin/run_scraper.sh

# Run the command on container startup
CMD ["cron", "-f"]

scrapers/crontab (Normal file, 2 lines)
@@ -0,0 +1,2 @@
# Run the scraper every hour
0 * * * * root /usr/local/bin/run_scraper.sh >> /var/log/cron.log 2>&1
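
The entry fires at minute 0 of every hour as root and appends both stdout and stderr to /var/log/cron.log. To confirm the job is actually running, that log can be tailed inside the container; a sketch assuming the compose service name scraper and that cron is available in the image:

```bash
docker compose exec scraper tail -f /var/log/cron.log
```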

scrapers/jobindex.py (Normal file, 128 lines)
@@ -0,0 +1,128 @@
import requests
from bs4 import BeautifulSoup
import json
import os
from datetime import datetime, timedelta

# Base URL of the job listings page for software development jobs
base_url = 'https://www.jobindex.dk/jobsoegning/it/'

# Keywords to include in the job listings
include_keywords = ['software', 'nextjs', 'svelte']

# Keywords to exclude from the job titles
exclude_keywords = ['senior']

# File path for the JSON database
db_file = 'jobs_db.json'

# Load existing jobs from the JSON file if it exists
if os.path.exists(db_file):
    try:
        with open(db_file, 'r') as file:
            existing_jobs = json.load(file)
        print(f"Loaded {len(existing_jobs)} existing jobs from {db_file}")
    except json.JSONDecodeError:
        print("Error reading JSON file. Starting with an empty list.")
        existing_jobs = []
else:
    print("JSON file not found. Starting with an empty list.")
    existing_jobs = []

# Convert existing jobs to a set of IDs for duplicate checking
existing_job_ids = set(job['id'] for job in existing_jobs)

# Function to fetch and parse a page
def fetch_jobs_from_page(url):
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(f"Failed to load page {url}")
    return BeautifulSoup(response.content, 'html.parser')

# Function to extract jobs from the parsed HTML
def extract_jobs(soup):
    job_listings = soup.find_all('div', class_='jobsearch-result')
    jobs = []
    today = datetime.today()
    for job in job_listings:
        title_element = job.find('h4').find('a')
        company_element = job.find('div', class_='jix-toolbar-top__company')
        location_element = job.find('span', class_='jix_robotjob--area')
        date_posted_element = job.find('time')
        job_link_element = job.find('h4').find('a')
        description_element = job.find('p')

        title = title_element.get_text(strip=True) if title_element else ''
        url = company_element.find('a', rel='noopener')['href'] if company_element and company_element.find('a', rel='noopener') else ''
        img = f"https://www.jobindex.dk{job.find('img')['src']}" if job.find('img') else ''
        company = company_element.find('a').get_text(strip=True) if company_element else ''
        location = location_element.get_text(strip=True) if location_element else ''
        date_posted = date_posted_element['datetime'] if date_posted_element else ''
        job_link = job_link_element['href'] if job_link_element else ''
        description = description_element.get_text(strip=True) if description_element else ''

        print(f"Debug: title={title}, url={url}, img={img}, company={company}, location={location}, date_posted={date_posted}, job_link={job_link}, description={description}")  # Detailed debug print

        # Convert date_posted to datetime object
        try:
            date_posted_dt = datetime.strptime(date_posted, '%Y-%m-%d')
            if (today - date_posted_dt).days > 3:
                print("Job older than 3 days found. Stopping the scraper.")
                return jobs, False  # Returning jobs and False to indicate stopping
        except ValueError:
            continue

        combined_text = f"{title} {description}".lower()
        if not any(exclude_keyword in title.lower() for exclude_keyword in exclude_keywords) and any(include_keyword in combined_text for include_keyword in include_keywords):
            job_id = f"{title}-{url}-{location}-{date_posted}"
            if job_id not in existing_job_ids:
                jobs.append({
                    'id': job_id,
                    'title': title,
                    'url': url,
                    'img': img,
                    'company': company,
                    'location': location,
                    'date_posted': date_posted,
                    'link': job_link
                })
                existing_job_ids.add(job_id)
                print(f"Added job: {job_id}")  # Debug print for each added job

    return jobs, True

# Function to find the next page URL
def get_next_page_url(soup):
    next_page = soup.find('a', {'aria-label': 'Næste'})
    return next_page['href'] if next_page else None

# Main scraping loop
current_url = base_url
all_jobs = []

while current_url:
    print(f"Fetching jobs from: {current_url}")
    soup = fetch_jobs_from_page(current_url)
    jobs, continue_scraping = extract_jobs(soup)
    all_jobs.extend(jobs)
    print(f"Collected {len(jobs)} jobs from this page.")
    if not continue_scraping:
        break
    current_url = get_next_page_url(soup)

# Combine the existing jobs with the new jobs
all_jobs = existing_jobs + all_jobs

# Remove jobs older than 30 days from the combined list
cutoff_date = datetime.today() - timedelta(days=30)
all_jobs = [job for job in all_jobs if datetime.strptime(job['date_posted'], '%Y-%m-%d') >= cutoff_date]

# Final debug print before saving
print(f"Total jobs to be saved: {len(all_jobs)}")
print(f"Jobs to be saved: {all_jobs}")  # Debug print to show jobs to be saved

# Save the new jobs to the JSON file
with open(db_file, 'w') as file:
    json.dump(all_jobs, file, indent=4)

print(f"Total jobs saved: {len(all_jobs)}")  # Final output after saving to file

scrapers/jobs_db.json (Normal file, 142 lines)
@@ -0,0 +1,142 @@
[
    {
        "id": "Software developer-https://brodersen.com/-Randers NV-2024-05-29",
        "title": "Software developer",
        "url": "https://brodersen.com/",
        "img": "https://www.jobindex.dk/img/logo/BrodersenLogo.gif",
        "company": "Brodersen A/S",
        "location": "Randers NV",
        "date_posted": "2024-05-29",
        "link": "https://www.jobindex.dk/jobannonce/h1458379/software-developer"
    },
    {
        "id": "Vi s\u00f8ger en erfaren softwareudvikler til et ambiti\u00f8st produktteam-https://www.ok.dk/-Viby J-2024-05-29",
        "title": "Vi s\u00f8ger en erfaren softwareudvikler til et ambiti\u00f8st produktteam",
        "url": "https://www.ok.dk/",
        "img": "https://www.jobindex.dk/img/logo/okamba.gif",
        "company": "OK a.m.b.a.",
        "location": "Viby J",
        "date_posted": "2024-05-29",
        "link": "https://www.ok.dk/om-ok/job?hr=show-job%2f201641&linkref=204872&locale=da_DK"
    },
    {
        "id": "Software Engineer to energy trading in Denmark or Singapore-https://www.powermart.eu/--2024-05-29",
        "title": "Software Engineer to energy trading in Denmark or Singapore",
        "url": "https://www.powermart.eu/",
        "img": "https://www.jobindex.dk/img/logo/PMLogo-2019.jpg",
        "company": "PowerMart ApS",
        "location": "",
        "date_posted": "2024-05-29",
        "link": "https://www.jobindex.dk/img/pdf/PM_SOFTWARE_ENGINEER_20240529_LOST.pdf"
    },
    {
        "id": "Lyngsoe Systems s\u00f8ger erfaren Softwareudvikler - Bliv en del af vores dynamiske team!-http://www.hviidoglarsen.dk/-Aars-2024-05-29",
        "title": "Lyngsoe Systems s\u00f8ger erfaren Softwareudvikler - Bliv en del af vores dynamiske team!",
        "url": "http://www.hviidoglarsen.dk/",
        "img": "https://www.jobindex.dk/img/logo/LyngsoeSystems_logo_2018.png",
        "company": "Hviid & Larsen ApS",
        "location": "Aars",
        "date_posted": "2024-05-29",
        "link": "https://www.hviidoglarsen.dk/jobs?hr=show-job/201545&linkref=204720&locale=da_DK"
    },
    {
        "id": "Software Test Engineer for the Combat & Self Protection team-http://www.terma.com/-S\u00f8borg-2024-05-28",
        "title": "Software Test Engineer for the Combat & Self Protection team",
        "url": "http://www.terma.com/",
        "img": "https://www.jobindex.dk/img/logo/terma_logo2012.gif",
        "company": "Terma A/S",
        "location": "S\u00f8borg",
        "date_posted": "2024-05-28",
        "link": "https://termaas.hr-on.com/show-job/201416&locale=en_US"
    },
    {
        "id": "Teamchef til Software Development, IT-https://www.sparnord.dk/-Aalborg-2024-05-28",
        "title": "Teamchef til Software Development, IT",
        "url": "https://www.sparnord.dk/",
        "img": "https://www.jobindex.dk/img/logo/SparNord_2019_logo.gif",
        "company": "Spar Nord Bank A/S",
        "location": "Aalborg",
        "date_posted": "2024-05-28",
        "link": "https://candidate.hr-manager.net/ApplicationInit.aspx?cid=1148&ProjectId=145012&DepartmentId=19041&MediaId=59"
    },
    {
        "id": "Developers, Software for Self-Protection Systems, Aeronautics-http://www.terma.com/-Lystrup-2024-05-28",
        "title": "Developers, Software for Self-Protection Systems, Aeronautics",
        "url": "http://www.terma.com/",
        "img": "https://www.jobindex.dk/img/logo/terma_logo2012.gif",
        "company": "Terma A/S",
        "location": "Lystrup",
        "date_posted": "2024-05-28",
        "link": "https://termaas.hr-on.com/show-job/178422&locale=en_US"
    },
    {
        "id": "Postdoc for developing software for simulating nanostructures-https://europeanspallationsource.se/-Kongens Lyngby-2024-05-28",
        "title": "Postdoc for developing software for simulating nanostructures",
        "url": "https://europeanspallationsource.se/",
        "img": "https://www.jobindex.dk/img/brand/_top-esse-200313.png",
        "company": "European Spallation Source ERIC",
        "location": "Kongens Lyngby",
        "date_posted": "2024-05-28",
        "link": "https://europeanspallationsource.se/careers/vacancies?rmpage=job&rmjob=1687&rmlang=UK"
    },
    {
        "id": "Software- og automatiseringsudvikler til Sundhed og Omsorg-https://aarhus.dk/-Viby J-2024-05-27",
        "title": "Software- og automatiseringsudvikler til Sundhed og Omsorg",
        "url": "https://aarhus.dk/",
        "img": "https://www.jobindex.dk/img/brand/45_spo_top_aarhuskommune_8742_20240502.png",
        "company": "Aarhus Kommune",
        "location": "Viby J",
        "date_posted": "2024-05-27",
        "link": "https://aarhus.career.emply.com/ad/software-og-automatiseringsudvikler-til-sundhed-og-omsorg/dewsvv/da"
    },
    {
        "id": "Ambiti\u00f8s softwareudvikler-http://www.deoriginale.dk/-Viby J-2024-05-27",
        "title": "Ambiti\u00f8s softwareudvikler",
        "url": "http://www.deoriginale.dk/",
        "img": "https://www.jobindex.dk/img/logo/Gaming_2019.gif",
        "company": "Gaming A/S",
        "location": "Viby J",
        "date_posted": "2024-05-27",
        "link": "https://www.jobindex.dk/jobannonce/h1472652/ambitioes-softwareudvikler"
    },
    {
        "id": "Integration Specialist med fokus p\u00e5 softwarel\u00f8sninger til dokument -og workflow-systemer-https://www.canon.dk/about_us/-Vejle, S\u00f8borg-2024-05-27",
        "title": "Integration Specialist med fokus p\u00e5 softwarel\u00f8sninger til dokument -og workflow-systemer",
        "url": "https://www.canon.dk/about_us/",
        "img": "https://www.jobindex.dk/img/brand/_top-canon-060219.png",
        "company": "Canon Danmark",
        "location": "Vejle, S\u00f8borg",
        "date_posted": "2024-05-27",
        "link": "https://careers.peopleclick.eu.com/careerscp/client_canoneurope/external/jobDetails.do?functionName=getJobDetail&jobPostId=50920&localeCode=da"
    },
    {
        "id": "Test Automation Engineer-https://xci.dk/?page=home-Aalborg \u00d8st-2024-05-27",
        "title": "Test Automation Engineer",
        "url": "https://xci.dk/?page=home",
        "img": "https://www.jobindex.dk/img/brand/XCI_top20240408.png",
        "company": "XCI A/S",
        "location": "Aalborg \u00d8st",
        "date_posted": "2024-05-27",
        "link": "https://xci.teamtailor.com/jobs/4525685-test-automation-engineer?promotion=1031076-jobindex"
    },
    {
        "id": "Nordic Solution Advisor for VMware or/and Veeam-https://dk.tdsynnex.com/-Birker\u00f8d-2024-05-27",
        "title": "Nordic Solution Advisor for VMware or/and Veeam",
        "url": "https://dk.tdsynnex.com/",
        "img": "https://www.jobindex.dk/img/brand/TDSYNNEX_HR_Denmark_525x120px.png",
        "company": "TD SYNNEX Denmark ApS",
        "location": "Birker\u00f8d",
        "date_posted": "2024-05-27",
        "link": "https://synnex.wd5.myworkdayjobs.com/tdsynnexcareers/job/Birkerod-Denmark/Nordic-Solution-Advisor-for-VMware_R29639"
    },
    {
        "id": "Systems Engineer / System tekniker s\u00f8ges til snarlig tiltr\u00e6delse-https://www.fisker-it.dk/-Herlev-2024-05-27",
        "title": "Systems Engineer / System tekniker s\u00f8ges til snarlig tiltr\u00e6delse",
        "url": "https://www.fisker-it.dk/",
        "img": "https://www.jobindex.dk/img/logo/Fisker.IT_logo.png",
        "company": "Fisker IT ApS",
        "location": "Herlev",
        "date_posted": "2024-05-27",
        "link": "https://www.fisker-it.dk/job-systems-engineer"
    }
]

scrapers/run_scraper.sh (Normal file, 3 lines)
@@ -0,0 +1,3 @@
#!/bin/bash
cd /app
python jobindex.py