initial commit
Some checks are pending
Build and Push Docker Images / build (push) Waiting to run

This commit is contained in:
ChrQR 2024-05-29 23:28:58 +02:00
commit 7049dcc992
27 changed files with 637 additions and 0 deletions

View File

@ -0,0 +1,50 @@
# CI workflow: on every push to main, build the api, client and scraper images
# and push each one to the Gitea container registry under the :latest tag.
name: Build and Push Docker Images

on:
  push:
    branches: [main]

jobs:
  build:
    runs-on: ubuntu-latest

    # Privileged Docker service container made available to the job.
    services:
      docker:
        image: docker:19.03.12
        options: --privileged

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Authenticate against the registry; the token comes from the
      # REGISTRY_TOKEN repository secret.
      - name: Log in to Gitea
        uses: docker/login-action@v3
        with:
          registry: gitea.rannes.dev
          username: christian
          password: ${{ secrets.REGISTRY_TOKEN }}

      # One build-and-push step per service; each uses its own directory
      # as the build context.
      - name: Build and push Flask API image
        uses: docker/build-push-action@v5
        with:
          context: ./api
          push: true
          tags: gitea.rannes.dev/rannes.dev/sw-jobs-api:latest

      - name: Build and push Svelte client image
        uses: docker/build-push-action@v5
        with:
          context: ./client
          push: true
          tags: gitea.rannes.dev/rannes.dev/sw-jobs-client:latest

      - name: Build and push scraper image
        uses: docker/build-push-action@v5
        with:
          context: ./scrapers
          push: true
          tags: gitea.rannes.dev/rannes.dev/sw-jobs-scraper:latest

17
api/Dockerfile Normal file
View File

@ -0,0 +1,17 @@
# Image for the Flask API, served by gunicorn on port 5000.
FROM python:3.9-slim

# All following paths are relative to /app inside the container
WORKDIR /app

# Install dependencies before copying the sources so this layer is reused
# from cache whenever only application code changes.
# NOTE(review): requirements.txt must be present inside this image's build
# context (the api/ directory) - confirm it is committed there.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application sources into the container at /app
COPY . /app

# Document the port gunicorn binds to below
EXPOSE 5000

# The Flask instance is the `app` object defined in api.py, so the WSGI
# target is "api:app" (module name : variable name).
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "api:app"]

Binary file not shown.

20
api/api.py Normal file
View File

@ -0,0 +1,20 @@
from flask import Flask, jsonify
import json
import os
# Flask application object; routes below are registered on it.
app = Flask(__name__)
# Relative path to the JSON job database ('scrapers/jobs_db.json').
# Being relative, it is resolved against the process's working directory.
db_file = os.path.join('scrapers', 'jobs_db.json')
@app.route('/api/jobs', methods=['GET'])
def get_jobs():
    """Return the contents of the JSON job database as a JSON response.

    Responds with an empty JSON list when the database file does not exist.
    """
    # Guard clause: no database file yet means there is nothing to report.
    if not os.path.exists(db_file):
        return jsonify([])

    with open(db_file, 'r') as handle:
        return jsonify(json.load(handle))
# Start Flask's built-in server only when this file is executed directly
# (not when it is imported). Debug mode is off and the server listens on
# all interfaces.
if __name__ == '__main__':
    app.run(debug=False, host='0.0.0.0')

10
client/.gitignore vendored Normal file
View File

@ -0,0 +1,10 @@
.DS_Store
node_modules
/build
/.svelte-kit
/package
.env
.env.*
!.env.example
vite.config.js.timestamp-*
vite.config.ts.timestamp-*

1
client/.npmrc Normal file
View File

@ -0,0 +1 @@
engine-strict=true

4
client/.prettierignore Normal file
View File

@ -0,0 +1,4 @@
# Ignore files for PNPM, NPM and YARN
pnpm-lock.yaml
package-lock.json
yarn.lock

8
client/.prettierrc Normal file
View File

@ -0,0 +1,8 @@
{
"useTabs": true,
"singleQuote": true,
"trailingComma": "none",
"printWidth": 100,
"plugins": ["prettier-plugin-svelte"],
"overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }]
}

21
client/Dockerfile Normal file
View File

@ -0,0 +1,21 @@
# Image that builds the Svelte client and serves the result with `serve`.
FROM node:20

# Set the working directory to /client
WORKDIR /client

# The static file server does not depend on the project sources, so install
# it before any COPY; otherwise it is re-installed on every source change.
RUN npm install -g serve

# Copy only the package manifests first so the dependency layer stays cached
# until they change.
COPY package*.json ./
RUN npm install

# Copy the rest of the project and build it
COPY . .
RUN npm run build

# Document the port `serve` listens on below
EXPOSE 3000

# NOTE(review): `-s public` serves a directory named "public". This project's
# svelte.config.js uses @sveltejs/adapter-auto and its .gitignore lists
# /build and /.svelte-kit as build output - confirm that a "public"
# directory actually exists after `npm run build`.
CMD ["serve", "-s", "public", "-l", "3000"]

38
client/README.md Normal file
View File

@ -0,0 +1,38 @@
# create-svelte
Everything you need to build a Svelte project, powered by [`create-svelte`](https://github.com/sveltejs/kit/tree/main/packages/create-svelte).
## Creating a project
If you're seeing this, you've probably already done this step. Congrats!
```bash
# create a new project in the current directory
npm create svelte@latest
# create a new project in my-app
npm create svelte@latest my-app
```
## Developing
Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:
```bash
npm run dev
# or start the server and open the app in a new browser tab
npm run dev -- --open
```
## Building
To create a production version of your app:
```bash
npm run build
```
You can preview the production build with `npm run preview`.
> To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment.

33
client/eslint.config.js Normal file
View File

@ -0,0 +1,33 @@
import js from '@eslint/js';
import ts from 'typescript-eslint';
import svelte from 'eslint-plugin-svelte';
import prettier from 'eslint-config-prettier';
import globals from 'globals';
// Make both browser and Node.js globals known to every linted file.
const sharedGlobals = {
	languageOptions: {
		globals: {
			...globals.browser,
			...globals.node
		}
	}
};

// Parse <script> blocks inside .svelte files with the TypeScript parser.
const svelteTypeScript = {
	files: ['**/*.svelte'],
	languageOptions: {
		parserOptions: {
			parser: ts.parser
		}
	}
};

// Generated output directories that should never be linted.
const ignoredPaths = {
	ignores: ['build/', '.svelte-kit/', 'dist/']
};

/**
 * Flat ESLint configuration. Entries are listed in the order they are applied.
 *
 * @type {import('eslint').Linter.FlatConfig[]}
 */
export default [
	js.configs.recommended,
	...ts.configs.recommended,
	...svelte.configs['flat/recommended'],
	prettier,
	...svelte.configs['flat/prettier'],
	sharedGlobals,
	svelteTypeScript,
	ignoredPaths
];

33
client/package.json Normal file
View File

@ -0,0 +1,33 @@
{
"name": "client",
"version": "0.0.1",
"private": true,
"scripts": {
"dev": "vite dev",
"build": "vite build",
"preview": "vite preview",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
"lint": "prettier --check . && eslint .",
"format": "prettier --write ."
},
"devDependencies": {
"@sveltejs/adapter-auto": "^3.0.0",
"@sveltejs/kit": "^2.0.0",
"@sveltejs/vite-plugin-svelte": "^3.0.0",
"@types/eslint": "^8.56.7",
"eslint": "^9.0.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-svelte": "^2.36.0",
"globals": "^15.0.0",
"prettier": "^3.1.1",
"prettier-plugin-svelte": "^3.1.2",
"svelte": "^5.0.0-next.1",
"svelte-check": "^3.6.0",
"tslib": "^2.4.1",
"typescript": "^5.0.0",
"typescript-eslint": "^8.0.0-alpha.20",
"vite": "^5.0.3"
},
"type": "module"
}

13
client/src/app.d.ts vendored Normal file
View File

@ -0,0 +1,13 @@
// See https://kit.svelte.dev/docs/types#app
// for information about these interfaces
// Global `App` namespace declaration. Every interface below is currently
// commented out, so no app-specific types are declared yet.
declare global {
	namespace App {
		// interface Error {}
		// interface Locals {}
		// interface PageData {}
		// interface PageState {}
		// interface Platform {}
	}
}
// Empty export so TypeScript treats this file as a module.
export {};

12
client/src/app.html Normal file
View File

@ -0,0 +1,12 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%sveltekit.assets%/favicon.png" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
%sveltekit.head%
</head>
<body data-sveltekit-preload-data="hover">
<div style="display: contents">%sveltekit.body%</div>
</body>
</html>

1
client/src/lib/index.ts Normal file
View File

@ -0,0 +1 @@
// place files you want to import through the `$lib` alias in this folder.

View File

@ -0,0 +1,2 @@
<h1>Welcome to SvelteKit</h1>
<p>Visit <a href="https://kit.svelte.dev">kit.svelte.dev</a> to read the documentation</p>

BIN
client/static/favicon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

18
client/svelte.config.js Normal file
View File

@ -0,0 +1,18 @@
import adapter from '@sveltejs/adapter-auto';
import { vitePreprocess } from '@sveltejs/vite-plugin-svelte';
// Consult https://kit.svelte.dev/docs/integrations#preprocessors
// for more information about preprocessors
const preprocess = vitePreprocess();

// adapter-auto only supports some environments, see https://kit.svelte.dev/docs/adapter-auto for a list.
// If your environment is not supported, or you settled on a specific environment, switch out the adapter.
// See https://kit.svelte.dev/docs/adapters for more information about adapters.
const kit = { adapter: adapter() };

/**
 * SvelteKit project configuration: Vite-based preprocessing plus the
 * adapter selected above.
 *
 * @type {import('@sveltejs/kit').Config}
 */
const config = { preprocess, kit };

export default config;

19
client/tsconfig.json Normal file
View File

@ -0,0 +1,19 @@
{
"extends": "./.svelte-kit/tsconfig.json",
"compilerOptions": {
"allowJs": true,
"checkJs": true,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true,
"skipLibCheck": true,
"sourceMap": true,
"strict": true,
"moduleResolution": "bundler"
}
// Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias
// except $lib which is handled by https://kit.svelte.dev/docs/configuration#files
//
// If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes
// from the referenced tsconfig.json - TypeScript does not merge them in
}

6
client/vite.config.ts Normal file
View File

@ -0,0 +1,6 @@
import { sveltekit } from '@sveltejs/kit/vite';
import { defineConfig } from 'vite';
// Vite configuration: the SvelteKit plugin is the only plugin registered.
const viteConfig = defineConfig({
	plugins: [sveltekit()]
});

export default viteConfig;

27
docker-compose.yaml Normal file
View File

@ -0,0 +1,27 @@
# Local stack: Flask API, Svelte client and the cron-driven scraper.
version: "3.8"

services:
  # API on port 5000; ./scrapers is mounted at /app/scrapers.
  api:
    build: ./api
    ports: ["5000:5000"]
    volumes:
      - ./scrapers:/app/scrapers
    environment:
      FLASK_ENV: production

  # Client on port 3000.
  client:
    build: ./client
    ports: ["3000:3000"]

  # Scraper; ./scrapers is mounted over /app, so it shares the directory
  # that the api service mounts at /app/scrapers.
  scraper:
    build: ./scrapers
    volumes:
      - ./scrapers:/app

BIN
requirements.txt Normal file

Binary file not shown.

29
scrapers/Dockerfile Normal file
View File

@ -0,0 +1,29 @@
# Image that runs the job scraper on a schedule via cron.
FROM python:3.9-slim

# The slim Python image does not ship a cron daemon; install it so the CMD
# at the bottom can start. The apt lists are removed in the same layer to
# keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends cron \
    && rm -rf /var/lib/apt/lists/*

# Set the working directory in the container
WORKDIR /app

# Install dependencies before copying the sources so this layer is reused
# from cache whenever only scraper code changes.
# NOTE(review): requirements.txt must be present inside this image's build
# context (the scrapers/ directory) - confirm it is committed there.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the scraper sources into the container at /app
COPY . /app

# Install the schedule into cron.d. Mode 0644 makes the file writable by its
# owner only and readable by everyone; it does not need to be executable.
COPY crontab /etc/cron.d/scraper-cron
RUN chmod 0644 /etc/cron.d/scraper-cron

# Create the log file the cron job appends to
RUN touch /var/log/cron.log

# Install the wrapper script that cron invokes and make it executable
COPY run_scraper.sh /usr/local/bin/run_scraper.sh
RUN chmod +x /usr/local/bin/run_scraper.sh

# Run cron in the foreground as the container's main process
CMD ["cron", "-f"]

2
scrapers/crontab Normal file
View File

@ -0,0 +1,2 @@
# Run the scraper every hour
# Fields: minute hour day-of-month month day-of-week user command
# Runs at minute 0 of every hour as root; stdout and stderr are appended to
# /var/log/cron.log.
0 * * * * root /usr/local/bin/run_scraper.sh >> /var/log/cron.log 2>&1

128
scrapers/jobindex.py Normal file
View File

@ -0,0 +1,128 @@
import requests
from bs4 import BeautifulSoup
import json
import os
from datetime import datetime, timedelta
# Base URL of the job listings page for software development jobs.
# Scraping starts here and then follows the "next page" links.
base_url = 'https://www.jobindex.dk/jobsoegning/it/'
# Keywords to include in the job listings. A job is kept when at least one of
# these occurs as a substring of its lowercased title + description.
include_keywords = ['software', 'nextjs', 'svelte']
# Keywords to exclude from the job titles. A job is dropped when any of these
# occurs as a substring of its lowercased title.
exclude_keywords = ['senior']
# File path for the JSON database (relative to the working directory)
db_file = 'jobs_db.json'
# Seed this run with the jobs saved by previous runs. A missing or unparsable
# database file simply means starting from an empty list.
existing_jobs = []
if not os.path.exists(db_file):
    print("JSON file not found. Starting with an empty list.")
else:
    try:
        with open(db_file, 'r') as db_handle:
            existing_jobs = json.load(db_handle)
        print(f"Loaded {len(existing_jobs)} existing jobs from {db_file}")
    except json.JSONDecodeError:
        print("Error reading JSON file. Starting with an empty list.")
        existing_jobs = []

# IDs of all known jobs, used to skip postings that were already stored
existing_job_ids = {saved_job['id'] for saved_job in existing_jobs}
# Function to fetch and parse a page
def fetch_jobs_from_page(url, timeout=30):
    """Download ``url`` and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the scraper forever.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``html.parser`` backend.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.exceptions.RequestException: If the request itself fails,
            including when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to load page {url}")
    return BeautifulSoup(response.content, 'html.parser')
# Function to extract jobs from the parsed HTML
def extract_jobs(soup):
    """Collect new, keyword-matching job postings from one parsed result page.

    Every ``div`` with class ``jobsearch-result`` is treated as one posting.
    Elements or attributes missing from a posting yield empty strings instead
    of raising, so one malformed posting cannot abort the whole run.

    Args:
        soup: Parsed result page.

    Returns:
        A ``(jobs, keep_going)`` tuple. ``jobs`` is the list of job dicts
        collected from this page. ``keep_going`` is ``False`` as soon as a
        posting older than 3 days is encountered, and ``True`` otherwise.

    Side effects:
        Adds the id of every collected job to the module-level
        ``existing_job_ids`` set and prints debug output.
    """
    job_listings = soup.find_all('div', class_='jobsearch-result')
    jobs = []
    today = datetime.today()

    for job in job_listings:
        # Look each element up once; any of them may be absent.
        heading = job.find('h4')
        title_element = heading.find('a') if heading else None
        company_element = job.find('div', class_='jix-toolbar-top__company')
        company_link = company_element.find('a') if company_element else None
        site_link = company_element.find('a', rel='noopener') if company_element else None
        location_element = job.find('span', class_='jix_robotjob--area')
        date_posted_element = job.find('time')
        image_element = job.find('img')
        description_element = job.find('p')

        title = title_element.get_text(strip=True) if title_element else ''
        url = site_link.get('href', '') if site_link else ''
        img = f"https://www.jobindex.dk{image_element['src']}" if image_element and image_element.get('src') else ''
        company = company_link.get_text(strip=True) if company_link else ''
        location = location_element.get_text(strip=True) if location_element else ''
        date_posted = date_posted_element.get('datetime', '') if date_posted_element else ''
        # The title link doubles as the link to the posting itself.
        job_link = title_element.get('href', '') if title_element else ''
        description = description_element.get_text(strip=True) if description_element else ''

        print(f"Debug: title={title}, url={url}, img={img}, company={company}, location={location}, date_posted={date_posted}, job_link={job_link}, description={description}")  # Detailed debug print

        # Convert date_posted to a datetime object. Postings without a
        # parsable date (including an empty one) are skipped.
        try:
            date_posted_dt = datetime.strptime(date_posted, '%Y-%m-%d')
        except ValueError:
            continue

        if (today - date_posted_dt).days > 3:
            print("Job older than 3 days found. Stopping the scraper.")
            return jobs, False  # Returning jobs and False to indicate stopping

        # The description is only used for keyword matching; it is not stored.
        combined_text = f"{title} {description}".lower()
        title_lower = title.lower()
        is_excluded = any(exclude_keyword in title_lower for exclude_keyword in exclude_keywords)
        is_included = any(include_keyword in combined_text for include_keyword in include_keywords)

        if not is_excluded and is_included:
            job_id = f"{title}-{url}-{location}-{date_posted}"
            if job_id not in existing_job_ids:
                jobs.append({
                    'id': job_id,
                    'title': title,
                    'url': url,
                    'img': img,
                    'company': company,
                    'location': location,
                    'date_posted': date_posted,
                    'link': job_link
                })
                existing_job_ids.add(job_id)
                print(f"Added job: {job_id}")  # Debug print for each added job

    return jobs, True
# Function to find the next page URL
def get_next_page_url(soup):
    """Return the ``href`` of the page's "next" link, or ``None`` without one.

    The link is the first ``a`` element whose ``aria-label`` is ``Næste``.
    """
    link = soup.find('a', {'aria-label': 'Næste'})
    if not link:
        return None
    return link['href']
# Main scraping loop: walk the result pages from base_url onwards until a
# page reports that scraping should stop or there is no next page.
all_jobs = []
current_url = base_url
while current_url:
    print(f"Fetching jobs from: {current_url}")
    soup = fetch_jobs_from_page(current_url)

    page_jobs, keep_going = extract_jobs(soup)
    all_jobs += page_jobs
    print(f"Collected {len(page_jobs)} jobs from this page.")

    if keep_going:
        current_url = get_next_page_url(soup)
    else:
        break
# Combine the existing jobs with the new jobs
all_jobs = existing_jobs + all_jobs

# Remove jobs older than 30 days from the combined list
cutoff_date = datetime.today() - timedelta(days=30)
all_jobs = [job for job in all_jobs if datetime.strptime(job['date_posted'], '%Y-%m-%d') >= cutoff_date]

# Final debug print before saving
print(f"Total jobs to be saved: {len(all_jobs)}")
print(f"Jobs to be saved: {all_jobs}")  # Debug print to show jobs to be saved

# Save the jobs to the JSON file. The data is written to a temporary file in
# the same directory and then moved over the database with os.replace, so
# another process reading the database never sees a truncated or
# half-written file.
tmp_db_file = f"{db_file}.tmp"
with open(tmp_db_file, 'w') as file:
    json.dump(all_jobs, file, indent=4)
os.replace(tmp_db_file, db_file)

print(f"Total jobs saved: {len(all_jobs)}")  # Final output after saving to file

142
scrapers/jobs_db.json Normal file
View File

@ -0,0 +1,142 @@
[
{
"id": "Software developer-https://brodersen.com/-Randers NV-2024-05-29",
"title": "Software developer",
"url": "https://brodersen.com/",
"img": "https://www.jobindex.dk/img/logo/BrodersenLogo.gif",
"company": "Brodersen A/S",
"location": "Randers NV",
"date_posted": "2024-05-29",
"link": "https://www.jobindex.dk/jobannonce/h1458379/software-developer"
},
{
"id": "Vi s\u00f8ger en erfaren softwareudvikler til et ambiti\u00f8st produktteam-https://www.ok.dk/-Viby J-2024-05-29",
"title": "Vi s\u00f8ger en erfaren softwareudvikler til et ambiti\u00f8st produktteam",
"url": "https://www.ok.dk/",
"img": "https://www.jobindex.dk/img/logo/okamba.gif",
"company": "OK a.m.b.a.",
"location": "Viby J",
"date_posted": "2024-05-29",
"link": "https://www.ok.dk/om-ok/job?hr=show-job%2f201641&linkref=204872&locale=da_DK"
},
{
"id": "Software Engineer to energy trading in Denmark or Singapore-https://www.powermart.eu/--2024-05-29",
"title": "Software Engineer to energy trading in Denmark or Singapore",
"url": "https://www.powermart.eu/",
"img": "https://www.jobindex.dk/img/logo/PMLogo-2019.jpg",
"company": "PowerMart ApS",
"location": "",
"date_posted": "2024-05-29",
"link": "https://www.jobindex.dk/img/pdf/PM_SOFTWARE_ENGINEER_20240529_LOST.pdf"
},
{
"id": "Lyngsoe Systems s\u00f8ger erfaren Softwareudvikler - Bliv en del af vores dynamiske team!-http://www.hviidoglarsen.dk/-Aars-2024-05-29",
"title": "Lyngsoe Systems s\u00f8ger erfaren Softwareudvikler - Bliv en del af vores dynamiske team!",
"url": "http://www.hviidoglarsen.dk/",
"img": "https://www.jobindex.dk/img/logo/LyngsoeSystems_logo_2018.png",
"company": "Hviid & Larsen ApS",
"location": "Aars",
"date_posted": "2024-05-29",
"link": "https://www.hviidoglarsen.dk/jobs?hr=show-job/201545&linkref=204720&locale=da_DK"
},
{
"id": "Software Test Engineer for the Combat & Self Protection team-http://www.terma.com/-S\u00f8borg-2024-05-28",
"title": "Software Test Engineer for the Combat & Self Protection team",
"url": "http://www.terma.com/",
"img": "https://www.jobindex.dk/img/logo/terma_logo2012.gif",
"company": "Terma A/S",
"location": "S\u00f8borg",
"date_posted": "2024-05-28",
"link": "https://termaas.hr-on.com/show-job/201416&locale=en_US"
},
{
"id": "Teamchef til Software Development, IT-https://www.sparnord.dk/-Aalborg-2024-05-28",
"title": "Teamchef til Software Development, IT",
"url": "https://www.sparnord.dk/",
"img": "https://www.jobindex.dk/img/logo/SparNord_2019_logo.gif",
"company": "Spar Nord Bank A/S",
"location": "Aalborg",
"date_posted": "2024-05-28",
"link": "https://candidate.hr-manager.net/ApplicationInit.aspx?cid=1148&ProjectId=145012&DepartmentId=19041&MediaId=59"
},
{
"id": "Developers, Software for Self-Protection Systems, Aeronautics-http://www.terma.com/-Lystrup-2024-05-28",
"title": "Developers, Software for Self-Protection Systems, Aeronautics",
"url": "http://www.terma.com/",
"img": "https://www.jobindex.dk/img/logo/terma_logo2012.gif",
"company": "Terma A/S",
"location": "Lystrup",
"date_posted": "2024-05-28",
"link": "https://termaas.hr-on.com/show-job/178422&locale=en_US"
},
{
"id": "Postdoc for developing software for simulating nanostructures-https://europeanspallationsource.se/-Kongens Lyngby-2024-05-28",
"title": "Postdoc for developing software for simulating nanostructures",
"url": "https://europeanspallationsource.se/",
"img": "https://www.jobindex.dk/img/brand/_top-esse-200313.png",
"company": "European Spallation Source ERIC",
"location": "Kongens Lyngby",
"date_posted": "2024-05-28",
"link": "https://europeanspallationsource.se/careers/vacancies?rmpage=job&rmjob=1687&rmlang=UK"
},
{
"id": "Software- og automatiseringsudvikler til Sundhed og Omsorg-https://aarhus.dk/-Viby J-2024-05-27",
"title": "Software- og automatiseringsudvikler til Sundhed og Omsorg",
"url": "https://aarhus.dk/",
"img": "https://www.jobindex.dk/img/brand/45_spo_top_aarhuskommune_8742_20240502.png",
"company": "Aarhus Kommune",
"location": "Viby J",
"date_posted": "2024-05-27",
"link": "https://aarhus.career.emply.com/ad/software-og-automatiseringsudvikler-til-sundhed-og-omsorg/dewsvv/da"
},
{
"id": "Ambiti\u00f8s softwareudvikler-http://www.deoriginale.dk/-Viby J-2024-05-27",
"title": "Ambiti\u00f8s softwareudvikler",
"url": "http://www.deoriginale.dk/",
"img": "https://www.jobindex.dk/img/logo/Gaming_2019.gif",
"company": "Gaming A/S",
"location": "Viby J",
"date_posted": "2024-05-27",
"link": "https://www.jobindex.dk/jobannonce/h1472652/ambitioes-softwareudvikler"
},
{
"id": "Integration Specialist med fokus p\u00e5 softwarel\u00f8sninger til dokument -og workflow-systemer-https://www.canon.dk/about_us/-Vejle, S\u00f8borg-2024-05-27",
"title": "Integration Specialist med fokus p\u00e5 softwarel\u00f8sninger til dokument -og workflow-systemer",
"url": "https://www.canon.dk/about_us/",
"img": "https://www.jobindex.dk/img/brand/_top-canon-060219.png",
"company": "Canon Danmark",
"location": "Vejle, S\u00f8borg",
"date_posted": "2024-05-27",
"link": "https://careers.peopleclick.eu.com/careerscp/client_canoneurope/external/jobDetails.do?functionName=getJobDetail&jobPostId=50920&localeCode=da"
},
{
"id": "Test Automation Engineer-https://xci.dk/?page=home-Aalborg \u00d8st-2024-05-27",
"title": "Test Automation Engineer",
"url": "https://xci.dk/?page=home",
"img": "https://www.jobindex.dk/img/brand/XCI_top20240408.png",
"company": "XCI A/S",
"location": "Aalborg \u00d8st",
"date_posted": "2024-05-27",
"link": "https://xci.teamtailor.com/jobs/4525685-test-automation-engineer?promotion=1031076-jobindex"
},
{
"id": "Nordic Solution Advisor for VMware or/and Veeam-https://dk.tdsynnex.com/-Birker\u00f8d-2024-05-27",
"title": "Nordic Solution Advisor for VMware or/and Veeam",
"url": "https://dk.tdsynnex.com/",
"img": "https://www.jobindex.dk/img/brand/TDSYNNEX_HR_Denmark_525x120px.png",
"company": "TD SYNNEX Denmark ApS",
"location": "Birker\u00f8d",
"date_posted": "2024-05-27",
"link": "https://synnex.wd5.myworkdayjobs.com/tdsynnexcareers/job/Birkerod-Denmark/Nordic-Solution-Advisor-for-VMware_R29639"
},
{
"id": "Systems Engineer / System tekniker s\u00f8ges til snarlig tiltr\u00e6delse-https://www.fisker-it.dk/-Herlev-2024-05-27",
"title": "Systems Engineer / System tekniker s\u00f8ges til snarlig tiltr\u00e6delse",
"url": "https://www.fisker-it.dk/",
"img": "https://www.jobindex.dk/img/logo/Fisker.IT_logo.png",
"company": "Fisker IT ApS",
"location": "Herlev",
"date_posted": "2024-05-27",
"link": "https://www.fisker-it.dk/job-systems-engineer"
}
]

3
scrapers/run_scraper.sh Normal file
View File

@ -0,0 +1,3 @@
#!/bin/bash
# Wrapper invoked by cron: runs the job scraper from the /app directory.

# The scraper reads and writes its JSON database relative to the working
# directory, so stop instead of running it from the wrong place.
cd /app || exit 1

# cron starts jobs with a minimal PATH, so call the interpreter by absolute
# path. The scraper script in this directory is jobindex.py.
exec /usr/local/bin/python jobindex.py