From 7049dcc99235500de65b53a6f6a0a359fe4066b4 Mon Sep 17 00:00:00 2001
From: ChrQR
Date: Wed, 29 May 2024 23:28:58 +0200
Subject: [PATCH] initial commit

---
 .gitea/workflows/build-all-prod.yaml |  50 ++++++++++
 api/Dockerfile                       |  17 ++++
 api/__pycache__/api.cpython-312.pyc  | Bin 0 -> 1128 bytes
 api/api.py                           |  20 ++++
 client/.gitignore                    |  10 ++
 client/.npmrc                        |   1 +
 client/.prettierignore               |   4 +
 client/.prettierrc                   |   8 ++
 client/Dockerfile                    |  21 ++++
 client/README.md                     |  38 +++++++
 client/eslint.config.js              |  33 +++++++
 client/package.json                  |  33 +++++++
 client/src/app.d.ts                  |  13 +++
 client/src/app.html                  |  12 +++
 client/src/lib/index.ts              |   1 +
 client/src/routes/+page.svelte       |   2 +
 client/static/favicon.png            | Bin 0 -> 1571 bytes
 client/svelte.config.js              |  18 ++++
 client/tsconfig.json                 |  19 ++++
 client/vite.config.ts                |   6 ++
 docker-compose.yaml                  |  27 +++++
 requirements.txt                     | Bin 0 -> 1052 bytes
 scrapers/Dockerfile                  |  29 ++++++
 scrapers/crontab                     |   2 +
 scrapers/jobindex.py                 | 128 ++++++++++++++++++++++++
 scrapers/jobs_db.json                | 142 +++++++++++++++++++++++++++
 scrapers/run_scraper.sh              |   3 +
 27 files changed, 637 insertions(+)
 create mode 100644 .gitea/workflows/build-all-prod.yaml
 create mode 100644 api/Dockerfile
 create mode 100644 api/__pycache__/api.cpython-312.pyc
 create mode 100644 api/api.py
 create mode 100644 client/.gitignore
 create mode 100644 client/.npmrc
 create mode 100644 client/.prettierignore
 create mode 100644 client/.prettierrc
 create mode 100644 client/Dockerfile
 create mode 100644 client/README.md
 create mode 100644 client/eslint.config.js
 create mode 100644 client/package.json
 create mode 100644 client/src/app.d.ts
 create mode 100644 client/src/app.html
 create mode 100644 client/src/lib/index.ts
 create mode 100644 client/src/routes/+page.svelte
 create mode 100644 client/static/favicon.png
 create mode 100644 client/svelte.config.js
 create mode 100644 client/tsconfig.json
 create mode 100644 client/vite.config.ts
 create mode 100644 docker-compose.yaml
 create mode 100644 requirements.txt
 create mode 100644 scrapers/Dockerfile
 create mode 100644 scrapers/crontab
 create mode 100644 scrapers/jobindex.py
 create mode 100644 scrapers/jobs_db.json
 create mode 100644 scrapers/run_scraper.sh

diff --git a/.gitea/workflows/build-all-prod.yaml b/.gitea/workflows/build-all-prod.yaml
new file mode 100644
index 0000000..f57d460
--- /dev/null
+++ b/.gitea/workflows/build-all-prod.yaml
@@ -0,0 +1,50 @@
+name: Build and Push Docker Images
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    services:
+      docker:
+        image: docker:19.03.12
+        options: --privileged
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Gitea
+        uses: docker/login-action@v3
+        with:
+          registry: gitea.rannes.dev
+          username: christian
+          password: ${{ secrets.REGISTRY_TOKEN }}
+
+      - name: Build and push Flask API image
+        uses: docker/build-push-action@v5
+        with:
+          context: ./api
+          push: true
+          tags: gitea.rannes.dev/rannes.dev/sw-jobs-api:latest
+
+      - name: Build and push Svelte client image
+        uses: docker/build-push-action@v5
+        with:
+          context: ./client
+          push: true
+          tags: gitea.rannes.dev/rannes.dev/sw-jobs-client:latest
+
+      - name: Build and push scraper image
+        uses: docker/build-push-action@v5
+        with:
+          context: ./scrapers
+          push: true
+          tags: gitea.rannes.dev/rannes.dev/sw-jobs-scraper:latest
diff --git a/api/Dockerfile b/api/Dockerfile
new file mode 100644
index 0000000..8978f26
--- /dev/null
+++ b/api/Dockerfile
@@ -0,0 +1,17 @@
+# Use an official Python runtime as a parent image
+FROM python:3.9-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the current directory contents into the container at /app
+COPY . /app
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Make port 5000 available to the world outside this container
+EXPOSE 5000
+
+# Run gunicorn server (the Flask app object lives in api.py)
+CMD ["gunicorn", "--bind", "0.0.0.0:5000", "api:app"]
diff --git a/api/__pycache__/api.cpython-312.pyc b/api/__pycache__/api.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1078a85801be71940c9bf600f49424ce0e60b1ff
GIT binary patch
literal 1128
zcmZuwO-vI(6rS0g?Y6X4enhMXAi)Gm2t^%hFrw|-M$6$Y(gLPm4113QVhG9wyOHjyCQVz>UI|?HSV2G7Xo<&gDg^_u#jvKkz z(kRujAsc8@s^hQ+yUg}8d=$eKhg+)SXSiR0&11K)7b}Kmt`7jx#-jsgL^o}Dotr_B zm-153_glCHC8pd@>U55&CbiOuSbmbZ=?u{`n9zdHw4ziqqJzR#@C@mh#E?)iIdt!F z5HdA|?ODs9GmR&@xFGYj4|plofqh~#kDqv=;J{QGr8>4nh9h#WOh#>wqdT=4&?&|U zSOm7$@i2W3I^7q5lWP(3hVjpdg}^@V_$@%kYYN1{B0SsD1FixJb9@A*`4b!CpEbnK z%hMnSgVH2}BSj@K zDr(o=eK>OGM&t?Qy@|}sk_3%BB9@6~Y#O0f%+BcqKeVD#_4Y-0x5OIEq02!5%aa;}ssFO`mAN%7r6T)YI6HLTN?ArWiN{G77sbCx=Qyj$5W;Vudlz_jf#-+ITY2!@D#|q%@cLJJ zmwQ(RmIqb~)$yICYen@tMCCv!P`O!cud2ns4($6QxnN*l22!vlP^}ptU#0L#4tzyw WIaCU*UMYsYpblOgbeVMW^Zo%Hkna8f

literal 0
HcmV?d00001

diff --git a/api/api.py b/api/api.py
new file mode 100644
index 0000000..ede00ec
--- /dev/null
+++ b/api/api.py
@@ -0,0 +1,20 @@
+from flask import Flask, jsonify
+import json
+import os
+
+app = Flask(__name__)
+
+# Path to your JSON file
+db_file = os.path.join('scrapers', 'jobs_db.json')
+
+@app.route('/api/jobs', methods=['GET'])
+def get_jobs():
+    if os.path.exists(db_file):
+        with open(db_file, 'r') as file:
+            jobs = json.load(file)
+    else:
+        jobs = []
+    return jsonify(jobs)
+
+if __name__ == '__main__':
+    app.run(debug=False, host='0.0.0.0')
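For reference, a minimal sketch of how a consumer might call the `/api/jobs` endpoint defined above. It assumes the API is reachable on localhost:5000 (the port the Dockerfile exposes) and that `requests` is installed; the field names are the ones the scraper writes into `jobs_db.json`.

```python
import requests

# Query the Flask endpoint added in api/api.py (assumed to be running locally).
response = requests.get("http://localhost:5000/api/jobs", timeout=10)
response.raise_for_status()

jobs = response.json()
print(f"{len(jobs)} jobs returned")
for job in jobs:
    # Each record carries the fields written by scrapers/jobindex.py.
    print(f"{job['date_posted']}  {job['title']} ({job['company']})")
```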
diff --git a/client/.gitignore b/client/.gitignore
new file mode 100644
index 0000000..6635cf5
--- /dev/null
+++ b/client/.gitignore
@@ -0,0 +1,10 @@
+.DS_Store
+node_modules
+/build
+/.svelte-kit
+/package
+.env
+.env.*
+!.env.example
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
diff --git a/client/.npmrc b/client/.npmrc
new file mode 100644
index 0000000..b6f27f1
--- /dev/null
+++ b/client/.npmrc
@@ -0,0 +1 @@
+engine-strict=true
diff --git a/client/.prettierignore b/client/.prettierignore
new file mode 100644
index 0000000..cc41cea
--- /dev/null
+++ b/client/.prettierignore
@@ -0,0 +1,4 @@
+# Ignore files for PNPM, NPM and YARN
+pnpm-lock.yaml
+package-lock.json
+yarn.lock
diff --git a/client/.prettierrc b/client/.prettierrc
new file mode 100644
index 0000000..9573023
--- /dev/null
+++ b/client/.prettierrc
@@ -0,0 +1,8 @@
+{
+	"useTabs": true,
+	"singleQuote": true,
+	"trailingComma": "none",
+	"printWidth": 100,
+	"plugins": ["prettier-plugin-svelte"],
+	"overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }]
+}
diff --git a/client/Dockerfile b/client/Dockerfile
new file mode 100644
index 0000000..e7e0705
--- /dev/null
+++ b/client/Dockerfile
@@ -0,0 +1,21 @@
+# Use an official node runtime as a parent image
+FROM node:20
+
+# Set the working directory to /client
+WORKDIR /client
+
+# Copy package.json and package-lock.json to /client
+COPY package*.json ./
+
+# Install any needed packages specified in package.json
+RUN npm install
+
+# Copy the current directory contents into the container at /client
+COPY . .
+
+# Build the app
+RUN npm run build
+
+# Serve the app
+RUN npm install -g serve
+CMD ["serve", "-s", "public", "-l", "3000"]
diff --git a/client/README.md b/client/README.md
new file mode 100644
index 0000000..5ce6766
--- /dev/null
+++ b/client/README.md
@@ -0,0 +1,38 @@
+# create-svelte
+
+Everything you need to build a Svelte project, powered by [`create-svelte`](https://github.com/sveltejs/kit/tree/main/packages/create-svelte).
+
+## Creating a project
+
+If you're seeing this, you've probably already done this step. Congrats!
+
+```bash
+# create a new project in the current directory
+npm create svelte@latest
+
+# create a new project in my-app
+npm create svelte@latest my-app
+```
+
+## Developing
+
+Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:
+
+```bash
+npm run dev
+
+# or start the server and open the app in a new browser tab
+npm run dev -- --open
+```
+
+## Building
+
+To create a production version of your app:
+
+```bash
+npm run build
+```
+
+You can preview the production build with `npm run preview`.
+
+> To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment.
diff --git a/client/eslint.config.js b/client/eslint.config.js
new file mode 100644
index 0000000..a351fa9
--- /dev/null
+++ b/client/eslint.config.js
@@ -0,0 +1,33 @@
+import js from '@eslint/js';
+import ts from 'typescript-eslint';
+import svelte from 'eslint-plugin-svelte';
+import prettier from 'eslint-config-prettier';
+import globals from 'globals';
+
+/** @type {import('eslint').Linter.FlatConfig[]} */
+export default [
+	js.configs.recommended,
+	...ts.configs.recommended,
+	...svelte.configs['flat/recommended'],
+	prettier,
+	...svelte.configs['flat/prettier'],
+	{
+		languageOptions: {
+			globals: {
+				...globals.browser,
+				...globals.node
+			}
+		}
+	},
+	{
+		files: ['**/*.svelte'],
+		languageOptions: {
+			parserOptions: {
+				parser: ts.parser
+			}
+		}
+	},
+	{
+		ignores: ['build/', '.svelte-kit/', 'dist/']
+	}
+];
diff --git a/client/package.json b/client/package.json
new file mode 100644
index 0000000..bac6219
--- /dev/null
+++ b/client/package.json
@@ -0,0 +1,33 @@
+{
+	"name": "client",
+	"version": "0.0.1",
+	"private": true,
+	"scripts": {
+		"dev": "vite dev",
+		"build": "vite build",
+		"preview": "vite preview",
+		"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
+		"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
+		"lint": "prettier --check . && eslint .",
+		"format": "prettier --write ."
+	},
+	"devDependencies": {
+		"@sveltejs/adapter-auto": "^3.0.0",
+		"@sveltejs/kit": "^2.0.0",
+		"@sveltejs/vite-plugin-svelte": "^3.0.0",
+		"@types/eslint": "^8.56.7",
+		"eslint": "^9.0.0",
+		"eslint-config-prettier": "^9.1.0",
+		"eslint-plugin-svelte": "^2.36.0",
+		"globals": "^15.0.0",
+		"prettier": "^3.1.1",
+		"prettier-plugin-svelte": "^3.1.2",
+		"svelte": "^5.0.0-next.1",
+		"svelte-check": "^3.6.0",
+		"tslib": "^2.4.1",
+		"typescript": "^5.0.0",
+		"typescript-eslint": "^8.0.0-alpha.20",
+		"vite": "^5.0.3"
+	},
+	"type": "module"
+}
diff --git a/client/src/app.d.ts b/client/src/app.d.ts
new file mode 100644
index 0000000..743f07b
--- /dev/null
+++ b/client/src/app.d.ts
@@ -0,0 +1,13 @@
+// See https://kit.svelte.dev/docs/types#app
+// for information about these interfaces
+declare global {
+	namespace App {
+		// interface Error {}
+		// interface Locals {}
+		// interface PageData {}
+		// interface PageState {}
+		// interface Platform {}
+	}
+}
+
+export {};
diff --git a/client/src/app.html b/client/src/app.html
new file mode 100644
index 0000000..77a5ff5
--- /dev/null
+++ b/client/src/app.html
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+	<head>
+		<meta charset="utf-8" />
+		<link rel="icon" href="%sveltekit.assets%/favicon.png" />
+		<meta name="viewport" content="width=device-width, initial-scale=1" />
+		%sveltekit.head%
+	</head>
+	<body data-sveltekit-preload-data="hover">
+		<div style="display: contents">%sveltekit.body%</div>
+	</body>
+</html>
diff --git a/client/src/lib/index.ts b/client/src/lib/index.ts
new file mode 100644
index 0000000..856f2b6
--- /dev/null
+++ b/client/src/lib/index.ts
@@ -0,0 +1 @@
+// place files you want to import through the `$lib` alias in this folder.
diff --git a/client/src/routes/+page.svelte b/client/src/routes/+page.svelte
new file mode 100644
index 0000000..5982b0a
--- /dev/null
+++ b/client/src/routes/+page.svelte
@@ -0,0 +1,2 @@
+<h1>Welcome to SvelteKit</h1>
+<p>Visit <a href="https://kit.svelte.dev">kit.svelte.dev</a> to read the documentation</p>
diff --git a/client/static/favicon.png b/client/static/favicon.png
new file mode 100644
index 0000000000000000000000000000000000000000..825b9e65af7c104cfb07089bb28659393b4f2097
GIT binary patch
literal 1571
zcmV+;2Hg3HP)Px)-AP12RCwC$UE6KzI1p6{F2N 1VK2vi|pOpn{~#djwYcWXTI_im_u^TJgMZ4JMOsSj!0ma>B?-(Hr@X&W@|R-$}W@Z zgj#$x=!~7LGqHW?IO8+*oE1MyDp!G=L0#^lUx?;!fXv@l^6SvTnf^ac{5OurzC#ZMYc20lI%HhX816AYVs1T3heS1*WaWH z%;x>)-J}YB5#CLzU@GBR6sXYrD>Vw(Fmt#|JP;+}<#6b63Ike{Fuo!?M{yEffez;| zp!PfsuaC)>h>-AdbnwN13g*1LowNjT5?+lFVd#9$!8Z9HA|$*6dQ8EHLu}U|obW6f z2%uGv?vr=KNq7YYa2Roj;|zooo<)lf=&2yxM@e`kM$CmCR#x>gI>I|*Ubr({5Y^rb zghxQU22N}F51}^yfDSt786oMTc!W&V;d?76)9KXX1 z+6Okem(d}YXmmOiZq$!IPk5t8nnS{%?+vDFz3BevmFNgpIod~R{>@#@5x9zJKEHLHv!gHeK~n)Ld!M8DB|Kfe%~123&Hz1Z(86nU7*G5chmyDe iV7$pB7pJ=96hpxHv9rCR29%bLOXlKU<_13_M8x)6;P8E1Kz6G<&P?$P^%c!M5`2` fY2zg;VK5~^>TJGQzc+33-n~gKt{{of8GzUkWmU110IgI0DLxRIM>0US|TsM=L|@F 0Bun8U!cRB7-2apz=y-7*UxOxz@Z0)@QM)9wSGki1AZ38ceG7Q72z5`i;i=J`ILzL @iUO?SBBG-0cQuo+an4TsLy-g-x;8P4UVwk|D8{W@U1Zi !M)+jqy@nQ$p?5tsHp-6J304Q={v-B>66$P0IDx&YT(`IcZ~bZfmn11#rXd7<5s}y BiB9eim&zQc0Dk|2>$bs0PnLmDfMP5lcXRY&cvJ=zKxI^f0%-d$tD!`LBf9^jMSYUA I8U?CWdY@}cRq6{5~y+)#h1!*-HcGW@+gZ4B};0OnC~`xQOyH19z*TA!!BJ%9s0V3F?CAJ{hTd#*tf+ur-W9MOURF-@B77_-OshsY}6 OXRY=5%C^*26z?l)1=$bz30!so5tfABdSYzO+H=CpV~aaUefmjvfZ3Ttu9W&W3Iu6 zROlh0MFA5h;my}8lB0tAV-Rvc2Zs_CCSJnx@d`**$idgy-iMob4dJWWw|21b4NB=LfsYp0Aeh{Ov)yztQi;eL4y5 zMi>8^SzKqk8~k?UiQK^^-5d8c%bV?$F8%X~czyiaKCI2=UH;46B9iTD~q@*AczM)Uajic2{S7gLk0$@>Z&f9 z@2@+tvXLFmm6Vb!91nQc%OJJ1(#WZt$m$;1H6*7AGc!R4FDKLo; zH|^a_yEFd>KmGi>O8J=Wh0c5@BYNNK92UrBsvGZ!=gPGEM^t5EoZNAtcbD=sGd~%8 Bb*%sZ

literal 0
HcmV?d00001

diff --git a/scrapers/Dockerfile b/scrapers/Dockerfile
new file mode 100644
index 0000000..580a0a0
--- /dev/null
+++ b/scrapers/Dockerfile
@@ -0,0 +1,29 @@
+# Use an official Python runtime as a parent image
+FROM python:3.9-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the current directory contents into the container at /app
+COPY . /app
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the crontab file to the cron.d directory
+COPY crontab /etc/cron.d/scraper-cron
+
+# Give execution rights on the cron job
+RUN chmod 0644 /etc/cron.d/scraper-cron
+
+# Create the log file to be able to run tail
+RUN touch /var/log/cron.log
+
+# Copy the script to run the scraper
+COPY run_scraper.sh /usr/local/bin/run_scraper.sh
+
+# Grant execution rights to the script
+RUN chmod +x /usr/local/bin/run_scraper.sh
+
+# Run the command on container startup
+CMD ["cron", "-f"]
diff --git a/scrapers/crontab b/scrapers/crontab
new file mode 100644
index 0000000..a12aa0f
--- /dev/null
+++ b/scrapers/crontab
@@ -0,0 +1,2 @@
+# Run the scraper every hour
+0 * * * * root /usr/local/bin/run_scraper.sh >> /var/log/cron.log 2>&1
diff --git a/scrapers/jobindex.py b/scrapers/jobindex.py
new file mode 100644
index 0000000..9e510fd
--- /dev/null
+++ b/scrapers/jobindex.py
@@ -0,0 +1,128 @@
+import requests
+from bs4 import BeautifulSoup
+import json
+import os
+from datetime import datetime, timedelta
+
+# Base URL of the job listings page for software development jobs
+base_url = 'https://www.jobindex.dk/jobsoegning/it/'
+
+# Keywords to include in the job listings
+include_keywords = ['software', 'nextjs', 'svelte']
+
+# Keywords to exclude from the job titles
+exclude_keywords = ['senior']
+
+# File path for the JSON database
+db_file = 'jobs_db.json'
+
+# Load existing jobs from the JSON file if it exists
+if os.path.exists(db_file):
+    try:
+        with open(db_file, 'r') as file:
+            existing_jobs = json.load(file)
+        print(f"Loaded {len(existing_jobs)} existing jobs from {db_file}")
+    except json.JSONDecodeError:
+        print("Error reading JSON file. Starting with an empty list.")
+        existing_jobs = []
+else:
+    print("JSON file not found. Starting with an empty list.")
+    existing_jobs = []
+
+# Convert existing jobs to a set of IDs for duplicate checking
+existing_job_ids = set(job['id'] for job in existing_jobs)
+
+# Function to fetch and parse a page
+def fetch_jobs_from_page(url):
+    response = requests.get(url)
+    if response.status_code != 200:
+        raise Exception(f"Failed to load page {url}")
+    return BeautifulSoup(response.content, 'html.parser')
+
+# Function to extract jobs from the parsed HTML
+def extract_jobs(soup):
+    job_listings = soup.find_all('div', class_='jobsearch-result')
+    jobs = []
+    today = datetime.today()
+    for job in job_listings:
+        title_element = job.find('h4').find('a')
+        company_element = job.find('div', class_='jix-toolbar-top__company')
+        location_element = job.find('span', class_='jix_robotjob--area')
+        date_posted_element = job.find('time')
+        job_link_element = job.find('h4').find('a')
+        description_element = job.find('p')
+
+        title = title_element.get_text(strip=True) if title_element else ''
+        url = company_element.find('a', rel='noopener')['href'] if company_element and company_element.find('a', rel='noopener') else ''
+        img = f"https://www.jobindex.dk{job.find('img')['src']}" if job.find('img') else ''
+        company = company_element.find('a').get_text(strip=True) if company_element else ''
+        location = location_element.get_text(strip=True) if location_element else ''
+        date_posted = date_posted_element['datetime'] if date_posted_element else ''
+        job_link = job_link_element['href'] if job_link_element else ''
+        description = description_element.get_text(strip=True) if description_element else ''
+
+        print(f"Debug: title={title}, url={url}, img={img}, company={company}, location={location}, date_posted={date_posted}, job_link={job_link}, description={description}") # Detailed debug print
+
+        # Convert date_posted to datetime object
+        try:
+            date_posted_dt = datetime.strptime(date_posted, '%Y-%m-%d')
+            if (today - date_posted_dt).days > 3:
+                print("Job older than 3 days found. Stopping the scraper.")
+                return jobs, False # Returning jobs and False to indicate stopping
+        except ValueError:
+            continue
+
+        combined_text = f"{title} {description}".lower()
+        if not any(exclude_keyword in title.lower() for exclude_keyword in exclude_keywords) and any(include_keyword in combined_text for include_keyword in include_keywords):
+            job_id = f"{title}-{url}-{location}-{date_posted}"
+            if job_id not in existing_job_ids:
+                jobs.append({
+                    'id': job_id,
+                    'title': title,
+                    'url': url,
+                    'img': img,
+                    'company': company,
+                    'location': location,
+                    'date_posted': date_posted,
+                    'link': job_link
+                })
+                existing_job_ids.add(job_id)
+                print(f"Added job: {job_id}") # Debug print for each added job
+
+    return jobs, True
+
+# Function to find the next page URL
+def get_next_page_url(soup):
+    next_page = soup.find('a', {'aria-label': 'Næste'})
+    return next_page['href'] if next_page else None
+
+# Main scraping loop
+current_url = base_url
+all_jobs = []
+
+while current_url:
+    print(f"Fetching jobs from: {current_url}")
+    soup = fetch_jobs_from_page(current_url)
+    jobs, continue_scraping = extract_jobs(soup)
+    all_jobs.extend(jobs)
+    print(f"Collected {len(jobs)} jobs from this page.")
+    if not continue_scraping:
+        break
+    current_url = get_next_page_url(soup)
+
+# Combine the existing jobs with the new jobs
+all_jobs = existing_jobs + all_jobs
+
+# Remove jobs older than 30 days from the combined list
+cutoff_date = datetime.today() - timedelta(days=30)
+all_jobs = [job for job in all_jobs if datetime.strptime(job['date_posted'], '%Y-%m-%d') >= cutoff_date]
+
+# Final debug print before saving
+print(f"Total jobs to be saved: {len(all_jobs)}")
+print(f"Jobs to be saved: {all_jobs}") # Debug print to show jobs to be saved
+
+# Save the new jobs to the JSON file
+with open(db_file, 'w') as file:
+    json.dump(all_jobs, file, indent=4)
+
+print(f"Total jobs saved: {len(all_jobs)}") # Final output after saving to file
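The parts of `jobindex.py` that decide what ends up in `jobs_db.json` are the keyword filter, the composite `job_id`, and the 30-day retention window. Below is a small, self-contained sketch of that logic run against two made-up listings; the sample records are invented for illustration and are not real scrape output.

```python
from datetime import datetime, timedelta

include_keywords = ['software', 'nextjs', 'svelte']
exclude_keywords = ['senior']

today_str = datetime.today().strftime('%Y-%m-%d')

# Hand-written stand-ins for parsed listings; not real data.
listings = [
    {'title': 'Software developer', 'description': 'Backend role', 'url': 'https://example.com/a',
     'location': 'Aarhus', 'date_posted': today_str},
    {'title': 'Senior software architect', 'description': '', 'url': 'https://example.com/b',
     'location': 'Aarhus', 'date_posted': today_str},
]

existing_job_ids = set()
kept = []
for job in listings:
    combined_text = f"{job['title']} {job['description']}".lower()
    if any(word in job['title'].lower() for word in exclude_keywords):
        continue  # title matches an excluded keyword ("senior")
    if not any(word in combined_text for word in include_keywords):
        continue  # neither title nor description matches an included keyword
    # Same composite key the scraper uses to avoid re-adding a listing.
    job_id = f"{job['title']}-{job['url']}-{job['location']}-{job['date_posted']}"
    if job_id not in existing_job_ids:
        existing_job_ids.add(job_id)
        kept.append(job)

# The 30-day retention window applied before jobs_db.json is rewritten.
cutoff = datetime.today() - timedelta(days=30)
kept = [j for j in kept if datetime.strptime(j['date_posted'], '%Y-%m-%d') >= cutoff]
print([j['title'] for j in kept])  # -> ['Software developer']
```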
diff --git a/scrapers/jobs_db.json b/scrapers/jobs_db.json
new file mode 100644
index 0000000..55f8d4d
--- /dev/null
+++ b/scrapers/jobs_db.json
@@ -0,0 +1,142 @@
+[
+    {
+        "id": "Software developer-https://brodersen.com/-Randers NV-2024-05-29",
+        "title": "Software developer",
+        "url": "https://brodersen.com/",
+        "img": "https://www.jobindex.dk/img/logo/BrodersenLogo.gif",
+        "company": "Brodersen A/S",
+        "location": "Randers NV",
+        "date_posted": "2024-05-29",
+        "link": "https://www.jobindex.dk/jobannonce/h1458379/software-developer"
+    },
+    {
+        "id": "Vi s\u00f8ger en erfaren softwareudvikler til et ambiti\u00f8st produktteam-https://www.ok.dk/-Viby J-2024-05-29",
+        "title": "Vi s\u00f8ger en erfaren softwareudvikler til et ambiti\u00f8st produktteam",
+        "url": "https://www.ok.dk/",
+        "img": "https://www.jobindex.dk/img/logo/okamba.gif",
+        "company": "OK a.m.b.a.",
+        "location": "Viby J",
+        "date_posted": "2024-05-29",
+        "link": "https://www.ok.dk/om-ok/job?hr=show-job%2f201641&linkref=204872&locale=da_DK"
+    },
+    {
+        "id": "Software Engineer to energy trading in Denmark or Singapore-https://www.powermart.eu/--2024-05-29",
+        "title": "Software Engineer to energy trading in Denmark or Singapore",
+        "url": "https://www.powermart.eu/",
+        "img": "https://www.jobindex.dk/img/logo/PMLogo-2019.jpg",
+        "company": "PowerMart ApS",
+        "location": "",
+        "date_posted": "2024-05-29",
+        "link": "https://www.jobindex.dk/img/pdf/PM_SOFTWARE_ENGINEER_20240529_LOST.pdf"
+    },
+    {
+        "id": "Lyngsoe Systems s\u00f8ger erfaren Softwareudvikler - Bliv en del af vores dynamiske team!-http://www.hviidoglarsen.dk/-Aars-2024-05-29",
+        "title": "Lyngsoe Systems s\u00f8ger erfaren Softwareudvikler - Bliv en del af vores dynamiske team!",
+        "url": "http://www.hviidoglarsen.dk/",
+        "img": "https://www.jobindex.dk/img/logo/LyngsoeSystems_logo_2018.png",
+        "company": "Hviid & Larsen ApS",
+        "location": "Aars",
+        "date_posted": "2024-05-29",
+        "link": "https://www.hviidoglarsen.dk/jobs?hr=show-job/201545&linkref=204720&locale=da_DK"
+    },
+    {
+        "id": "Software Test Engineer for the Combat & Self Protection team-http://www.terma.com/-S\u00f8borg-2024-05-28",
+        "title": "Software Test Engineer for the Combat & Self Protection team",
+        "url": "http://www.terma.com/",
+        "img": "https://www.jobindex.dk/img/logo/terma_logo2012.gif",
+        "company": "Terma A/S",
+        "location": "S\u00f8borg",
+        "date_posted": "2024-05-28",
+        "link": "https://termaas.hr-on.com/show-job/201416&locale=en_US"
+    },
+    {
+        "id": "Teamchef til Software Development, IT-https://www.sparnord.dk/-Aalborg-2024-05-28",
+        "title": "Teamchef til Software Development, IT",
+        "url": "https://www.sparnord.dk/",
+        "img": "https://www.jobindex.dk/img/logo/SparNord_2019_logo.gif",
+        "company": "Spar Nord Bank A/S",
+        "location": "Aalborg",
+        "date_posted": "2024-05-28",
+        "link": "https://candidate.hr-manager.net/ApplicationInit.aspx?cid=1148&ProjectId=145012&DepartmentId=19041&MediaId=59"
+    },
+    {
+        "id": "Developers, Software for Self-Protection Systems, Aeronautics-http://www.terma.com/-Lystrup-2024-05-28",
+        "title": "Developers, Software for Self-Protection Systems, Aeronautics",
+        "url": "http://www.terma.com/",
+        "img": "https://www.jobindex.dk/img/logo/terma_logo2012.gif",
+        "company": "Terma A/S",
+        "location": "Lystrup",
+        "date_posted": "2024-05-28",
+        "link": "https://termaas.hr-on.com/show-job/178422&locale=en_US"
+    },
+    {
+        "id": "Postdoc for developing software for simulating nanostructures-https://europeanspallationsource.se/-Kongens Lyngby-2024-05-28",
+        "title": "Postdoc for developing software for simulating nanostructures",
+        "url": "https://europeanspallationsource.se/",
+        "img": "https://www.jobindex.dk/img/brand/_top-esse-200313.png",
+        "company": "European Spallation Source ERIC",
+        "location": "Kongens Lyngby",
+        "date_posted": "2024-05-28",
+        "link": "https://europeanspallationsource.se/careers/vacancies?rmpage=job&rmjob=1687&rmlang=UK"
+    },
+    {
+        "id": "Software- og automatiseringsudvikler til Sundhed og Omsorg-https://aarhus.dk/-Viby J-2024-05-27",
+        "title": "Software- og automatiseringsudvikler til Sundhed og Omsorg",
+        "url": "https://aarhus.dk/",
+        "img": "https://www.jobindex.dk/img/brand/45_spo_top_aarhuskommune_8742_20240502.png",
+        "company": "Aarhus Kommune",
+        "location": "Viby J",
+        "date_posted": "2024-05-27",
+        "link": "https://aarhus.career.emply.com/ad/software-og-automatiseringsudvikler-til-sundhed-og-omsorg/dewsvv/da"
+    },
+    {
+        "id": "Ambiti\u00f8s softwareudvikler-http://www.deoriginale.dk/-Viby J-2024-05-27",
+        "title": "Ambiti\u00f8s softwareudvikler",
+        "url": "http://www.deoriginale.dk/",
+        "img": "https://www.jobindex.dk/img/logo/Gaming_2019.gif",
+        "company": "Gaming A/S",
+        "location": "Viby J",
+        "date_posted": "2024-05-27",
+        "link": "https://www.jobindex.dk/jobannonce/h1472652/ambitioes-softwareudvikler"
+    },
+    {
+        "id": "Integration Specialist med fokus p\u00e5 softwarel\u00f8sninger til dokument -og workflow-systemer-https://www.canon.dk/about_us/-Vejle, S\u00f8borg-2024-05-27",
+        "title": "Integration Specialist med fokus p\u00e5 softwarel\u00f8sninger til dokument -og workflow-systemer",
+        "url": "https://www.canon.dk/about_us/",
+        "img": "https://www.jobindex.dk/img/brand/_top-canon-060219.png",
+        "company": "Canon Danmark",
+        "location": "Vejle, S\u00f8borg",
+        "date_posted": "2024-05-27",
+        "link": "https://careers.peopleclick.eu.com/careerscp/client_canoneurope/external/jobDetails.do?functionName=getJobDetail&jobPostId=50920&localeCode=da"
+    },
+    {
+        "id": "Test Automation Engineer-https://xci.dk/?page=home-Aalborg \u00d8st-2024-05-27",
+        "title": "Test Automation Engineer",
+        "url": "https://xci.dk/?page=home",
+        "img": "https://www.jobindex.dk/img/brand/XCI_top20240408.png",
+        "company": "XCI A/S",
+        "location": "Aalborg \u00d8st",
+        "date_posted": "2024-05-27",
+        "link": "https://xci.teamtailor.com/jobs/4525685-test-automation-engineer?promotion=1031076-jobindex"
+    },
+    {
+        "id": "Nordic Solution Advisor for VMware or/and Veeam-https://dk.tdsynnex.com/-Birker\u00f8d-2024-05-27",
+        "title": "Nordic Solution Advisor for VMware or/and Veeam",
+        "url": "https://dk.tdsynnex.com/",
+        "img": "https://www.jobindex.dk/img/brand/TDSYNNEX_HR_Denmark_525x120px.png",
+        "company": "TD SYNNEX Denmark ApS",
+        "location": "Birker\u00f8d",
+        "date_posted": "2024-05-27",
+        "link": "https://synnex.wd5.myworkdayjobs.com/tdsynnexcareers/job/Birkerod-Denmark/Nordic-Solution-Advisor-for-VMware_R29639"
+    },
+    {
+        "id": "Systems Engineer / System tekniker s\u00f8ges til snarlig tiltr\u00e6delse-https://www.fisker-it.dk/-Herlev-2024-05-27",
+        "title": "Systems Engineer / System tekniker s\u00f8ges til snarlig tiltr\u00e6delse",
+        "url": "https://www.fisker-it.dk/",
+        "img": "https://www.jobindex.dk/img/logo/Fisker.IT_logo.png",
+        "company": "Fisker IT ApS",
+        "location": "Herlev",
+        "date_posted": "2024-05-27",
+        "link": "https://www.fisker-it.dk/job-systems-engineer"
+    }
+]
diff --git a/scrapers/run_scraper.sh b/scrapers/run_scraper.sh
new file mode 100644
index 0000000..b0c6fe9
--- /dev/null
+++ b/scrapers/run_scraper.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+cd /app
+python jobindex.py
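Since the scraper writes `jobs_db.json` and the API serves it as-is, a quick structural check of the file can catch a broken scrape before it reaches the endpoint. A minimal sketch, assuming it is run from the `scrapers/` directory next to `jobs_db.json`:

```python
import json

# Field set used by jobindex.py when it appends a record.
EXPECTED_KEYS = {"id", "title", "url", "img", "company", "location", "date_posted", "link"}

with open("jobs_db.json", "r") as fh:
    jobs = json.load(fh)

for i, job in enumerate(jobs):
    missing = EXPECTED_KEYS - job.keys()
    if missing:
        print(f"entry {i} ({job.get('title', '?')}) is missing: {sorted(missing)}")

print(f"checked {len(jobs)} entries")
```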