diff --git a/scrapers/Dockerfile b/scrapers/Dockerfile index c38f725..2907bb8 100644 --- a/scrapers/Dockerfile +++ b/scrapers/Dockerfile @@ -1,36 +1,26 @@ -# Use an official Python runtime as a parent image -FROM python:slim +# Use the official Python image from the Docker Hub +FROM python:3.9-slim -# Set the working directory in the container +# Set the working directory inside the container WORKDIR /app -# Ensure cron is installed -RUN apt-get update && apt-get install -y cron && rm -rf /var/lib/apt/lists/* - -# Install any needed packages specified in requirements.txt -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - # Copy the current directory contents into the container at /app COPY . /app -# Copy the crontab file to the cron.d directory -COPY crontab /etc/cron.d/scraper-cron +# Install the required Python packages +RUN pip install requests beautifulsoup4 + +# Copy and add a cron job to run the script every hour +COPY cronjob /etc/cron.d/jobindex-cron # Give execution rights on the cron job -RUN chmod 0644 /etc/cron.d/scraper-cron +RUN chmod 0644 /etc/cron.d/jobindex-cron # Apply cron job -RUN crontab /etc/cron.d/scraper-cron +RUN crontab /etc/cron.d/jobindex-cron # Create the log file to be able to run tail RUN touch /var/log/cron.log -# Copy the script to run the scraper -COPY run_scraper.sh /usr/local/bin/run_scraper.sh - -# Grant execution rights to the script -RUN chmod +x /usr/local/bin/run_scraper.sh - # Run the command on container startup -CMD ["cron", "-f"] +CMD cron && tail -f /var/log/cron.log \ No newline at end of file diff --git a/scrapers/crontab b/scrapers/crontab index a12aa0f..49ea0eb 100644 --- a/scrapers/crontab +++ b/scrapers/crontab @@ -1,2 +1,2 @@ -# Run the scraper every hour -0 * * * * root /usr/local/bin/run_scraper.sh >> /var/log/cron.log 2>&1 +# Run the jobindex.py script every hour +0 * * * * root python /app/jobindex.py >> /var/log/cron.log 2>&1 diff --git a/scrapers/run_scraper.sh b/scrapers/run_scraper.sh deleted file mode 100644 index b0c6fe9..0000000 --- a/scrapers/run_scraper.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -cd /app -python job_scraper.py