#!/bin/bash -x
#
# SLURM submission script for ERA5 data download
#
# Adjust these SBATCH directives as needed for your HPC cluster
#
##########################
#SBATCH -o era5_download.o
#SBATCH -e era5_download.e
#SBATCH --nodes=1     
#SBATCH --ntasks-per-node=5        
#SBATCH --time=1:10:00
#SBATCH -J era5_download
#SBATCH -p PESQ1 

# --- Software environment ---------------------------------------------------
# Module names are cluster-specific; replace the one below with whatever your
# site provides (a generic example would be: module load python/3.10).
module load anaconda3-2022.05-gcc-11.2.0-q74p53i

# If cdsapi lives in a dedicated virtualenv or Conda environment, activate it
# here by uncommenting ONE of the following lines. Neither is needed when the
# package is available in the environment loaded above.
# source /path/to/your/venv/bin/activate
# conda activate my_cds_env

# Start banner: records the executing host and the start time in the job's
# standard output.
printf '%s\n' "Starting ERA5 data download job on $(hostname) at $(date)"

# --- Download configuration -------------------------------------------------
# Python program that performs the download. It is given as a relative path,
# so it is looked up relative to the directory the job runs in.
PYTHON_SCRIPT="download_era5_parallel.py"

# Where the downloaded NetCDF files are written (passed as --output_dir).
ERA5_OUTPUT_DIR="/mnt/beegfs/jorge.gomes/era5_downloads"

# What to download: comma-separated lists passed as --variables and --years.
# Keep these identical to what you would pass when running the Python script
# by hand.
ERA5_VARIABLES="10m_u_component_of_wind,10m_v_component_of_wind,mean_sea_level_pressure,total_precipitation"
ERA5_YEARS="2020,2019,2018"

# Number of worker processes requested from the Python script
# (passed as --num_processes).
# NOTE(review): the #SBATCH header of this file requests --ntasks-per-node=5
# and sets no --cpus-per-task; confirm that the allocation matches this value.
NUM_PROCESSES="3"

# Geographical bounding box (optional, adjust as needed); passed as
# --north/--south/--west/--east. Presumably decimal degrees -- confirm against
# the Python script.
NORTH="50.0"
SOUTH="-75.0"
WEST="-150.0"
EAST="50.0"

# Execute the Python download script with the configuration defined above.
# Use `python3` or `python` depending on your system's setup and activated environment.
# Every expansion is quoted so that a value containing spaces or glob
# characters (e.g. a different output directory) is still passed as exactly
# one argument.
python3 "${PYTHON_SCRIPT}" \
    --variables "${ERA5_VARIABLES}" \
    --years "${ERA5_YEARS}" \
    --num_processes "${NUM_PROCESSES}" \
    --output_dir "${ERA5_OUTPUT_DIR}" \
    --north "${NORTH}" \
    --west "${WEST}" \
    --south "${SOUTH}" \
    --east "${EAST}"
# Save the exit status immediately: any later command would overwrite $?.
download_status=$?

# Report the outcome in the job's standard output.
if [[ "${download_status}" -eq 0 ]]; then
    echo "ERA5 data download job completed successfully at $(date)"
else
    # era5_download.e is the stderr file named by the `#SBATCH -e` directive
    # at the top of this script; keep this message in sync if that changes.
    echo "ERA5 data download job (SLURM job ${SLURM_JOB_ID:-unknown}) failed with exit code ${download_status} at $(date)! Check era5_download.e for details."
fi

# Deactivate your Python environment if you activated one
# If you activated one, uncomment this line.
# conda deactivate

# Propagate the downloader's status. Without this the script would end with
# the exit status of the last `echo` (0), and the scheduler would treat a
# failed download as a successful job.
exit "${download_status}"

