#!/bin/bash -l
#SBATCH --job-name=Spark.cluster
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=40
#SBATCH --mem=1000G
#SBATCH --time=2:00:00
#SBATCH --partition=dumbo

# Change to the directory the job was submitted from
cd "$SLURM_SUBMIT_DIR"

# Load modules
module load my_necessary_modules
module load Hadoop-cluster/1.0

# Start Hadoop cluster with Spark support
hadoop-cluster start --spark

# Submit Spark job
#
# spark.executor.instances - total number of executors
# spark.executor.cores     - number of cores per executor
# spark.executor.memory    - amount of memory per executor
SPARK_OPTS="
  --conf spark.driver.memory=4g
  --conf spark.driver.cores=1
  --conf spark.executor.instances=17
  --conf spark.executor.cores=5
  --conf spark.executor.memory=14g
"

spark-submit ${SPARK_OPTS} --class org.apache.spark.examples.SparkPi \
    $SPARK_HOME/examples/jars/spark-examples_2.11-2.1.1.jar 100

# Stop Hadoop cluster (also tears down the Spark support started above)
hadoop-cluster stop
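
# --- Notes (added guidance; assumptions beyond the original script) ---
# Submit this script from a login node, e.g.:
#     sbatch spark_cluster.sh
# (the filename spark_cluster.sh is illustrative).
#
# Sizing rule of thumb: the executor totals should fit inside the SLURM
# allocation, i.e. instances * cores should not exceed nodes * ntasks-per-node,
# and the executor memory per node (plus roughly 10% JVM overhead) should fit
# inside --mem. Adjust the SPARK_OPTS values above to match your allocation.
#
# The SparkPi example prints its result to stdout; with SLURM's default
# output-file naming you can check it after the job finishes with:
#     grep "Pi is roughly" slurm-<jobid>.out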