#!/bin/bash -l
#SBATCH --job-name=Hadoop.cluster
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=40
#SBATCH --mem=1000G
#SBATCH --time=0:30:00
#SBATCH --partition=dumbo

# Change to the working directory
cd $SLURM_SUBMIT_DIR

# Load modules
module load my_necessary_modules

# Load the cluster management module
module load Hadoop-cluster/1.0

# Start the Hadoop cluster.
# Cluster storage is located on the local disks of the reserved nodes.
# The storage is not persistent (it is removed when Hadoop terminates).
hadoop-cluster start

# Report filesystem info and stats
hdfs dfsadmin -report

# Run the word count example
hadoop fs -mkdir -p /data/wordcount/input
hadoop fs -put -f $HADOOP_HOME/README.txt $HADOOP_HOME/NOTICE.txt /data/wordcount/input
hadoop fs -rm -R -f /data/wordcount/output
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /data/wordcount/input /data/wordcount/output

# List the results and copy them back to the submit directory
hadoop fs -ls -R -h /data/wordcount/output
rm -rf output
hadoop fs -get /data/wordcount/output

# Stop the Hadoop cluster
hadoop-cluster stop
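
A minimal sketch of how the job might be submitted and checked, assuming the script above is saved as hadoop_wordcount.sbatch (the filename is illustrative; any name works):

  sbatch hadoop_wordcount.sbatch      # submit the job to SLURM
  squeue -u $USER                     # check the job's status in the queue
  cat slurm-<jobid>.out               # inspect the job output after completion

The word count results are copied back into an "output" directory under the submit directory by the "hadoop fs -get" step, since the cluster's HDFS storage is removed when the Hadoop cluster stops.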