#!/usr/bin/env bash

# BEGIN_COPYRIGHT
#
# Copyright 2009-2019 CRS4.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy
# of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# END_COPYRIGHT

# Builds a self-contained Pydoop job and runs it:
#   1. installs the project found two directories above this script into a
#      temporary directory and packs the resulting "pydoop" package into a
#      tarball;
#   2. packs the local "vowelcount" directory into a second tarball;
#   3. submits the job with both archives attached and checks its output
#      with check_results.py.
#
# Environment:
#   DEBUG - if set and non-empty, enables shell tracing and DEBUG-level
#           logging for the submitted job.
#
# NOTE(review): PYTHON, HDFS, PYDOOP, hadoop_fs and ensure_dfs_home are not
# defined in this file; presumably they come from ../config.sh -- confirm.

set -euo pipefail
if [ -n "${DEBUG:-}" ]; then
    set -x
fi
this="${BASH_SOURCE-$0}"
this_dir=$(cd -P -- "$(dirname -- "${this}")" && pwd -P)
. "${this_dir}/../config.sh"

old_pwd=$(pwd)

wd=$(mktemp -d)

# Restore the starting directory and delete the scratch directory.
cleanup() {
    cd "${old_pwd}" || true
    # ":?" aborts the expansion instead of running "rm -rf" on an empty path.
    rm -rf -- "${wd:?}"
}
# With "set -e" any failing step below terminates the script immediately, so
# the scratch directory must be removed from an EXIT trap or it would leak.
trap cleanup EXIT

vc_tar="${wd}/vowelcount.tgz"
pydoop_tar="${wd}/pydoop.tgz"

# Install the project into the scratch directory rather than site-packages,
# then archive only the installed "pydoop" package directory.
# PYTHON, HDFS and PYDOOP are expanded unquoted on purpose: they may hold a
# command followed by arguments.
cd "${this_dir}/../.."
${PYTHON} -m pip install --pre -t "${wd}" .
cd "${wd}/pydoop"
tar czf "${pydoop_tar}" .

# Archive the application code.
cd "${this_dir}/vowelcount"
tar czf "${vc_tar}" .

if [ "$(hadoop_fs)" != "file" ]; then
    # Non-local default file system: use relative DFS paths, clearing any
    # leftovers from a previous run before uploading the input.
    ensure_dfs_home
    input="input"
    output="output"
    ${HDFS} dfs -rm -r -f "${input}" "${output}"
    ${HDFS} dfs -put "${this_dir}/../input" "${input}"
else
    # Local file system: read the input in place and write the output under
    # the scratch directory, so it is removed together with it.
    input="${this_dir}/../input"
    output="${wd}/output"
fi

opts=(
    "--python-zip" "${vc_tar}"
    "--upload-archive-to-cache" "${pydoop_tar}"
    "--job-name" "self_contained"
    "--entry-point" "main"
    "--no-override-home"
    "--no-override-env"
    "-D" "mapreduce.task.timeout=10000"
)
if [ -n "${DEBUG:-}" ]; then
    opts+=( "--log-level" "DEBUG" )
fi
${PYDOOP} submit "${opts[@]}" vowelcount.mr.main "${input}" "${output}"
${PYTHON} "${this_dir}/check_results.py" "${input}" "${output}"

# Normal completion: clean up right away and disarm the trap so that the
# cleanup does not run a second time when the shell exits.
trap - EXIT
cleanup
