# Spark + GraphFrames image on top of the Python 3.6 Alpine base.
#
# NOTE(review): this file was recovered from a diff rendering in which removed
# and added lines were interleaved. The lines that appear to be the newer side
# were kept (Spark 2.4.8, openjdk8-jre, wget-based download, graphframes jar);
# the apparent older side (Spark 2.4.3, openjdk8, zeromq, curl | tar download)
# was dropped. Confirm against the repository history.
FROM python:3.6-alpine

# Versions and locations consumed by the download step below.
# NOTE(review): dlcdn.apache.org is believed to carry only current releases;
# if this Spark version has been archived the download will 404 and
# APACHE_MIRROR may need to point at https://archive.apache.org/dist - confirm.
ENV SPARK_VERSION="2.4.8" \
    HADOOP_VERSION="2.7" \
    GRAPHFRAMES_VERSION="0.8.1-spark2.4-s_2.11" \
    APACHE_MIRROR="https://dlcdn.apache.org" \
    SPARK_INSTALL="/usr/local"

# Packages that stay in the final image.
RUN apk add --no-cache \
        bash \
        openjdk8-jre \
        tini

# Download, install, and symlink Spark, then add the GraphFrames jar.
# The fetch tools are installed as a virtual package and removed in this same
# RUN, together with the temporary files, so none of it persists in a layer.
# The wget progress flags used here are the reason the full wget package is
# installed instead of relying on the base image's built-in wget.
RUN apk add --no-cache --virtual .fetch-deps \
        tar \
        wget && \
    wget -q --show-progress --progress=bar:force:noscroll \
        -O /tmp/spark.tgz \
        "$APACHE_MIRROR/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz" 2>&1 && \
    tar -xzf /tmp/spark.tgz -C "$SPARK_INSTALL" && \
    ln -s "spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION" "$SPARK_INSTALL/spark" && \
    wget -q --show-progress --progress=bar:force:noscroll \
        -P "$SPARK_INSTALL/spark/jars" \
        "https://repos.spark-packages.org/graphframes/graphframes/$GRAPHFRAMES_VERSION/graphframes-$GRAPHFRAMES_VERSION.jar" && \
    apk del .fetch-deps && \
    rm -rf /tmp/* /var/cache/* /root/.cache

# Launcher scripts for the Spark master and worker processes.
COPY start-master.sh start-worker.sh /usr/local/bin/

| | # these need to be separate because you can't reference prior environment |
---|
| |
---|