diff --git a/spark-pyspark-kafka/Dockerfile b/spark-pyspark-kafka/Dockerfile
new file mode 100644
index 0000000..ec8531b
--- /dev/null
+++ b/spark-pyspark-kafka/Dockerfile
@@ -0,0 +1,128 @@
+# Merging all the Spark-related tools into one image saves a LOT of space
+# over having separate images for each, especially given they all share
+# the same underlying infrastructure. It's still just as easy to set up
+# and run the various services via docker-compose.
+
+FROM python:3.6-slim-buster
+# I originally used python:3.6-alpine but dropped it after getting tied
+# in knots with Python dependencies around numpy. Pip under Alpine builds
+# all modules from source. You can try to be clever and install the bigger
+# modules like numpy via apk, but the problem with numpy is that the Python
+# version most likely will install a different version of numpy from what
+# the base Alpine image supplies pre-built, so you end up building numpy
+# from source regardless, which takes forever (well, 10 minutes ;). I tried
+# to figure out a combination that worked without success.
+#
+# Going to a later version of Python would probably resolve this, but that
+# would require moving to Spark 3 (2.4 doesn't support 3.8 or later), and
+# we don't have time to test that at the moment. Which is why this is still
+# using Python 3.6: if it ain't broke...
+
+ENV \
+    # download mirrors
+    SPARK_MIRROR="https://dlcdn.apache.org" \
+    GRAPHFRAMES_MIRROR="https://repos.spark-packages.org" \
+    KAFKA_MIRROR="https://archive.apache.org/dist" \
+    # Spark
+    SPARK_VERSION="2.4.8" \
+    HADOOP_VERSION="2.7" \
+    SPARK_INSTALL="/usr/local" \
+    SPARK_HOSTNAME="localhost" \
+    SPARK_MASTER_PORT="7077" \
+    SPARK_MASTER_WEBUI_PORT="8080" \
+    # graphframes
+    GRAPHFRAMES_VERSION="0.8.1-spark2.4-s_2.11" \
+    # Kafka
+    KAFKA_VERSION="2.2.2" \
+    SCALA_VERSION="2.11" \
+    # PySpark
+    PYSPARK_KERNEL_NAME="PySpark" \
+    PYSPARK_PYTHON="/usr/local/bin/python" \
+    KERNELS_TEMPLATE_PATH="/tmp" \
+    KERNELS_DIR_PATH="/root/.local/share/jupyter/kernels" \
+    # Miscellaneous
+    BUILD_PACKAGES="wget gnupg software-properties-common" \
+    PURGE_PACKAGES="$BUILD_PACKAGES readline-common libreadline7 netbase libgdbm6"
+
+# These environment variables need to be defined separately because you
+# can't reference prior environment variables in the same ENV block.
+ENV \
+    # Spark
+    SPARK_HOME="$SPARK_INSTALL/spark" \
+    SPARK_MASTER="spark://$SPARK_HOSTNAME:$SPARK_MASTER_PORT" \
+    # Kafka
+    KAFKA_HOME="$SPARK_INSTALL/kafka" \
+    # PySpark
+    PYSPARK_DRIVER_PYTHON="$PYSPARK_PYTHON"
+
+# More PySpark stuff. Note that "pyspark-shell" has to come last in
+# PYSPARK_SUBMIT_ARGS: spark-submit treats it as the primary resource,
+# so any options after it would be ignored.
+ENV PYTHONPATH="$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip" \
+    PYSPARK_SUBMIT_ARGS="--master $SPARK_MASTER --py-files $SPARK_HOME/jars/graphframes-$GRAPHFRAMES_VERSION.jar pyspark-shell" \
+    PYTHONSTARTUP="$SPARK_HOME/python/pyspark/shell.py"
+
+# Coalescing the RUNs saves quite a significant amount of space
+# (about 10% just for Spark alone).
+#
+RUN apt-get update && \
+    # Miscellaneous infrastructure
+    apt-get install -y --no-install-recommends tini $BUILD_PACKAGES ; \
+    # Install AdoptOpenJDK 8
+    #
+    wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | apt-key add - && \
+    add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ && \
+    apt-get update && apt-get install -y adoptopenjdk-8-hotspot ; \
+    # Download, install, and symlink Spark
+    cd $SPARK_INSTALL && \
+    wget -q --show-progress --progress=bar:force:noscroll $SPARK_MIRROR/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz 2>&1 && \
+    tar xzf spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz && \
+    ln -s spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION spark && \
+    rm -f spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz ; \
+    # Download and install graphframes
+    cd $SPARK_INSTALL/spark/jars && \
+    wget -q --show-progress --progress=bar:force:noscroll $GRAPHFRAMES_MIRROR/graphframes/graphframes/$GRAPHFRAMES_VERSION/graphframes-$GRAPHFRAMES_VERSION.jar ; \
+    # Download, install, and symlink Kafka
+    cd $SPARK_INSTALL && \
+    wget -q --show-progress --progress=bar:force:noscroll $KAFKA_MIRROR/kafka/$KAFKA_VERSION/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz 2>&1 && \
+    tar xzf kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz && \
+    ln -s kafka_$SCALA_VERSION-$KAFKA_VERSION kafka && \
+    rm -f kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz ; \
+    # Install the required modules for PySpark and install the kernel
+    pip install --upgrade pip && \
+    pip install jupyter kafka-python graphframes && \
+    # Tornado 6 breaks sparkmonitor
+    #     tornado==5.1 \
+    #     sparkmonitor \
+    ipython kernel install --name "$PYSPARK_KERNEL_NAME" ; \
+    # Clear out all the unnecessary crap
+    apt-get remove -y --purge $PURGE_PACKAGES && \
+    apt-get -y autoremove && \
+    apt-get clean -y && \
+    rm -rf /var/lib/apt/lists/* && \
+    rm -rf /tmp/* && \
+    rm -rf /var/cache/* && \
+    rm -rf /root/.cache
+
+# Startup scripts
+COPY start-master.sh start-worker.sh start-kafka.sh /usr/local/bin/
+
+# Configuration files
+COPY spark-defaults.conf $SPARK_HOME/conf
+COPY server.properties $KAFKA_HOME/config/
+
+# PySpark kernel, based on the template from
+# .
+COPY kernel.json $KERNELS_DIR_PATH/$PYSPARK_KERNEL_NAME/
+
+# Spark doesn't seem to respond directly to SIGTERM as the exit status is
+# for SIGKILL (137), after a pause. Presumably docker-compose down times out.
+# Using tini gives immediate exit with status 143 (SIGTERM).
+ENTRYPOINT ["/usr/bin/tini", "--"]
+
+# Override this in your docker-compose.yml or at the command line.
+# Spark master (default): /usr/local/bin/start-master.sh
+# Spark worker: /usr/local/bin/start-worker.sh
+# PySpark: /usr/local/bin/jupyter notebook --ip=0.0.0.0 --port=8888 --allow-root
+# Kafka: /usr/local/bin/start-kafka.sh
+CMD ["/usr/local/bin/start-master.sh"]
+# debugging
+# CMD ["bash"]
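
The closing CMD comments list the command each service needs, and the opening comment mentions running everything via docker-compose. As a rough illustration only, a minimal docker-compose.yml along those lines might look like the sketch below. The service names, the SPARK_HOSTNAME override, and the Kafka port mapping are assumptions: the actual behaviour depends on start-master.sh, start-worker.sh, start-kafka.sh, and server.properties, none of which appear in this diff.

version: "3"

services:
  spark-master:
    build: ./spark-pyspark-kafka
    image: spark-pyspark-kafka
    # The image's default CMD already runs start-master.sh.
    ports:
      - "7077:7077"   # SPARK_MASTER_PORT
      - "8080:8080"   # SPARK_MASTER_WEBUI_PORT

  spark-worker:
    image: spark-pyspark-kafka
    command: /usr/local/bin/start-worker.sh
    environment:
      # Assumption: start-worker.sh derives the master URL from SPARK_HOSTNAME
      # at runtime rather than using the SPARK_MASTER value baked in at build
      # time (which was expanded with "localhost").
      SPARK_HOSTNAME: spark-master
    depends_on:
      - spark-master

  pyspark:
    image: spark-pyspark-kafka
    command: /usr/local/bin/jupyter notebook --ip=0.0.0.0 --port=8888 --allow-root
    ports:
      - "8888:8888"   # Jupyter notebook
    depends_on:
      - spark-master

  kafka:
    image: spark-pyspark-kafka
    command: /usr/local/bin/start-kafka.sh
    ports:
      - "9092:9092"   # Kafka's stock listener port; the real value comes from server.properties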