-
Notifications
You must be signed in to change notification settings - Fork 2
/
docker-compose.yml
102 lines (91 loc) · 3.2 KB
/
docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
---
services:
  # ZooKeeper instance the Kafka broker below connects to
  # (see KAFKA_ZOOKEEPER_CONNECT). Configuration reference:
  # https://docs.confluent.io/platform/current/installation/docker/config-reference.html#zk-configuration
  zookeeper:
    # Pinned instead of `latest`: this stack runs Kafka in ZooKeeper mode, and
    # Confluent Platform 8.x images no longer support it. Keep this tag in
    # step with the cp-kafka tag below; bump both together to a 7.x release
    # you have tested.
    image: confluentinc/cp-zookeeper:7.4.0
    container_name: zookeeper
    environment:
      ZOOKEEPER_CLIENT_PORT: "2181"
      ZOOKEEPER_TICK_TIME: "2000"
    volumes:
      - zk_vol_data:/var/lib/zookeeper/data
      - zk_vol_log:/var/lib/zookeeper/log
    networks:
      - spark_backend
    restart: always

  # If you change this service name, you MUST also change it in the
  # environment settings below (e.g. KAFKA_ADVERTISED_LISTENERS) and remove
  # the ZooKeeper volumes, since they hold an old Kafka server ID registered
  # under the old name.
  kafka:
    # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-
    # An important note about accessing Kafka from clients on other machines:
    # -----------------------------------------------------------------------
    #
    # The config used here exposes port 29092 for _external_ connections to
    # the broker, i.e. those from _outside_ the docker network. This could be
    # from the host machine running docker, or maybe further afield if you've
    # got a more complicated setup. If the latter is true, you will need to
    # change the value 'localhost' in KAFKA_ADVERTISED_LISTENERS to one that
    # is resolvable to the docker host from those remote clients.
    #
    # For connections _internal_ to the docker network, such as from other
    # services and components, use kafka:9092 (the PLAINTEXT listener
    # advertised below).
    #
    # See https://rmoff.net/2018/08/02/kafka-listeners-explained/ for details
    # "`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-'"`-._,-
    #
    # Pinned to the same 7.x release as cp-zookeeper above (see note there).
    image: confluentinc/cp-kafka:7.4.0
    container_name: kafka
    depends_on:
      - zookeeper
    ports:
      # Only the external (PLAINTEXT_HOST) listener is published to the host.
      - "29092:29092"
    environment:
      KAFKA_BROKER_ID: "1"
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      # PLAINTEXT      -> clients inside the docker network (kafka:9092)
      # PLAINTEXT_HOST -> clients on the docker host (localhost:29092)
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      # Single broker, so the internal offsets topic cannot be replicated.
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "1"
      # NOTE(review): 2000000 bytes is a very small size cap next to the very
      # long time limit below; presumably size-based retention is meant to
      # dominate - confirm this is intentional.
      KAFKA_LOG_RETENTION_BYTES: "2000000"
      KAFKA_LOG_RETENTION_HOURS: "123456"
    volumes:
      - kafka_vol:/var/lib/kafka/data/
    networks:
      - spark_backend
    restart: always

  # # export kafka metrics for use by prometheus
  # kafka-exporter:
  #   image: danielqsj/kafka-exporter
  #   command: ["--kafka.server=kafka:9092"]
  #   depends_on:
  #     - kafka
  #   ports:
  #     - "9308:9308"
  #   networks:
  #     - spark_backend

  spark:
    container_name: spark-lab
    depends_on:
      - kafka
    # image: jupyter/pyspark-notebook:lab-3.3.2
    # WARNING: the reported Spark version is 3.2.1 (?)
    #
    # With both `image` and `build` set, the image built from Dockerfile.spark
    # is tagged with the name given in `image`.
    image: spark-3.3.2_jars
    build:
      # Explicit build context: the directory containing this compose file.
      context: .
      dockerfile: Dockerfile.spark
    environment:
      JUPYTER_ENABLE_LAB: "yes"
    ports:
      - "8888:8888"
      # The diagnostics web UI is created after successful creation of a
      # SparkSession.
      - "4040:4040"
    volumes:
      - $PWD/work:/home/jovyan/work
      - $PWD/data:/home/jovyan/data
      - $HOME/datasets:/datasets
    networks:
      - spark_backend
# Named volumes mounted by the zookeeper and kafka services so their data
# outlives the containers. All three use the default volume settings.
volumes:
  zk_vol_data: {}
  zk_vol_log: {}
  kafka_vol: {}
# Single network that every service in this file attaches to.
networks:
  spark_backend:
    # Explicit name, so the network is called exactly `spark_backend`.
    name: spark_backend