Building a Big Data Environment with docker-compose

I previously wrote a post on using docker-compose to start and stop a Vue + Spring Boot front-end/back-end project with a single command. Recently I needed big data components such as Hadoop, ES, Kafka, and Flink. The company's test environment could not host that many services, and the desktop machine the company assigned to me happened to be sitting idle, so I decided to use Docker on that machine to build a big data environment for testing and learning.

Installing Docker Desktop on Windows

The machine runs Windows 10, so install the matching version of Docker. For detailed installation steps, see https://runoob.com/docker/windows-docker-install.html
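
After the installation completes, a quick sanity check from PowerShell or CMD confirms that both the Docker engine and docker-compose are available (version numbers will of course differ per machine):

docker --version
docker-compose --version
docker info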

docker-compose.yml

First, write the docker-compose.yml file. For an explanation of each option, see https://www.runoob.com/docker/docker-compose.html. The big data environment built here covers Hadoop, Hive, Spark, ES, Kafka, Flink, and HBase. The full file is as follows:

version: '2'
services:
  namenode:
    image: bde2020/hadoop-namenode:1.1.0-hadoop2.8-java8
    container_name: namenode
    volumes:
      - ./data/namenode:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=hadoop
    env_file:
      - ./hadoop-hive.env
    ports:
      - 50070:50070

  resourcemanager:
    image: bde2020/hadoop-resourcemanager:1.1.0-hadoop2.8-java8
    container_name: resourcemanager
    depends_on:
      - namenode
      - datanode1
      - datanode2
      - datanode3
    env_file:
      - ./hadoop-hive.env
    ports:
      - "8088:8088"

  historyserver:
    image: bde2020/hadoop-historyserver:1.1.0-hadoop2.8-java8
    container_name: historyserver
    depends_on:
      - namenode
      - datanode1
      - datanode2
    volumes:
      - ./data/historyserver:/hadoop/yarn/timeline
    env_file:
      - ./hadoop-hive.env
    ports:
      - "8188:8188"

  nodemanager1:
    image: bde2020/hadoop-nodemanager:1.1.0-hadoop2.8-java8
    container_name: nodemanager1
    depends_on:
      - namenode
      - datanode1
      - datanode2
    env_file:
      - ./hadoop-hive.env
    ports:
      - "8042:8042"

  datanode1:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8
    container_name: datanode1
    depends_on:
      - namenode
    volumes:
      - ./data/datanode1:/hadoop/dfs/data
    env_file:
      - ./hadoop-hive.env

  datanode2:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8
    container_name: datanode2
    depends_on:
      - namenode
    volumes:
      - ./data/datanode2:/hadoop/dfs/data
    env_file:
      - ./hadoop-hive.env

  datanode3:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8
    container_name: datanode3
    depends_on:
      - namenode
    volumes:
      - ./data/datanode3:/hadoop/dfs/data
    env_file:
      - ./hadoop-hive.env

  hive-server:
    image: bde2020/hive:2.1.0-postgresql-metastore
    container_name: hive-server
    env_file:
      - ./hadoop-hive.env
    environment:
      - "HIVE_CORE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore/metastore"
    ports:
      - "10000:10000"

  hive-metastore:
    image: bde2020/hive:2.1.0-postgresql-metastore
    container_name: hive-metastore
    env_file:
      - ./hadoop-hive.env
    command: /opt/hive/bin/hive --service metastore
    ports:
      - 9083:9083

  hive-metastore-postgresql:
    image: bde2020/hive-metastore-postgresql:2.1.0
    ports:
      - 5432:5432
    volumes:
      - ./data/postgresql/:/var/lib/postgresql/data

  spark-master:
    image: bde2020/spark-master:2.1.0-hadoop2.8-hive-java8
    container_name: spark-master
    ports:
      - 8080:8080
      - 7077:7077
    env_file:
      - ./hadoop-hive.env

  spark-worker:
    image: bde2020/spark-worker:2.1.0-hadoop2.8-hive-java8
    depends_on:
      - spark-master
    environment:
      - SPARK_MASTER=spark://spark-master:7077
    ports:
      - "8081:8081"
    env_file:
      - ./hadoop-hive.env

  mysql-server:
    image: mysql:5.7
    container_name: mysql-server
    ports:
      - "3306:3306"
    environment:
      - MYSQL_ROOT_PASSWORD=123456
    volumes:
      - ./data/mysql:/var/lib/mysql

  elasticsearch:
    image: elasticsearch:6.5.3
    environment:
      - discovery.type=single-node
    ports:
      - "9200:9200"
      - "9300:9300"
    networks:
      - es_network

  kibana:
    image: kibana:6.5.3
    ports:
      - "5601:5601"
    networks:
      - es_network

  jobmanager:
    image: flink
    expose:
      - "6123"
    ports:
      - "8082:8081"  # published on host port 8082 because spark-worker already uses 8081
    command: jobmanager
    environment:
      - JOB_MANAGER_RPC_ADDRESS=jobmanager

  taskmanager:
    image: flink
    expose:
      - "6121"
      - "6122"
    depends_on:
      - jobmanager
    command: taskmanager
    links:
      - "jobmanager:jobmanager"
    environment:
      - JOB_MANAGER_RPC_ADDRESS=jobmanager

  zoo:
    image: zookeeper:3.4.10
    container_name: zoo
    environment:
      ZOO_MY_ID: 1
      ZOO_SERVERS: server.1=0.0.0.0:2888:3888
    ports:
      - 2181:2181

  kafka:
    image: wurstmeister/kafka
    volumes:
      - ./data/kafka/etc/localtime:/etc/localtime
    ports:
      - 9092:9092
    environment:
      KAFKA_ADVERTISED_HOST_NAME: <host IP>
      KAFKA_ZOOKEEPER_CONNECT: <host IP>:2181

  hbase-master:
    image: bde2020/hbase-master:1.0.0-hbase1.2.6
    container_name: hbase-master
    hostname: hbase-master
    env_file:
      - ./hbase-distributed-local.env
    environment:
      # the datanode services above are named datanode1/2/3, so wait on datanode1
      SERVICE_PRECONDITION: "namenode:50070 datanode1:50075 zoo:2181"
    ports:
      - 16010:16010

  hbase-region:
    image: bde2020/hbase-regionserver:1.0.0-hbase1.2.6
    container_name: hbase-regionserver
    hostname: hbase-regionserver
    env_file:
      - ./hbase-distributed-local.env
    environment:
      HBASE_CONF_hbase_regionserver_hostname: hbase-region
      SERVICE_PRECONDITION: "namenode:50070 datanode1:50075 zoo:2181 hbase-master:16010"
    ports:
      - 16030:16030

networks:
  es_network:
    external: true
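
Two preparation steps before the first startup: es_network is declared as external: true, so docker-compose will not create it and it must already exist, and pre-pulling the images avoids long waits during the first docker-compose up. A minimal sketch:

docker network create es_network
docker-compose -f docker-compose.yml pull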

hadoop-hive.env

CORE_CONF_fs_defaultFS=hdfs://namenode:9000
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
CORE_CONF_io_compression_codecs=org.apache.hadoop.io.compress.SnappyCodec

HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false

YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_scheduler_class=org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler
YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___mb=8192
YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___vcores=4
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_mapreduce_map_output_compress=true
YARN_CONF_mapred_map_output_compress_codec=org.apache.hadoop.io.compress.SnappyCodec
YARN_CONF_yarn_nodemanager_resource_memory___mb=16384
YARN_CONF_yarn_nodemanager_resource_cpu___vcores=8
YARN_CONF_yarn_nodemanager_disk___health___checker_max___disk___utilization___per___disk___percentage=98.5
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_nodemanager_aux___services=mapreduce_shuffle

MAPRED_CONF_mapreduce_framework_name=yarn
MAPRED_CONF_mapred_child_java_opts=-Xmx4096m
MAPRED_CONF_mapreduce_map_memory_mb=4096
MAPRED_CONF_mapreduce_reduce_memory_mb=8192
MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m
MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m
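
Since webhdfs is enabled and the HDFS permission check is disabled above, a quick way to confirm HDFS is up after starting the Hadoop containers is to query the NameNode from the host, either in a browser or with curl (a sketch, assuming the 50070 port mapping from the compose file):

curl http://localhost:50070                                # NameNode web UI
curl "http://localhost:50070/webhdfs/v1/?op=LISTSTATUS"    # list the HDFS root directory via WebHDFS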

hbase-distributed-local.env

HBASE_CONF_hbase_rootdir=hdfs://namenode:9000/hbase
HBASE_CONF_hbase_cluster_distributed=true
HBASE_CONF_hbase_zookeeper_quorum=zoo

HBASE_CONF_hbase_master=hbase-master:16000
HBASE_CONF_hbase_master_hostname=hbase-master
HBASE_CONF_hbase_master_port=16000
HBASE_CONF_hbase_master_info_port=16010
HBASE_CONF_hbase_regionserver_port=16020
HBASE_CONF_hbase_regionserver_info_port=16030

HBASE_MANAGES_ZK=false
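
Once hbase-master reports its preconditions as satisfied, the cluster can be verified from an HBase shell inside the master container (a sketch; it assumes the hbase command is on the PATH in the bde2020 image):

docker exec -it hbase-master hbase shell
# inside the shell, for example:
#   status
#   create 'test', 'cf'
#   list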

One-click start and stop

The big data components in docker-compose.yml could also be split across several compose files so that containers can be started and stopped from the Docker Desktop UI. Here I instead use shell scripts to start and stop the individual groups of services.

Starting and stopping all services

start.sh

#!/bin/bash

docker-compose -f docker-compose.yml up -d namenode datanode1 datanode2 datanode3 resourcemanager nodemanager1 historyserver

sleep 5

docker-compose -f docker-compose.yml up -d hive-metastore-postgresql hive-server hive-metastore mysql-server

sleep 5

docker-compose -f docker-compose.yml up -d spark-master spark-worker

sleep 5

docker-compose -f docker-compose.yml up -d elasticsearch kibana

sleep 5

docker-compose -f docker-compose.yml up -d zoo kafka

sleep 5

docker-compose -f docker-compose.yml up -d jobmanager taskmanager
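
A possible way to run it from Git Bash or WSL on Windows: make the scripts executable, start everything, and check the container status. Note that start.sh does not bring up the HBase services; the last line below shows how they could be started once HDFS and ZooKeeper are running.

chmod +x start.sh stop.sh
./start.sh
docker-compose -f docker-compose.yml ps
docker-compose -f docker-compose.yml up -d hbase-master hbase-region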

stop.sh

#!/bin/bash
docker-compose stop
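
docker-compose stop only stops the containers and keeps them around for the next start. If you also want the containers removed (the data under ./data is preserved because it lives in bind mounts), docker-compose down can be used instead:

docker-compose -f docker-compose.yml down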

Starting and stopping individual services

Write a start/stop script for each service. Flink is used as the example below; the scripts for the other services follow the same pattern.
start-flink.sh

docker-compose -f docker-compose.yml up -d jobmanager taskmanager

stop-flink.sh

docker-compose stop jobmanager taskmanager
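
Following the same pattern, a script for any other group of services only needs the matching service names from docker-compose.yml. For example, a hypothetical start-es.sh / stop-es.sh pair for Elasticsearch and Kibana:

# start-es.sh
docker-compose -f docker-compose.yml up -d elasticsearch kibana

# stop-es.sh
docker-compose stop elasticsearch kibana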

