From 8d0e9c63047027f1df7a28144fe1e5d0a7b0191b Mon Sep 17 00:00:00 2001 From: jackfiled Date: Sat, 6 Jul 2024 22:06:28 +0800 Subject: [PATCH] feat: add images --- images/Dockerfile | 111 +++++++++ images/docker-compose.yml | 52 +++++ images/flink_conf/flink-conf.yaml | 260 +++++++++++++++++++++ images/flink_conf/log4j-cli.properties | 67 ++++++ images/flink_conf/log4j-console.properties | 68 ++++++ images/flink_conf/log4j-session.properties | 42 ++++ images/flink_conf/log4j.properties | 61 +++++ images/flink_conf/logback-console.xml | 64 +++++ images/flink_conf/logback-session.xml | 39 ++++ images/flink_conf/logback.xml | 58 +++++ images/flink_conf/masters | 2 + images/flink_conf/workers | 4 + images/flink_conf/zoo.cfg | 36 +++ images/hadoop_config/core-site.xml | 28 +++ images/hadoop_config/hdfs-site.xml | 37 +++ images/hadoop_config/mapred-site.xml | 40 ++++ images/hadoop_config/yarn-site.xml | 39 ++++ images/hbase_config/hbase-site.xml | 66 ++++++ images/hbase_config/hdfs-site.xml | 37 +++ images/hbase_config/regionservers | 3 + images/id_big_data | 38 +++ images/run.sh | 18 ++ images/zookeeper_config/zoo.cfg | 41 ++++ 23 files changed, 1211 insertions(+) create mode 100644 images/Dockerfile create mode 100644 images/docker-compose.yml create mode 100644 images/flink_conf/flink-conf.yaml create mode 100644 images/flink_conf/log4j-cli.properties create mode 100644 images/flink_conf/log4j-console.properties create mode 100644 images/flink_conf/log4j-session.properties create mode 100644 images/flink_conf/log4j.properties create mode 100644 images/flink_conf/logback-console.xml create mode 100644 images/flink_conf/logback-session.xml create mode 100644 images/flink_conf/logback.xml create mode 100644 images/flink_conf/masters create mode 100644 images/flink_conf/workers create mode 100644 images/flink_conf/zoo.cfg create mode 100644 images/hadoop_config/core-site.xml create mode 100644 images/hadoop_config/hdfs-site.xml create mode 100644 images/hadoop_config/mapred-site.xml create mode 100644 images/hadoop_config/yarn-site.xml create mode 100644 images/hbase_config/hbase-site.xml create mode 100644 images/hbase_config/hdfs-site.xml create mode 100644 images/hbase_config/regionservers create mode 100644 images/id_big_data create mode 100755 images/run.sh create mode 100644 images/zookeeper_config/zoo.cfg diff --git a/images/Dockerfile b/images/Dockerfile new file mode 100644 index 0000000..d507fda --- /dev/null +++ b/images/Dockerfile @@ -0,0 +1,111 @@ +FROM archlinux:latest + +# Install necessary dependencies +RUN echo 'Server = https://mirrors.cernet.edu.cn/archlinux/$repo/os/$arch' > /etc/pacman.d/mirrorlist +RUN pacman -Sy --noconfirm openssh jdk8-openjdk which inetutils + +# Setting JAVA_HOME env +ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk + +# Configuring SSH login +RUN echo 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCyyLt1bsAlCcadB2krSCDr0JP8SrF7EsUM+Qiv3m+V10gIBoCBFEh9iwpVN1UMioK8qdl9lm+LK22RW+IU6RjW+zyPB7ui3LlG0bk5H4g9v7uXH/+/ANfiJI2/2+Q4gOQAsRR+7kOpGemeKnFGJMgxnndSCpgYI4Is9ydAFzcQcGgxVB2mTGT6siufJb77tWKxrVzGn60ktdRxfwqct+2Nt88GTGw7eGJfMQADX1fVt9490M3G3x2Kw9KweXr2m+qr1yCRAlt3WyNHoNOXVhrF41/YgwGe0sGJd+kXBAdM2nh2xa0ZZPUGFkAp4MIWBDbycleRCeLUpCHFB0bt2D82BhF9luCeTXtpLyDym1+PS+OLZ3NDcvztBaH8trsgH+RkUc2Bojo1J4W9NoiEWsHGlaziWgF6L3z1vgesDPboxd0ol6EhKVX+QjxA9XE79IT4GidHxDwqonJz/dHXwjilqqmI4TEHndVWhJN0GV47a63+YCK02VAZ2mOA3aw/7LE= ricardo@magicbook-14' >> /root/.ssh/authorized_keys +COPY id_big_data /root/.ssh/id_rsa +RUN echo 'Host *' >> /etc/ssh/ssh_config && echo ' StrictHostKeyChecking no' >> 
/etc/ssh/ssh_config + +# Install Hadoop +ADD hadoop-3.3.6.tar.gz /opt/ +RUN mv /opt/hadoop-3.3.6 /opt/hadoop && \ + chmod -R 777 /opt/hadoop + +# Configure Hadoop +ENV HADOOP_HOME=/opt/hadoop +RUN echo "slave1" >> $HADOOP_HOME/etc/hadoop/workers +RUN echo "slave2" >> $HADOOP_HOME/etc/hadoop/workers +RUN echo "slave3" >> $HADOOP_HOME/etc/hadoop/workers +RUN mkdir $HADOOP_HOME/tmp +ENV HADOOP_TMP_DIR=$HADOOP_HOME/tmp +RUN mkdir $HADOOP_HOME/namenode +RUN mkdir $HADOOP_HOME/datanode +ENV HADOOP_CONFIG_HOME=$HADOOP_HOME/etc/hadoop +ENV PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH +ENV HADOOP_CLASSPATH=$HADOOP_HOME/share/hadoop/tools/lib/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_CLASSPATH +ENV HDFS_NAMENODE_USER="root" +ENV HDFS_DATANODE_USER="root" +ENV HDFS_SECONDARYNAMENODE_USER="root" +ENV YARN_RESOURCEMANAGER_USER="root" +ENV YARN_NODEMANAGER_USER="root" +COPY hadoop_config/* $HADOOP_HOME/etc/hadoop/ +RUN sed -i '1i export JAVA_HOME=/usr/lib/jvm/java-8-openjdk' $HADOOP_HOME/etc/hadoop/hadoop-env.sh + +# Install zookeeper +ADD apache-zookeeper-3.9.2-bin.tar.gz /opt/ +RUN mv /opt/apache-zookeeper-3.9.2-bin /opt/zookeeper && \ + chmod -R 777 /opt/zookeeper + +# Configure zookeeper +ENV ZOOKEEPER_HOME=/opt/zookeeper +ENV PATH=$ZOOKEEPER_HOME/bin:$PATH +RUN mkdir $ZOOKEEPER_HOME/tmp +COPY zookeeper_config/* $ZOOKEEPER_HOME/conf/ + +# Install hbase +ADD hbase-2.5.8-bin.tar.gz /opt/ +RUN mv /opt/hbase-2.5.8 /opt/hbase && \ + chmod -R 777 /opt/hbase + +# Configure hbase +ENV HBASE_HOME=/opt/hbase +ENV PATH=$HBASE_HOME/bin:$HBASE_HOME/sbin:$PATH +COPY hbase_config/* $HBASE_HOME/conf/ +RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk" >> $HBASE_HOME/conf/hbase-env.sh +RUN echo "export HBASE_MANAGES_ZK=false" >> $HBASE_HOME/conf/hbase-env.sh +RUN echo "export HBASE_LIBRARY_PATH=/opt/hadoop/lib/native" >> $HBASE_HOME/conf/hbase-env.sh +RUN echo 'export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true"' >> $HBASE_HOME/conf/hbase-env.sh + +# Install spark +ADD spark-3.5.1-bin-hadoop3-scala2.13.tgz /opt/ +RUN mv /opt/spark-3.5.1-bin-hadoop3-scala2.13 /opt/spark && \ + chmod -R 777 /opt/spark + +# Configure spark +ENV SPARK_HOME=/opt/spark +ENV PATH=$SPARK_HOME/bin:$PATH +ENV HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop +ENV YARN_CONF_DIR=/opt/hadoop/etc/hadoop +RUN mv /opt/spark/conf/spark-env.sh.template /opt/spark/conf/spark-env.sh && \ + echo 'export SPARK_DIST_CLASSPATH=$(/opt/hadoop/bin/hadoop classpath)' >> /opt/spark/conf/spark-env.sh && \ + touch /opt/spark/conf/workers && \ + echo "master" >> /opt/spark/conf/workers && \ + echo "slave1" >> /opt/spark/conf/workers && \ + echo "slave2" >> /opt/spark/conf/workers && \ + echo "slave3" >> /opt/spark/conf/workers + +# Add Mysql JDBC Connector +COPY mysql-connector-j-8.4.0.jar /opt/spark/jars/ + +# Install flink +ADD flink-1.13.6-bin-scala_2.11.tgz /opt/ +RUN mv /opt/flink-1.13.6 /opt/flink && \ + chmod -R 777 /opt/flink + +# Add hadoop library +ADD commons-cli-1.4.jar /opt/flink/lib/ +ADD flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar /opt/flink/lib/ + +# Configure flink +ENV FLINK_HOME=/opt/flink +ENV PATH=$FLINK_HOME/bin:$PATH +COPY flink_conf/* $FLINK_HOME/conf/ + +# Install kafka +ADD kafka_2.12-1.0.2.tgz /opt/ +RUN mv /opt/kafka_2.12-1.0.2 /opt/kafka/ && \ + chmod -R 777 
/opt/kafka + +# Configure kafka +ENV KAFKA_HOME=/opt/kafka +ENV PATH=$KAFKA_HOME/bin:$PATH + + +COPY run.sh /run.sh +CMD [ "/run.sh" ] \ No newline at end of file diff --git a/images/docker-compose.yml b/images/docker-compose.yml new file mode 100644 index 0000000..c2ec5fa --- /dev/null +++ b/images/docker-compose.yml @@ -0,0 +1,52 @@ +version: '3.8' +services: + master: + hostname: rcj-2021211180-master + image: big-data + command: + - "/run.sh" + - "1" + - "master" + networks: + hadoop-network: + ipv4_address: 172.126.1.111 + + slave1: + hostname: rcj-2021211180-slave1 + image: big-data + command: + - "/run.sh" + - "2" + - "slave1" + networks: + hadoop-network: + ipv4_address: 172.126.1.112 + + slave2: + hostname: rcj-2021211180-slave2 + image: big-data + command: + - "/run.sh" + - "3" + - "slave2" + networks: + hadoop-network: + ipv4_address: 172.126.1.113 + + slave3: + hostname: rcj-2021211180-slave3 + image: big-data + command: + - "/run.sh" + - "4" + - "slave3" + networks: + hadoop-network: + ipv4_address: 172.126.1.114 + +networks: + hadoop-network: + driver: bridge + ipam: + config: + - subnet: 172.126.1.0/24 diff --git a/images/flink_conf/flink-conf.yaml b/images/flink_conf/flink-conf.yaml new file mode 100644 index 0000000..1ed0b64 --- /dev/null +++ b/images/flink_conf/flink-conf.yaml @@ -0,0 +1,260 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + + +#============================================================================== +# Common +#============================================================================== + +# The external address of the host on which the JobManager runs and can be +# reached by the TaskManagers and any clients which want to connect. This setting +# is only used in Standalone mode and may be overwritten on the JobManager side +# by specifying the --host parameter of the bin/jobmanager.sh executable. +# In high availability mode, if you use the bin/start-cluster.sh script and setup +# the conf/masters file, this will be taken care of automatically. Yarn/Mesos +# automatically configure the host name based on the hostname of the node where the +# JobManager runs. + +jobmanager.rpc.address: master + +# The RPC port where the JobManager is reachable. + +jobmanager.rpc.port: 6123 + + +# The total process memory size for the JobManager. +# +# Note this accounts for all memory usage within the JobManager process, including JVM metaspace and other overhead. + +jobmanager.memory.process.size: 1600m + + +# The total process memory size for the TaskManager. 
+# +# Note this accounts for all memory usage within the TaskManager process, including JVM metaspace and other overhead. + +taskmanager.memory.process.size: 1728m + +# To exclude JVM metaspace and overhead, please, use total Flink memory size instead of 'taskmanager.memory.process.size'. +# It is not recommended to set both 'taskmanager.memory.process.size' and Flink memory. +# +# taskmanager.memory.flink.size: 1280m + +# The number of task slots that each TaskManager offers. Each slot runs one parallel pipeline. + +taskmanager.numberOfTaskSlots: 1 + +# The parallelism used for programs that did not specify and other parallelism. + +parallelism.default: 1 + +# The default file system scheme and authority. +# +# By default file paths without scheme are interpreted relative to the local +# root file system 'file:///'. Use this to override the default and interpret +# relative paths relative to a different file system, +# for example 'hdfs://mynamenode:12345' +# +# fs.default-scheme + +#============================================================================== +# High Availability +#============================================================================== + +# The high-availability mode. Possible options are 'NONE' or 'zookeeper'. +# +high-availability: zookeeper +high-availability.storageDir: hdfs://master/flink_yarn_ha +high-availability.zookeeper.path.root: /flink-yarn +high-availability.zookeeper.quorum: master:2181,slave1:2181,slave2:2181,slave3:2181 +yarn.application-attempts: 10 + +# The path where metadata for master recovery is persisted. While ZooKeeper stores +# the small ground truth for checkpoint and leader election, this location stores +# the larger objects, like persisted dataflow graphs. +# +# Must be a durable file system that is accessible from all nodes +# (like HDFS, S3, Ceph, nfs, ...) +# +# high-availability.storageDir: hdfs:///flink/ha/ + +# The list of ZooKeeper quorum peers that coordinate the high-availability +# setup. This must be a list of the form: +# "host1:clientPort,host2:clientPort,..." (default clientPort: 2181) +# +# high-availability.zookeeper.quorum: localhost:2181 + + +# ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes +# It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE) +# The default value is "open" and it can be changed to "creator" if ZK security is enabled +# +# high-availability.zookeeper.client.acl: open + +#============================================================================== +# Fault tolerance and checkpointing +#============================================================================== + +# The backend that will be used to store operator state checkpoints if +# checkpointing is enabled. +# +# Supported backends are 'jobmanager', 'filesystem', 'rocksdb', or the +# . +# +# state.backend: filesystem + +# Directory for checkpoints filesystem, when using any of the default bundled +# state backends. +# +# state.checkpoints.dir: hdfs://namenode-host:port/flink-checkpoints + +# Default target directory for savepoints, optional. +# +# state.savepoints.dir: hdfs://namenode-host:port/flink-savepoints + +# Flag to enable/disable incremental checkpoints for backends that +# support incremental checkpoints (like the RocksDB state backend). +# +# state.backend.incremental: false + +# The failover strategy, i.e., how the job computation recovers from task failures. 
+# Only restart tasks that may have been affected by the task failure, which typically includes +# downstream tasks and potentially upstream tasks if their produced data is no longer available for consumption. + +jobmanager.execution.failover-strategy: region + +#============================================================================== +# Rest & web frontend +#============================================================================== + +# The port to which the REST client connects to. If rest.bind-port has +# not been specified, then the server will bind to this port as well. +# +#rest.port: 8081 + +# The address to which the REST client will connect to +# +#rest.address: 0.0.0.0 + +# Port range for the REST and web server to bind to. +# +#rest.bind-port: 8080-8090 + +# The address that the REST & web server binds to +# +#rest.bind-address: 0.0.0.0 + +# Flag to specify whether job submission is enabled from the web-based +# runtime monitor. Uncomment to disable. + +#web.submit.enable: false + +#============================================================================== +# Advanced +#============================================================================== + +# Override the directories for temporary files. If not specified, the +# system-specific Java temporary directory (java.io.tmpdir property) is taken. +# +# For framework setups on Yarn or Mesos, Flink will automatically pick up the +# containers' temp directories without any need for configuration. +# +# Add a delimited list for multiple directories, using the system directory +# delimiter (colon ':' on unix) or a comma, e.g.: +# /data1/tmp:/data2/tmp:/data3/tmp +# +# Note: Each directory entry is read from and written to by a different I/O +# thread. You can include the same directory multiple times in order to create +# multiple I/O threads against that directory. This is for example relevant for +# high-throughput RAIDs. +# +# io.tmp.dirs: /tmp + +# The classloading resolve order. Possible values are 'child-first' (Flink's default) +# and 'parent-first' (Java's default). +# +# Child first classloading allows users to use different dependency/library +# versions in their application than those in the classpath. Switching back +# to 'parent-first' may help with debugging dependency issues. +# +# classloader.resolve-order: child-first + +# The amount of memory going to the network stack. These numbers usually need +# no tuning. Adjusting them may be necessary in case of an "Insufficient number +# of network buffers" error. The default min is 64MB, the default max is 1GB. +# +# taskmanager.memory.network.fraction: 0.1 +# taskmanager.memory.network.min: 64mb +# taskmanager.memory.network.max: 1gb + +#============================================================================== +# Flink Cluster Security Configuration +#============================================================================== + +# Kerberos authentication for various components - Hadoop, ZooKeeper, and connectors - +# may be enabled in four steps: +# 1. configure the local krb5.conf file +# 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit) +# 3. make the credentials available to various JAAS login contexts +# 4. configure the connector to use JAAS/SASL + +# The below configure how Kerberos credentials are provided. A keytab will be used instead of +# a ticket cache if the keytab path and principal are set. 
+ +# security.kerberos.login.use-ticket-cache: true +# security.kerberos.login.keytab: /path/to/kerberos/keytab +# security.kerberos.login.principal: flink-user + +# The configuration below defines which JAAS login contexts + +# security.kerberos.login.contexts: Client,KafkaClient + +#============================================================================== +# ZK Security Configuration +#============================================================================== + +# Below configurations are applicable if ZK ensemble is configured for security + +# Override below configuration to provide custom ZK service name if configured +# zookeeper.sasl.service-name: zookeeper + +# The configuration below must match one of the values set in "security.kerberos.login.contexts" +# zookeeper.sasl.login-context-name: Client + +#============================================================================== +# HistoryServer +#============================================================================== + +# The HistoryServer is started and stopped via bin/historyserver.sh (start|stop) + +# Directory to upload completed jobs to. Add this directory to the list of +# monitored directories of the HistoryServer as well (see below). +#jobmanager.archive.fs.dir: hdfs:///completed-jobs/ + +# The address under which the web-based HistoryServer listens. +#historyserver.web.address: 0.0.0.0 + +# The port under which the web-based HistoryServer listens. +#historyserver.web.port: 8082 + +# Comma separated list of directories to monitor for completed jobs. +#historyserver.archive.fs.dir: hdfs:///completed-jobs/ + +# Interval in milliseconds for refreshing the monitored directories. +#historyserver.archive.fs.refresh-interval: 10000 + diff --git a/images/flink_conf/log4j-cli.properties b/images/flink_conf/log4j-cli.properties new file mode 100644 index 0000000..d372da1 --- /dev/null +++ b/images/flink_conf/log4j-cli.properties @@ -0,0 +1,67 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. +monitorInterval=30 + +rootLogger.level = INFO +rootLogger.appenderRef.file.ref = FileAppender + +# Log all infos in the given file +appender.file.name = FileAppender +appender.file.type = FILE +appender.file.append = false +appender.file.fileName = ${sys:log.file} +appender.file.layout.type = PatternLayout +appender.file.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n + +# Log output from org.apache.flink.yarn to the console. This is used by the +# CliFrontend class when using a per-job YARN cluster. 
+logger.yarn.name = org.apache.flink.yarn +logger.yarn.level = INFO +logger.yarn.appenderRef.console.ref = ConsoleAppender +logger.yarncli.name = org.apache.flink.yarn.cli.FlinkYarnSessionCli +logger.yarncli.level = INFO +logger.yarncli.appenderRef.console.ref = ConsoleAppender +logger.hadoop.name = org.apache.hadoop +logger.hadoop.level = INFO +logger.hadoop.appenderRef.console.ref = ConsoleAppender + +# Make sure hive logs go to the file. +logger.hive.name = org.apache.hadoop.hive +logger.hive.level = INFO +logger.hive.additivity = false +logger.hive.appenderRef.file.ref = FileAppender + +# Log output from org.apache.flink.kubernetes to the console. +logger.kubernetes.name = org.apache.flink.kubernetes +logger.kubernetes.level = INFO +logger.kubernetes.appenderRef.console.ref = ConsoleAppender + +appender.console.name = ConsoleAppender +appender.console.type = CONSOLE +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n + +# suppress the warning that hadoop native libraries are not loaded (irrelevant for the client) +logger.hadoopnative.name = org.apache.hadoop.util.NativeCodeLoader +logger.hadoopnative.level = OFF + +# Suppress the irrelevant (wrong) warnings from the Netty channel handler +logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline +logger.netty.level = OFF diff --git a/images/flink_conf/log4j-console.properties b/images/flink_conf/log4j-console.properties new file mode 100644 index 0000000..81868ab --- /dev/null +++ b/images/flink_conf/log4j-console.properties @@ -0,0 +1,68 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. +monitorInterval=30 + +# This affects logging for both user code and Flink +rootLogger.level = INFO +rootLogger.appenderRef.console.ref = ConsoleAppender +rootLogger.appenderRef.rolling.ref = RollingFileAppender + +# Uncomment this if you want to _only_ change Flink's logging +#logger.flink.name = org.apache.flink +#logger.flink.level = INFO + +# The following lines keep the log level of common libraries/connectors on +# log level INFO. The root logger does not override this. You have to manually +# change the log levels here. 
+logger.akka.name = akka +logger.akka.level = INFO +logger.kafka.name= org.apache.kafka +logger.kafka.level = INFO +logger.hadoop.name = org.apache.hadoop +logger.hadoop.level = INFO +logger.zookeeper.name = org.apache.zookeeper +logger.zookeeper.level = INFO +logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 +logger.shaded_zookeeper.level = INFO + +# Log all infos to the console +appender.console.name = ConsoleAppender +appender.console.type = CONSOLE +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n + +# Log all infos in the given rolling file +appender.rolling.name = RollingFileAppender +appender.rolling.type = RollingFile +appender.rolling.append = true +appender.rolling.fileName = ${sys:log.file} +appender.rolling.filePattern = ${sys:log.file}.%i +appender.rolling.layout.type = PatternLayout +appender.rolling.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n +appender.rolling.policies.type = Policies +appender.rolling.policies.size.type = SizeBasedTriggeringPolicy +appender.rolling.policies.size.size=100MB +appender.rolling.policies.startup.type = OnStartupTriggeringPolicy +appender.rolling.strategy.type = DefaultRolloverStrategy +appender.rolling.strategy.max = ${env:MAX_LOG_FILE_NUMBER:-10} + +# Suppress the irrelevant (wrong) warnings from the Netty channel handler +logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline +logger.netty.level = OFF diff --git a/images/flink_conf/log4j-session.properties b/images/flink_conf/log4j-session.properties new file mode 100644 index 0000000..2fd6f50 --- /dev/null +++ b/images/flink_conf/log4j-session.properties @@ -0,0 +1,42 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 
+monitorInterval=30 + +rootLogger.level = INFO +rootLogger.appenderRef.console.ref = ConsoleAppender + +appender.console.name = ConsoleAppender +appender.console.type = CONSOLE +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n + +# Suppress the irrelevant (wrong) warnings from the Netty channel handler +logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline +logger.netty.level = OFF +logger.zookeeper.name = org.apache.zookeeper +logger.zookeeper.level = WARN +logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 +logger.shaded_zookeeper.level = WARN +logger.curator.name = org.apache.flink.shaded.org.apache.curator.framework +logger.curator.level = WARN +logger.runtimeutils.name= org.apache.flink.runtime.util.ZooKeeperUtils +logger.runtimeutils.level = WARN +logger.runtimeleader.name = org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalDriver +logger.runtimeleader.level = WARN diff --git a/images/flink_conf/log4j.properties b/images/flink_conf/log4j.properties new file mode 100644 index 0000000..17d0a53 --- /dev/null +++ b/images/flink_conf/log4j.properties @@ -0,0 +1,61 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. +monitorInterval=30 + +# This affects logging for both user code and Flink +rootLogger.level = INFO +rootLogger.appenderRef.file.ref = MainAppender + +# Uncomment this if you want to _only_ change Flink's logging +#logger.flink.name = org.apache.flink +#logger.flink.level = INFO + +# The following lines keep the log level of common libraries/connectors on +# log level INFO. The root logger does not override this. You have to manually +# change the log levels here. 
+logger.akka.name = akka +logger.akka.level = INFO +logger.kafka.name= org.apache.kafka +logger.kafka.level = INFO +logger.hadoop.name = org.apache.hadoop +logger.hadoop.level = INFO +logger.zookeeper.name = org.apache.zookeeper +logger.zookeeper.level = INFO +logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 +logger.shaded_zookeeper.level = INFO + +# Log all infos in the given file +appender.main.name = MainAppender +appender.main.type = RollingFile +appender.main.append = true +appender.main.fileName = ${sys:log.file} +appender.main.filePattern = ${sys:log.file}.%i +appender.main.layout.type = PatternLayout +appender.main.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n +appender.main.policies.type = Policies +appender.main.policies.size.type = SizeBasedTriggeringPolicy +appender.main.policies.size.size = 100MB +appender.main.policies.startup.type = OnStartupTriggeringPolicy +appender.main.strategy.type = DefaultRolloverStrategy +appender.main.strategy.max = ${env:MAX_LOG_FILE_NUMBER:-10} + +# Suppress the irrelevant (wrong) warnings from the Netty channel handler +logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline +logger.netty.level = OFF diff --git a/images/flink_conf/logback-console.xml b/images/flink_conf/logback-console.xml new file mode 100644 index 0000000..8cea03c --- /dev/null +++ b/images/flink_conf/logback-console.xml @@ -0,0 +1,64 @@ + + + + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{60} %X{sourceThread} - %msg%n + + + + + ${log.file} + false + + + ${log.file}.%i + 1 + 10 + + + + 100MB + + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{60} %X{sourceThread} - %msg%n + + + + + + + + + + + + + + + + + + + + + diff --git a/images/flink_conf/logback-session.xml b/images/flink_conf/logback-session.xml new file mode 100644 index 0000000..636b78a --- /dev/null +++ b/images/flink_conf/logback-session.xml @@ -0,0 +1,39 @@ + + + + + ${log.file} + false + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{60} %X{sourceThread} - %msg%n + + + + + + %d{yyyy-MM-dd HH:mm:ss} %-5level %logger{60} %X{sourceThread} - %msg%n + + + + + + + + + diff --git a/images/flink_conf/logback.xml b/images/flink_conf/logback.xml new file mode 100644 index 0000000..ae0bfe7 --- /dev/null +++ b/images/flink_conf/logback.xml @@ -0,0 +1,58 @@ + + + + + ${log.file} + false + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{60} %X{sourceThread} - %msg%n + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/images/flink_conf/masters b/images/flink_conf/masters new file mode 100644 index 0000000..ff8b5d6 --- /dev/null +++ b/images/flink_conf/masters @@ -0,0 +1,2 @@ +rcj-2021211180-node1:8081 +rcj-2021211180-node2:8081 diff --git a/images/flink_conf/workers b/images/flink_conf/workers new file mode 100644 index 0000000..43f5617 --- /dev/null +++ b/images/flink_conf/workers @@ -0,0 +1,4 @@ +master +slave1 +slave2 +slave3 diff --git a/images/flink_conf/zoo.cfg b/images/flink_conf/zoo.cfg new file mode 100644 index 0000000..f598997 --- /dev/null +++ b/images/flink_conf/zoo.cfg @@ -0,0 +1,36 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +# The number of milliseconds of each tick +tickTime=2000 + +# The number of ticks that the initial synchronization phase can take +initLimit=10 + +# The number of ticks that can pass between sending a request and getting an acknowledgement +syncLimit=5 + +# The directory where the snapshot is stored. +# dataDir=/tmp/zookeeper + +# The port at which the clients will connect +clientPort=2181 + +# ZooKeeper quorum peers +server.1=localhost:2888:3888 +# server.2=host:peer-port:leader-port diff --git a/images/hadoop_config/core-site.xml b/images/hadoop_config/core-site.xml new file mode 100644 index 0000000..e7ba540 --- /dev/null +++ b/images/hadoop_config/core-site.xml @@ -0,0 +1,28 @@ + + + + + + + + + hadoop.tmp.dir + /opt/hadoop/tmp + + + fs.defaultFS + hdfs://master:8020 + + \ No newline at end of file diff --git a/images/hadoop_config/hdfs-site.xml b/images/hadoop_config/hdfs-site.xml new file mode 100644 index 0000000..6cc46d4 --- /dev/null +++ b/images/hadoop_config/hdfs-site.xml @@ -0,0 +1,37 @@ + + + + + + + + + dfs.replication + 3 + + + dfs.webhdfs.enabled + true + + + dfs.namenode.name.dir + /opt/hadoop/namenode + + + + dfs.datanode.name.dir + /opt/hadoop/datanode + + \ No newline at end of file diff --git a/images/hadoop_config/mapred-site.xml b/images/hadoop_config/mapred-site.xml new file mode 100644 index 0000000..5a3fea8 --- /dev/null +++ b/images/hadoop_config/mapred-site.xml @@ -0,0 +1,40 @@ + + + + + + + + + mapreduce.framework.name + yarn + + + + mapreduce.application.classpath + /opt/hadoop/share/hadoop/tools/lib/*:/opt/hadoop/share/hadoop/common/lib/*:/opt/hadoop/share/hadoop/common/*:/opt/hadoop/share/hadoop/hdfs/*:/opt/hadoop/share/hadoop/hdfs/lib/*:/opt/hadoop/share/hadoop/yarn/*:/opt/hadoop/share/hadoop/yarn/lib/*:/opt/hadoop/share/hadoop/mapreduce/*:/opt/hadoop/share/hadoop/mapreduce/lib/* + + \ No newline at end of file diff --git a/images/hadoop_config/yarn-site.xml b/images/hadoop_config/yarn-site.xml new file mode 100644 index 0000000..9cc62ad --- /dev/null +++ b/images/hadoop_config/yarn-site.xml @@ -0,0 +1,39 @@ + + + + + + + + yarn.nodemanager.local-dirs + /opt/hadoop/tmp/nm-local-dir + + + yarn.resourcemanager.hostname + master + + + yarn.nodemanager.aux-services + mapreduce_shuffle + + + yarn.resourcemanager.am.max-attempts + 4 + + + yarn.application.classpath + /opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/*:/opt/hadoop/share/hadoop/common/*:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/lib/*:/opt/hadoop/share/hadoop/hdfs/*:/opt/hadoop/share/hadoop/mapreduce/*:/opt/hadoop/share/hadoop/yarn:/opt/hadoop/share/hadoop/yarn/lib/*:/opt/hadoop/share/hadoop/yarn/* + + \ No newline at end of file diff --git a/images/hbase_config/hbase-site.xml b/images/hbase_config/hbase-site.xml new file mode 100644 index 0000000..1238d5d --- /dev/null +++ b/images/hbase_config/hbase-site.xml @@ -0,0 +1,66 
@@ + + + + + + + hbase.rootdir + hdfs://master:8020/hbase + + + hbase.cluster.distributed + true + + + hbase.tmp.dir + /opt/hbase/tmp + + + hbase.unsafe.stream.capability.enforce + false + + + hbase.zookeeper.quorum + slave1:2181,slave2:2181,slave3:2181 + + + \ No newline at end of file diff --git a/images/hbase_config/hdfs-site.xml b/images/hbase_config/hdfs-site.xml new file mode 100644 index 0000000..6cc46d4 --- /dev/null +++ b/images/hbase_config/hdfs-site.xml @@ -0,0 +1,37 @@ + + + + + + + + + dfs.replication + 3 + + + dfs.webhdfs.enabled + true + + + dfs.namenode.name.dir + /opt/hadoop/namenode + + + + dfs.datanode.name.dir + /opt/hadoop/datanode + + \ No newline at end of file diff --git a/images/hbase_config/regionservers b/images/hbase_config/regionservers new file mode 100644 index 0000000..15cb51e --- /dev/null +++ b/images/hbase_config/regionservers @@ -0,0 +1,3 @@ +slave1 +slave2 +slave3 \ No newline at end of file diff --git a/images/id_big_data b/images/id_big_data new file mode 100644 index 0000000..604a93f --- /dev/null +++ b/images/id_big_data @@ -0,0 +1,38 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABlwAAAAdzc2gtcn +NhAAAAAwEAAQAAAYEAssi7dW7AJQnGnQdpK0gg69CT/EqxexLFDPkIr95vlddICAaAgRRI +fYsKVTdVDIqCvKnZfZZviyttkVviFOkY1vs8jwe7oty5RtG5OR+IPb+7lx//vwDX4iSNv9 +vkOIDkALEUfu5DqRnpnipxRiTIMZ53UgqYGCOCLPcnQBc3EHBoMVQdpkxk+rIrnyW++7Vi +sa1cxp+tJLXUcX8KnLftjbfPBkxsO3hiXzEAA19X1bfePdDNxt8disPSsHl69pvqq9cgkQ +Jbd1sjR6DTl1YaxeNf2IMBntLBiXfpFwQHTNp4dsWtGWT1BhZAKeDCFgQ28nJXkQni1KQh +xQdG7dg/NgYRfZbgnk17aS8g8ptfj0vji2dzQ3L87QWh/La7IB/kZFHNgaI6NSeFvTaIhF +rBxpWs4loBei989b4HrAz26MXdKJehISlV/kI8QPVxO/SE+BonR8Q8KqJyc/3R18I4paqp +iOExB53VVoSTdBleO2ut/mAitNlQGdpjgN2sP+yxAAAFkPcFk673BZOuAAAAB3NzaC1yc2 +EAAAGBALLIu3VuwCUJxp0HaStIIOvQk/xKsXsSxQz5CK/eb5XXSAgGgIEUSH2LClU3VQyK +gryp2X2Wb4srbZFb4hTpGNb7PI8Hu6LcuUbRuTkfiD2/u5cf/78A1+Ikjb/b5DiA5ACxFH +7uQ6kZ6Z4qcUYkyDGed1IKmBgjgiz3J0AXNxBwaDFUHaZMZPqyK58lvvu1YrGtXMafrSS1 +1HF/Cpy37Y23zwZMbDt4Yl8xAANfV9W33j3QzcbfHYrD0rB5evab6qvXIJECW3dbI0eg05 +dWGsXjX9iDAZ7SwYl36RcEB0zaeHbFrRlk9QYWQCngwhYENvJyV5EJ4tSkIcUHRu3YPzYG +EX2W4J5Ne2kvIPKbX49L44tnc0Ny/O0Fofy2uyAf5GRRzYGiOjUnhb02iIRawcaVrOJaAX +ovfPW+B6wM9ujF3SiXoSEpVf5CPED1cTv0hPgaJ0fEPCqicnP90dfCOKWqqYjhMQed1VaE +k3QZXjtrrf5gIrTZUBnaY4DdrD/ssQAAAAMBAAEAAAGAAwFsY1hCCy1bpSqpejSLdyNpxX +lE/A5v+RybqIc7BYAZwibH1MOwR0oF2bJlL+GJOp45XYrgo6yOej3a8mKLzBtrpHTrEt0W +kUn3zO3Khs05HFyJml6rgm3+Yw4Ri2ChVKCleU/Mm+RnYMUSY+0831asZ1jtvyQjrVlXU9 +TAFHrrSX8CQ5gvr2Y85P9Hs2gUeWuyblVjXgVfhavu0/0klXgUVyUEQcivkK2cdDIPYjSl +3BCtUO1RzBIoMdbM+iXbWCA6s/62MfEHDE1mpX8y4iWfSAjfNmxjyGASFaHnkPntFNH9cK +l44mgU79VM7OzGNgZlnpZQpqBetq2dgYFzVCP+2wl9KzoDMgfW8Pk+Vf049PIrFbnkKxSY +zN+FkI9TXi7k5Mh2z9xBDxUW8d55JOutAolBxn2QVKbPqc8Peaz2bdWl5en6buUIc20jWs +XbV2D7yvynGwG5JX7guqm8w5XzJVFlq83oQPlYq+0jufZWEloqsDwgOwUzQEJHU6VFAAAA +wCUSmw54v/CbFFfLwBB/+xDe1vASmS3/OajBzEvm7tKvvl/NOIxRBRGcyque7jpvRZ4g2A +RhI6ETbSK9XUqVqA+KXjD7neNuOhKZ3zVjrSeTtxZTHOKhvpbuKxjoBY6u/sUrXS9hycfW +zMM3jL2dEuuRCmzvTlE58LVVob1cnJtxBFBDDAs0xe8GWfPXyr86w3J46ey7YEfx2ok9g+ +ma3+llSg0mMP3fUa9UXi3f/5W/Kl/uV7g650Oo4F1S9gwyEAAAAMEA6uF7of6cVsFZYcNy +4cqMgTTRxtb2BGi9OwB9VxkSCToSFAqY27Xrmss+byDDESiyjG2FmHDVgk0j3weyoqb16r +++xEg9yMpjHEmlCArsWFMM6Xkm4i1vO8sTmf8fO3TXprYA78YuuJeh2luUb/8g6GK5ob/L +DhQ7wfgBiItZb63u19yjNcYxIgt1K1U9Q4bDcqBFQDnz9r9VjZi0qgXrKWhyIhGG5rlgDx +WeHxWLi7ytylfmmsOv9M90UKedScOtAAAAwQDC3AIsUZmuoryijQ1RB0GCi7fMgpH0FJPX +iJEN0Xf+NiQbQ6UUoolrEYjdfYo095nSclfe8APAH7pUSrRF28JUyLwXKF2Kl7waO/xWVd 
+LzOek9dxd8LDFlEevbwkD24CkHNH+PZQ5B7zvQO28BnTVPoaBs+pGQrpxpH+5pC6Cr0eir
+cruRJSPYGPSi0glafhxRhtrkLSSsSceEC0FqvLnr21go753ER5ps/naiQndJuN48ObeuUE
+3MTFaJ89EOTZUAAAAUcmljYXJkb0BtYWdpY2Jvb2stMTQBAgMEBQYH
+-----END OPENSSH PRIVATE KEY-----
diff --git a/images/run.sh b/images/run.sh
new file mode 100755
index 0000000..f526faa
--- /dev/null
+++ b/images/run.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+if [ $# -lt 2 ]; then
+    echo "Usage: $0 <id> <hostname>"
+    exit 1
+fi
+
+num=$1
+echo $num > /opt/zookeeper/tmp/myid
+sed -i "s/^server\.$num=.*/server\.$num=0.0.0.0:2888:3888/" /opt/zookeeper/conf/zoo.cfg
+
+hostname=$2
+echo "broker.id=$num" > /opt/kafka/config/server.properties
+echo "host.name=$hostname" >> /opt/kafka/config/server.properties
+echo "zookeeper.connect=master:2181,slave1:2181,slave2:2181,slave3:2181" >> /opt/kafka/config/server.properties
+
+/usr/bin/ssh-keygen -A
+/usr/bin/sshd -D
\ No newline at end of file
diff --git a/images/zookeeper_config/zoo.cfg b/images/zookeeper_config/zoo.cfg
new file mode 100644
index 0000000..0692e4b
--- /dev/null
+++ b/images/zookeeper_config/zoo.cfg
@@ -0,0 +1,41 @@
+# The number of milliseconds of each tick
+tickTime=2000
+# The number of ticks that the initial
+# synchronization phase can take
+initLimit=10
+# The number of ticks that can pass between
+# sending a request and getting an acknowledgement
+syncLimit=5
+# the directory where the snapshot is stored.
+# do not use /tmp for storage, /tmp here is just
+# example sakes.
+dataDir=/opt/zookeeper/tmp
+# the port at which the clients will connect
+clientPort=2181
+# the maximum number of client connections.
+# increase this if you need to handle more clients
+#maxClientCnxns=60
+#
+# Be sure to read the maintenance section of the
+# administrator guide before turning on autopurge.
+#
+# https://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
+#
+# The number of snapshots to retain in dataDir
+#autopurge.snapRetainCount=3
+# Purge task interval in hours
+# Set to "0" to disable auto purge feature
+#autopurge.purgeInterval=1
+
+## Metrics Providers
+#
+# https://prometheus.io Metrics Exporter
+#metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider
+#metricsProvider.httpHost=0.0.0.0
+#metricsProvider.httpPort=7000
+#metricsProvider.exportJvmInfo=true
+
+server.1=master:2888:3888
+server.2=slave1:2888:3888
+server.3=slave2:2888:3888
+server.4=slave3:2888:3888
\ No newline at end of file
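
A minimal way to exercise this patch, sketched here rather than taken from the commit itself: it assumes the archives and jars referenced by the Dockerfile's ADD/COPY lines (hadoop-3.3.6.tar.gz, apache-zookeeper-3.9.2-bin.tar.gz, hbase-2.5.8-bin.tar.gz, spark-3.5.1-bin-hadoop3-scala2.13.tgz, flink-1.13.6-bin-scala_2.11.tgz, kafka_2.12-1.0.2.tgz, commons-cli-1.4.jar, flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar, mysql-connector-j-8.4.0.jar) have already been downloaded into images/, and that the image is tagged big-data to match docker-compose.yml. The start-up order below is an assumption, not something the patch prescribes.

    # Build the image and start the four containers defined in docker-compose.yml
    cd images
    docker build -t big-data .
    docker compose up -d

    # Start the ZooKeeper peer on every container (run.sh has already written each node's myid)
    docker compose exec master zkServer.sh start    # repeat for slave1, slave2, slave3

    # Inside the master container: format HDFS once, then start HDFS, YARN and HBase
    docker compose exec master bash
    hdfs namenode -format
    start-dfs.sh && start-yarn.sh
    start-hbase.sh

    # On each container that should run a Kafka broker (server.properties is written by run.sh)
    kafka-server-start.sh -daemon /opt/kafka/config/server.properties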