fluo-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] mikewalch closed pull request #231: Created checksums file and updated for Accumulo 2.x & Hadoop 3.x
Date Wed, 12 Sep 2018 21:19:22 GMT
mikewalch closed pull request #231: Created checksums file and updated for Accumulo 2.x &
Hadoop 3.x
URL: https://github.com/apache/fluo-muchos/pull/231
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/ansible/roles/accumulo/tasks/init-accumulo.yml b/ansible/roles/accumulo/tasks/init-accumulo.yml
index 9e45215..b2ee10b 100644
--- a/ansible/roles/accumulo/tasks/init-accumulo.yml
+++ b/ansible/roles/accumulo/tasks/init-accumulo.yml
@@ -1,5 +1,5 @@
 - name: "determine if accumulo needs to be initialized"
-  command: "{{ hadoop_prefix }}/bin/hdfs dfs -stat /accumulo"
+  command: "{{ hadoop_home }}/bin/hdfs dfs -stat /accumulo"
   register: accumulo_stat
   changed_when: accumulo_stat.rc != 0
   failed_when: accumulo_stat.rc != 0 and 'No such file or directory' not in accumulo_stat.stderr
diff --git a/ansible/roles/accumulo/tasks/main.yml b/ansible/roles/accumulo/tasks/main.yml
index e5e64de..811dc05 100644
--- a/ansible/roles/accumulo/tasks/main.yml
+++ b/ansible/roles/accumulo/tasks/main.yml
@@ -8,7 +8,18 @@
     - monitor_logger.xml
     - log4j.properties
   when: accumulo_major_version == '1'
-- name: "configure accumulo using managed templates"
+- name: "configure accumulo 2.0 configuration"
+  template: src={{ item }} dest={{ accumulo_home }}/conf/{{ item }}
+  with_items:
+    - accumulo-env.sh
+    - accumulo.properties
+    - accumulo-client.properties
+    - gc
+    - tracers
+    - masters
+    - monitor
+  when: accumulo_major_version == '2'
+- name: "configure accumulo 1.0 configuration"
   template: src={{ item }} dest={{ accumulo_home }}/conf/{{ item }}
   with_items:
     - accumulo-env.sh
@@ -18,6 +29,7 @@
     - tracers
     - masters
     - monitor
+  when: accumulo_major_version == '1'
 - name: "configure accumulo to send metrics (if metrics server exists)"
   template: src={{ item }} dest={{ accumulo_home }}/conf/{{ item }}
   with_items:
diff --git a/ansible/roles/accumulo/templates/accumulo-client.properties b/ansible/roles/accumulo/templates/accumulo-client.properties
new file mode 100644
index 0000000..985b259
--- /dev/null
+++ b/ansible/roles/accumulo/templates/accumulo-client.properties
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+################################
+## Accumulo client configuration
+################################
+
+## Name of Accumulo instance to connect to
+instance.name={{ accumulo_instance }}
+
+## Zookeeper connection information for Accumulo instance
+instance.zookeepers={{ zookeeper_connect }}
+
+## Authentication method (e.g. password, kerberos, PasswordToken, KerberosToken, etc.)
+auth.type=password
+
+## Accumulo principal/username for chosen authentication method
+auth.principal=root
+
+## Authentication token (ex. mypassword, /path/to/keytab)
+auth.token={{ accumulo_password }}
diff --git a/ansible/roles/accumulo/templates/accumulo-env.sh b/ansible/roles/accumulo/templates/accumulo-env.sh
index ca64e7f..d63188f 100755
--- a/ansible/roles/accumulo/templates/accumulo-env.sh
+++ b/ansible/roles/accumulo/templates/accumulo-env.sh
@@ -16,13 +16,13 @@
 # limitations under the License.
 
 export ACCUMULO_LOG_DIR={{ worker_data_dirs[0] }}/logs/accumulo
-export HADOOP_PREFIX={{ hadoop_prefix }}
-export HADOOP_CONF_DIR="$HADOOP_PREFIX/etc/hadoop"
 export ZOOKEEPER_HOME={{ zookeeper_home }}
 export JAVA_HOME={{ java_home }}
 
 {% if accumulo_major_version == '1' %}
 
+export HADOOP_PREFIX={{ hadoop_home }}
+export HADOOP_CONF_DIR="$HADOOP_PREFIX/etc/hadoop"
 export ACCUMULO_TSERVER_OPTS="-Xmx{{ accumulo_tserv_mem }} -Xms{{ accumulo_tserv_mem }}"
 export ACCUMULO_MASTER_OPTS="-Xmx256m -Xms256m"
 export ACCUMULO_MONITOR_OPTS="-Xmx128m -Xms64m"
@@ -32,17 +32,14 @@ export ACCUMULO_GENERAL_OPTS="-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancy
 export ACCUMULO_OTHER_OPTS="-Xmx256m -Xms64m"
 export ACCUMULO_KILL_CMD='kill -9 %p'
 export NUM_TSERVERS=1
+export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1}
 
 {% else %}
 
-CLASSPATH="$(find "$ZOOKEEPER_HOME"/ "$HADOOP_PREFIX"/share/hadoop/{common,common/lib,hdfs,mapreduce,yarn}
-maxdepth 1 -name '*.jar' \
-  -and -not -name '*slf4j*' \
-  -and -not -name '*fatjar*' \
-  -and -not -name '*-javadoc*' \
-  -and -not -name '*-sources*.jar' \
-  -and -not -name '*-test*.jar' \
-  -print0 | tr '\0' ':')$CLASSPATH"
-CLASSPATH="${conf}:${lib}/*:${HADOOP_CONF_DIR}:${CLASSPATH}"
+export HADOOP_HOME={{ hadoop_home }}
+export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
+
+CLASSPATH="${conf}:${lib}/*:${HADOOP_CONF_DIR}:${ZOOKEEPER_HOME}/*:${HADOOP_HOME}/share/hadoop/client/*"
 export CLASSPATH
 
 JAVA_OPTS=("${ACCUMULO_JAVA_OPTS[@]}"
@@ -80,7 +77,6 @@ case "$cmd" in
 esac
 export JAVA_OPTS
 
+export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1}
 export LD_LIBRARY_PATH="${HADOOP_PREFIX}/lib/native:${LD_LIBRARY_PATH}"
 {% endif %}
-
-export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1}
diff --git a/ansible/roles/accumulo/templates/accumulo-site.xml b/ansible/roles/accumulo/templates/accumulo-site.xml
index 5e79942..8dec8b5 100644
--- a/ansible/roles/accumulo/templates/accumulo-site.xml
+++ b/ansible/roles/accumulo/templates/accumulo-site.xml
@@ -40,8 +40,6 @@
     <name>tserver.walog.max.size</name>
     <value>512M</value>
   </property>
-
-{% if accumulo_major_version == '1' %}
   <property>
     <name>tserver.cache.data.size</name>
     <value>{{ accumulo_dcache_size }}</value>
@@ -81,5 +79,4 @@
       $HADOOP_PREFIX/share/hadoop/yarn/lib/jersey.*.jar
     </value>
   </property>
-{% endif %}
 </configuration>
diff --git a/ansible/roles/accumulo/templates/accumulo.properties b/ansible/roles/accumulo/templates/accumulo.properties
new file mode 100644
index 0000000..44edeca
--- /dev/null
+++ b/ansible/roles/accumulo/templates/accumulo.properties
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This is the main configuration file for Apache Accumulo. Available configuration properties
can be
+# found in the Accumulo documentation on the Accumulo project website (https://accumulo.apache.org/)
+# Link for Accumulo 2.0: https://accumulo.apache.org/docs/2.0/administration/properties
+
+## Time to wait on I/O for simple, short RPC calls
+general.rpc.timeout=240s
+
+## A secret unique to a given instance that servers must know to communicate
+instance.secret=muchos
+
+## Sets location in HDFS where Accumulo will store data
+instance.volumes={{ hdfs_root }}/accumulo
+
+## Sets location of Zookeepers
+instance.zookeeper.host={{ zookeeper_connect }}
+
+## The durability used to write the write-ahead log
+table.durability=flush
+
+## Enables C++ in-memory data store that limits Java GC pauses
+tserver.memory.maps.native.enabled=true
+
+## Minimum number of threads to use to handle incoming requests
+tserver.server.threads.minimum=64
+
+## The maximum size for each write-ahead log
+tserver.walog.max.size=512M
diff --git a/ansible/roles/common/templates/bash_profile b/ansible/roles/common/templates/bash_profile
index b180a6c..af79087 100644
--- a/ansible/roles/common/templates/bash_profile
+++ b/ansible/roles/common/templates/bash_profile
@@ -12,8 +12,8 @@ PATH=$PATH:$HOME/bin
 PATH=$PATH:{{ accumulo_home }}/bin
 PATH=$PATH:{{ fluo_home }}/bin
 PATH=$PATH:{{ fluo_yarn_home }}/bin
-PATH=$PATH:{{ hadoop_prefix }}/bin
-PATH=$PATH:{{ hadoop_prefix }}/sbin
+PATH=$PATH:{{ hadoop_home }}/bin
+PATH=$PATH:{{ hadoop_home }}/sbin
 PATH=$PATH:{{ hub_home }}/bin
 PATH=$PATH:{{ maven_home }}/bin
 PATH=$PATH:{{ spark_home }}/bin
diff --git a/ansible/roles/common/templates/bashrc b/ansible/roles/common/templates/bashrc
index 425ebb0..fd6b387 100644
--- a/ansible/roles/common/templates/bashrc
+++ b/ansible/roles/common/templates/bashrc
@@ -6,9 +6,15 @@ if [ -f /etc/bashrc ]; then
 fi
 
 export JAVA_HOME={{ java_home }}
-export HADOOP_PREFIX={{ hadoop_prefix }}
-export HADOOP_HOME=$HADOOP_PREFIX
+
+{% if hadoop_major_version == '2' %}
+export HADOOP_PREFIX={{ hadoop_home }}
 export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
+{% else %}
+export HADOOP_HOME={{ hadoop_home }}
+export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+{% endif %}
+
 export ZOOKEEPER_HOME={{ zookeeper_home }}
 export SPARK_HOME={{ spark_home }}
 export ACCUMULO_HOME={{ accumulo_home }}
@@ -16,7 +22,7 @@ export FLUO_HOME={{ fluo_home }}
 export FLUO_YARN_HOME={{ fluo_yarn_home }}
 
 alias ssh='ssh -A'
-alias cdh='cd {{ hadoop_prefix }}'
+alias cdh='cd {{ hadoop_home }}'
 alias cdz='cd {{ zookeeper_home }}'
 alias cda='cd {{ accumulo_home }}'
 alias cdf='cd {{ fluo_home }}'
diff --git a/ansible/roles/common/templates/root_bashrc b/ansible/roles/common/templates/root_bashrc
index 500851a..0e5c2eb 100644
--- a/ansible/roles/common/templates/root_bashrc
+++ b/ansible/roles/common/templates/root_bashrc
@@ -7,9 +7,15 @@ fi
 
 # User specific aliases and functions
 export JAVA_HOME={{ java_home }}
-export HADOOP_PREFIX={{ hadoop_prefix }}
-export HADOOP_HOME=$HADOOP_PREFIX
+
+{% if hadoop_major_version == '2' %}
+export HADOOP_PREFIX={{ hadoop_home }}
 export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
+{% else %}
+export HADOOP_HOME={{ hadoop_home }}
+export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+{% endif %}
+
 export ZOOKEEPER_HOME={{ zookeeper_home }}
 export SPARK_HOME={{ spark_home }}
 export ACCUMULO_HOME={{ accumulo_home }}
@@ -18,8 +24,8 @@ export FLUO_HOME={{ fluo_home }}
 PATH=$JAVA_HOME/bin:$PATH
 PATH=$PATH:{{ accumulo_home }}/bin
 PATH=$PATH:{{ fluo_home }}/bin
-PATH=$PATH:{{ hadoop_prefix }}/bin
-PATH=$PATH:{{ hadoop_prefix }}/sbin
+PATH=$PATH:{{ hadoop_home }}/bin
+PATH=$PATH:{{ hadoop_home }}/sbin
 PATH=$PATH:{{ hub_home }}/bin
 PATH=$PATH:{{ maven_home }}/bin
 PATH=$PATH:{{ spark_home }}/bin
diff --git a/ansible/roles/hadoop/tasks/main.yml b/ansible/roles/hadoop/tasks/main.yml
index e091d62..fdb63e5 100644
--- a/ansible/roles/hadoop/tasks/main.yml
+++ b/ansible/roles/hadoop/tasks/main.yml
@@ -1,7 +1,7 @@
 - name: "install hadoop tarball"
-  unarchive: src={{ tarballs_dir }}/{{ hadoop_tarball }} dest={{ install_dir }} creates={{
hadoop_prefix }} copy=yes
+  unarchive: src={{ tarballs_dir }}/{{ hadoop_tarball }} dest={{ install_dir }} creates={{
hadoop_home }} copy=yes
 - name: "configure hadoop with templates"
-  template: src={{ item }} dest={{ hadoop_prefix }}/etc/hadoop/{{ item }}
+  template: src={{ item }} dest={{ hadoop_home }}/etc/hadoop/{{ item }}
   with_items:
     - core-site.xml
     - hdfs-site.xml
@@ -9,14 +9,14 @@
     - mapred-site.xml
     - slaves
 - name: "copy spark yarn shuffle jar to hadoop lib"
-  command: cp {{ spark_home }}/yarn/spark-{{ spark_version }}-yarn-shuffle.jar {{ hadoop_prefix
}}/share/hadoop/yarn/lib/ creates={{ hadoop_prefix }}/share/hadoop/yarn/lib/spark-{{ spark_version
}}-yarn-shuffle.jar
+  command: cp {{ spark_home }}/yarn/spark-{{ spark_version }}-yarn-shuffle.jar {{ hadoop_home
}}/share/hadoop/yarn/lib/ creates={{ hadoop_home }}/share/hadoop/yarn/lib/spark-{{ spark_version
}}-yarn-shuffle.jar
   when: "'spark' in groups"
 - name: "setup hadoop short circuit socket dir"
   file: path=/var/lib/hadoop-hdfs state=directory owner={{ cluster_user }} group={{ cluster_user
}} mode=0755
   become: yes
 - name: "Configure hadoop log dir"
   replace:
-    path: "{{ hadoop_prefix }}/etc/hadoop/hadoop-env.sh"
+    path: "{{ hadoop_home }}/etc/hadoop/hadoop-env.sh"
     regexp: '.*export\s+HADOOP_LOG_DIR.*'
     replace: "export HADOOP_LOG_DIR={{ worker_data_dirs[0] }}/logs/hadoop"
 - name: "Create hadoop log dir"
diff --git a/ansible/roles/hadoop/tasks/start-hdfs.yml b/ansible/roles/hadoop/tasks/start-hdfs.yml
index d0718f9..bb58e52 100644
--- a/ansible/roles/hadoop/tasks/start-hdfs.yml
+++ b/ansible/roles/hadoop/tasks/start-hdfs.yml
@@ -1,8 +1,8 @@
 - name: "format namenode"
-  command: "{{ hadoop_prefix }}/bin/hdfs namenode -format"
+  command: "{{ hadoop_home }}/bin/hdfs namenode -format"
   args:
     creates: "{{ worker_data_dirs[0] }}/hadoop/name"
 - name: "start hdfs"
-  command: "{{ hadoop_prefix }}/sbin/start-dfs.sh"
+  command: "{{ hadoop_home }}/sbin/start-dfs.sh"
   register: start_hdfs
   changed_when: "': starting' in start_hdfs.stdout"
diff --git a/ansible/roles/hadoop/tasks/start-yarn.yml b/ansible/roles/hadoop/tasks/start-yarn.yml
index 978fb0f..7477e87 100644
--- a/ansible/roles/hadoop/tasks/start-yarn.yml
+++ b/ansible/roles/hadoop/tasks/start-yarn.yml
@@ -1,4 +1,4 @@
 - name: "start yarn"
-  command: "{{ hadoop_prefix }}/sbin/start-yarn.sh"
+  command: "{{ hadoop_home }}/sbin/start-yarn.sh"
   register: start_yarn_result
   changed_when: start_yarn_result.stdout | search("starting (:?resource|node)manager")
diff --git a/ansible/roles/mesos/templates/mesos-slave b/ansible/roles/mesos/templates/mesos-slave
index a5e3390..9003eb0 100644
--- a/ansible/roles/mesos/templates/mesos-slave
+++ b/ansible/roles/mesos/templates/mesos-slave
@@ -1,3 +1,3 @@
 MASTER=`cat /etc/mesos/zk`
-MESOS_HADOOP_HOME={{ hadoop_prefix }}
+MESOS_HADOOP_HOME={{ hadoop_home }}
 JAVA_HOME={{ java_home }}
diff --git a/ansible/roles/spark/tasks/start-spark-history.yml b/ansible/roles/spark/tasks/start-spark-history.yml
index 85395ac..ecf67a3 100644
--- a/ansible/roles/spark/tasks/start-spark-history.yml
+++ b/ansible/roles/spark/tasks/start-spark-history.yml
@@ -1,5 +1,5 @@
 - name: "ensure spark history directory exists in hdfs" 
-  command: "{{ hadoop_prefix}}/bin/hdfs dfs -mkdir -p /spark/history"
+  command: "{{ hadoop_home}}/bin/hdfs dfs -mkdir -p /spark/history"
   register: mk_hist_dir
   changed_when: mk_hist_dir.rc != 0
 - name: "start spark history server"
diff --git a/ansible/roles/spark/templates/spark-env.sh b/ansible/roles/spark/templates/spark-env.sh
index 477f242..f190eb7 100755
--- a/ansible/roles/spark/templates/spark-env.sh
+++ b/ansible/roles/spark/templates/spark-env.sh
@@ -1,4 +1,4 @@
 #!/usr/bin/env bash
 
-export SPARK_DIST_CLASSPATH=$({{ hadoop_prefix }}/bin/hadoop classpath)
-export HADOOP_CONF_DIR={{ hadoop_prefix }}/etc/hadoop
+export SPARK_DIST_CLASSPATH=$({{ hadoop_home }}/bin/hadoop classpath)
+export HADOOP_CONF_DIR={{ hadoop_home }}/etc/hadoop
diff --git a/ansible/wipe.yml b/ansible/wipe.yml
index 763db89..b5d81e0 100644
--- a/ansible/wipe.yml
+++ b/ansible/wipe.yml
@@ -16,7 +16,7 @@
   - name: "wipe software installation dirs"
     file: path={{ item }} state=absent
     with_items:
-      - "{{ hadoop_prefix }}"
+      - "{{ hadoop_home }}"
       - "{{ zookeeper_home }}"
       - "{{ accumulo_home }}"
       - "{{ fluo_home }}"
@@ -27,7 +27,7 @@
     file: path={{item}}/hadoop state=absent
     with_items: "{{ worker_data_dirs }}"
   - name: "remove hadoop logs"
-    shell: rm -rf {{ hadoop_prefix }}/logs/*
+    shell: rm -rf {{ hadoop_home }}/logs/*
 - hosts: zookeepers
   tasks:
   - name: "wipe zookeeper data"
diff --git a/conf/checksums b/conf/checksums
new file mode 100644
index 0000000..af0c4aa
--- /dev/null
+++ b/conf/checksums
@@ -0,0 +1,20 @@
+accumulo:1.9.2:c23c147e6abde5e6b851cf27f91b813705dc41d07c2bfea798a86abb144255d5
+accumulo:1.9.0:f68a6145029a9ea843b0305c90a7f5f0334d8a8ceeea94734267ec36421fe7fe
+accumulo:1.8.1:eba3bfe823935ca7901ea7c2bd59c84a68b9381361699c7e260bbd9191f237f4
+accumulo:1.7.4:3776dddbc2a09f4a9d7a2ae4958e212e91eb5067a124a628330edbee4e32e754
+accumulo:1.7.3:294f2f1f3fbc164b68e80cecd5a6ce5c245df804fb35ae5e03ab1c86bc9480da
+fluo:1.2.0:037f89cd2bfdaf76a1368256c52de46d6b9a85c9c1bfc776ec4447d02c813fb2
+fluo_yarn:1.0.0:c6220d35cf23127272f3b5638c44586504dc17a46f5beecdfee5027b5ff874b0
+hadoop:3.1.1:f837fe260587f71629aad1f4fb6719274e948111dc96ffc5a8e26f27deac5602
+hadoop:3.0.2:0d507aa71007b2685e292343c11c2cb90a92ea7625446b57d1fb47c5721e2f82
+hadoop:2.9.0:8d48666f29f9ade6ed2762b7a9edab177bad2c57396f43d0ffd6a269d54f6fe1
+hadoop:2.8.4:6b545972fdd73173887cdbc3e1cbd3cc72068271924edea82a0e7e653199b115
+hadoop:2.8.3:e8bf9a53337b1dca3b152b0a5b5e277dc734e76520543e525c301a050bb27eae
+hadoop:2.7.6:f2327ea93f4bc5a5d7150dee8e0ede196d3a77ff8526a7dd05a48a09aae25669
+hadoop:2.7.5:0bfc4d9b04be919be2fdf36f67fa3b4526cdbd406c512a7a1f5f1b715661f831
+hadoop:2.6.5:001ad18d4b6d0fe542b15ddadba2d092bc97df1c4d2d797381c8d12887691898
+spark:2.2.2:023b2fea378b3dd0fee2d5d1de6bfaf2d8349aefe7be97a9cbcf03bbacc428d7
+zookeeper:3.4.13:7ced798e41d2027784b8fd55c908605ad5bd94a742d5dab2506be8f94770594d
+zookeeper:3.4.12:c686f9319050565b58e642149cb9e4c9cc8c7207aacc2cb70c5c0672849594b9
+zookeeper:3.4.11:f6bd68a1c8f7c13ea4c2c99f13082d0d71ac464ffaf3bf7a365879ab6ad10e84
+zookeeper:3.4.10:7f7f5414e044ac11fee2a1e0bc225469f51fb0cdf821e67df762a43098223f27
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index 64fc905..a98057b 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -33,20 +33,13 @@ network_interface=eth0
 accumulo_instance = muchos
 # Accumluo Password
 accumulo_password = secret
-# Software versions
+# Software versions (set sha-256 in conf/checksums)
 hadoop_version = 2.8.4
 zookeeper_version = 3.4.12
 spark_version = 2.2.2
 fluo_version = 1.2.0
 fluo_yarn_version = 1.0.0
 accumulo_version = 1.9.2
-# Software sha256 checksums
-hadoop_sha256 = 6b545972fdd73173887cdbc3e1cbd3cc72068271924edea82a0e7e653199b115
-zookeeper_sha256 = c686f9319050565b58e642149cb9e4c9cc8c7207aacc2cb70c5c0672849594b9
-spark_sha256 = 023b2fea378b3dd0fee2d5d1de6bfaf2d8349aefe7be97a9cbcf03bbacc428d7
-fluo_sha256 = 037f89cd2bfdaf76a1368256c52de46d6b9a85c9c1bfc776ec4447d02c813fb2
-fluo_yarn_sha256 = c6220d35cf23127272f3b5638c44586504dc17a46f5beecdfee5027b5ff874b0
-accumulo_sha256 = c23c147e6abde5e6b851cf27f91b813705dc41d07c2bfea798a86abb144255d5
 
 [ec2]
 # AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1).
diff --git a/lib/muchos/config.py b/lib/muchos/config.py
index 4d643b1..2aa8996 100644
--- a/lib/muchos/config.py
+++ b/lib/muchos/config.py
@@ -22,7 +22,7 @@
 
 class DeployConfig(ConfigParser):
 
-    def __init__(self, deploy_path, config_path, hosts_path, cluster_name):
+    def __init__(self, deploy_path, config_path, hosts_path, checksums_path, cluster_name):
         ConfigParser.__init__(self)
         self.optionxform = str
         self.deploy_path = deploy_path
@@ -35,6 +35,8 @@ def __init__(self, deploy_path, config_path, hosts_path, cluster_name):
         self.metrics_drive_root = 'media-' + self.ephemeral_root
         self.node_d = None
         self.hosts = None
+        self.checksums_path = checksums_path
+        self.checksums_d = None
         self.init_nodes()
 
     def verify_config(self, action):
@@ -118,8 +120,30 @@ def metrics_drive_ids(self):
     def version(self, software_id):
         return self.get('general', software_id + '_version')
 
-    def sha256(self, software_id):
-        return self.get('general', software_id + '_sha256')
+    def checksum(self, software):
+        return self.checksum_ver(software, self.version(software))
+
+    def checksum_ver(self, software, version):
+        if not os.path.isfile(self.checksums_path):
+            exit('ERROR - A checksums file does not exist at %s' % self.checksums_path)
+
+        if not self.checksums_d:
+            self.checksums_d = {}
+            with open(self.checksums_path) as f:
+                for line in f:
+                    line = line.strip()
+                    if line.startswith("#") or not line:
+                        continue
+                    args = line.split(':')
+                    if len(args) == 3:
+                        self.checksums_d["{0}:{1}".format(args[0], args[1])] = args[2]
+                    else:
+                        exit('ERROR - Bad line %s in checksums %s' % (line, self.checksums_path))
+
+        key = "{0}:{1}".format(software, version)
+        if key not in self.checksums_d:
+            exit('ERROR - Failed to find checksums for {0} {1} in {2}'.format(software, version,
self.checksums_path))
+        return self.checksums_d[key]
 
     def verify_instance_type(self, instance_type):
         if get_arch(instance_type) == 'pvm':
@@ -205,6 +229,8 @@ def parse_hosts(self):
                 else:
                     exit('ERROR - Bad line %s in hosts %s' % (line, self.hosts_path))
 
+
+
     def get_hosts(self):
         if self.hosts is None:
             self.parse_hosts()
@@ -276,9 +302,10 @@ def print_property(self, key):
   'fluo_yarn_home': '"{{ install_dir }}/fluo-yarn-{{ fluo_yarn_version }}"',
   'fluo_yarn_tarball': 'fluo-yarn-{{ fluo_yarn_version }}-bin.tar.gz',
   'fluo_yarn_version': None,
-  'hadoop_prefix': '"{{ install_dir }}/hadoop-{{ hadoop_version }}"',
+  'hadoop_home': '"{{ install_dir }}/hadoop-{{ hadoop_version }}"',
   'hadoop_tarball': 'hadoop-{{ hadoop_version }}.tar.gz',
   'hadoop_version': None,
+  'hadoop_major_version': '"{{ hadoop_version.split(\'.\')[0] }}"',
   'hdfs_root': 'hdfs://{{ groups[\'namenode\'][0] }}:8020',
   'install_dir': '"{{ cluster_basedir }}/install"',
   'java_home': '"/usr/lib/jvm/java-1.8.0-openjdk"',
@@ -303,13 +330,13 @@ def print_property(self, key):
   'accumulo_imap_size': None,
   'accumulo_sha256': None,
   'accumulo_tserv_mem': None,
+  'fluo_sha256': None,
   'fluo_worker_instances_multiplier': None,
   'fluo_worker_mem_mb': None,
   'fluo_worker_threads': None,
+  'fluo_yarn_sha256': None,
   'force_format': None,
   'fstype': None,
-  'fluo_sha256': None,
-  'fluo_yarn_sha256': None,
   'hadoop_sha256': None,
   'hub_version': '2.2.3',
   'hub_home': '"{{ install_dir }}/hub-linux-amd64-{{ hub_version }}"',
diff --git a/lib/muchos/main.py b/lib/muchos/main.py
index 49bbb4a..0d8d0f4 100644
--- a/lib/muchos/main.py
+++ b/lib/muchos/main.py
@@ -200,6 +200,13 @@ def sync(self):
                     if name in play_vars:
                         play_vars[name] = value
 
+        play_vars['accumulo_sha256'] = config.checksum('accumulo')
+        play_vars['fluo_sha256'] = config.checksum('fluo')
+        play_vars['fluo_yarn_sha256'] = config.checksum('fluo_yarn')
+        play_vars['hadoop_sha256'] = config.checksum('hadoop')
+        play_vars['spark_sha256'] = config.checksum('spark')
+        play_vars['zookeeper_sha256'] = config.checksum('zookeeper')
+
         cloud_provider = host_vars.get('cloud_provider', 'ec2')
         node_type_map = {}
         if cloud_provider == 'ec2':
@@ -427,6 +434,9 @@ def main():
     config_path = join(deploy_path, "conf/muchos.props")
     if not isfile(config_path):
         exit('ERROR - A config file does not exist at '+config_path)
+    checksums_path = join(deploy_path, "conf/checksums")
+    if not isfile(checksums_path):
+        exit('ERROR - A checksums file does not exist at '+checksums_path)
 
     hosts_dir = join(deploy_path, "conf/hosts/")
 
@@ -439,7 +449,7 @@ def main():
 
     hosts_path = join(hosts_dir, opts.cluster)
 
-    config = DeployConfig(deploy_path, config_path, hosts_path, opts.cluster)
+    config = DeployConfig(deploy_path, config_path, hosts_path, checksums_path, opts.cluster)
     config.verify_config(action)
 
     cluster = MuchosCluster(config)
diff --git a/lib/tests/test_config.py b/lib/tests/test_config.py
index 738036f..4502cd6 100644
--- a/lib/tests/test_config.py
+++ b/lib/tests/test_config.py
@@ -17,7 +17,9 @@
 
 def test_defaults():
     c = DeployConfig("muchos", '../conf/muchos.props.example', '../conf/hosts/example/example_cluster',
-                     'mycluster')
+                     '../conf/checksums', 'mycluster')
+    assert c.checksum_ver('accumulo', '1.9.0') == 'f68a6145029a9ea843b0305c90a7f5f0334d8a8ceeea94734267ec36421fe7fe'
+    assert c.checksum('accumulo') == 'c23c147e6abde5e6b851cf27f91b813705dc41d07c2bfea798a86abb144255d5'
     assert c.get('ec2', 'default_instance_type') == 'm5d.large'
     assert c.get('ec2', 'worker_instance_type') == 'm5d.large'
     assert c.get('ec2', 'aws_ami') == 'ami-9887c6e7'
@@ -61,7 +63,7 @@ def test_defaults():
 
 def test_case_sensitive():
     c = DeployConfig("muchos", '../conf/muchos.props.example', '../conf/hosts/example/example_cluster',
-                     'mycluster')
+                     '../conf/checksums', 'mycluster')
     assert c.has_option('ec2', 'default_instance_type') == True
     assert c.has_option('ec2', 'Default_instance_type') == False
     c.set('nodes', 'CamelCaseWorker', 'worker,fluo')


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message