Author: shv
Date: Tue Jun 5 01:02:16 2012
New Revision: 1346206
URL: http://svn.apache.org/viewvc?rev=1346206&view=rev
Log:
MAPREDUCE-4240. Revert MAPREDUCE-2767. Contributed by Benoy Antony.
Added:
hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/.autom4te.cfg
hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/Makefile.am
hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configuration.c
hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configuration.h
hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configure.ac
hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/main.c
hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/task-controller.c
hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/task-controller.h
hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/tests/test-task-controller.c
hadoop/common/branches/branch-0.22/mapreduce/src/java/org/apache/hadoop/mapred/LinuxTaskController.java (with props)
hadoop/common/branches/branch-0.22/mapreduce/src/test/mapred/org/apache/hadoop/mapred/ClusterWithLinuxTaskController.java (with props)
hadoop/common/branches/branch-0.22/mapreduce/src/test/mapred/org/apache/hadoop/mapred/TestDebugScriptWithLinuxTaskController.java (with props)
hadoop/common/branches/branch-0.22/mapreduce/src/test/mapred/org/apache/hadoop/mapred/TestJobExecutionAsDifferentUser.java (with props)
hadoop/common/branches/branch-0.22/mapreduce/src/test/mapred/org/apache/hadoop/mapred/TestKillSubProcessesWithLinuxTaskController.java (with props)
hadoop/common/branches/branch-0.22/mapreduce/src/test/mapred/org/apache/hadoop/mapred/TestLinuxTaskController.java (with props)
hadoop/common/branches/branch-0.22/mapreduce/src/test/mapred/org/apache/hadoop/mapred/TestLocalizationWithLinuxTaskController.java (with props)
hadoop/common/branches/branch-0.22/mapreduce/src/test/mapred/org/apache/hadoop/mapred/TestTrackerDistributedCacheManagerWithLinuxTaskController.java (with props)
hadoop/common/branches/branch-0.22/mapreduce/src/test/mapred/org/apache/hadoop/mapred/pipes/TestPipesAsDifferentUser.java (with props)
Modified:
hadoop/common/branches/branch-0.22/mapreduce/CHANGES.txt
hadoop/common/branches/branch-0.22/mapreduce/build.xml
Modified: hadoop/common/branches/branch-0.22/mapreduce/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/CHANGES.txt?rev=1346206&r1=1346205&r2=1346206&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.22/mapreduce/CHANGES.txt Tue Jun 5 01:02:16 2012
@@ -29,6 +29,10 @@ Release 0.22.1 - Unreleased
MAPREDUCE-3863. 0.22 branch mvn deploy is not publishing hadoop-streaming JAR
(Benoy Antony via cos)
+ SUBTASKS OF HADOOP-8357. Restore security in Hadoop 0.22 branch.
+
+ MAPREDUCE-4240. Revert MAPREDUCE-2767 (Benoy Antony via shv)
+
Release 0.22.0 - 2011-11-29
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-0.22/mapreduce/build.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/build.xml?rev=1346206&r1=1346205&r2=1346206&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/build.xml (original)
+++ hadoop/common/branches/branch-0.22/mapreduce/build.xml Tue Jun 5 01:02:16 2012
@@ -162,6 +162,20 @@
<property name="make.cmd" value="make"/>
<property name="findbugs.heap.size" value="512M"/>
+ <!-- task-controller properties set here -->
+ <!-- Source directory from where configure is run and files are copied
+ -->
+
+ <property name="c++.task-controller.src"
+ value="${basedir}/src/c++/task-controller" />
+ <!-- directory where autoconf files + temporary files and src is
+ stored for compilation -->
+ <property name="build.c++.task-controller"
+ value="${build.c++}/task-controller" />
+ <!-- the default install dir is build directory override it using
+ -Dtask-controller.install.dir=$HADOOP_HOME/bin -->
+ <property name="task-controller.install.dir" value="${dist.dir}/bin" />
+ <!-- end of task-controller properties -->
<!-- IVY properteis set here -->
<property name="ivy.dir" location="ivy" />
@@ -673,6 +687,8 @@
<sysproperty key="test.debug.data" value="${test.debug.data}"/>
<sysproperty key="hadoop.log.dir" value="@{test.dir}/logs"/>
<sysproperty key="test.src.dir" value="@{fileset.dir}"/>
+ <sysproperty key="taskcontroller-path" value="${taskcontroller-path}"/>
+ <sysproperty key="taskcontroller-ugi" value="${taskcontroller-ugi}"/>
<sysproperty key="test.build.extraconf" value="@{test.dir}/extraconf" />
<sysproperty key="hadoop.policy.file" value="hadoop-policy.xml"/>
<sysproperty key="java.library.path"
@@ -1254,6 +1270,7 @@
<fileset file="${dist.dir}/src/examples/pipes/configure"/>
<fileset file="${dist.dir}/src/c++/utils/configure"/>
<fileset file="${dist.dir}/src/c++/pipes/configure"/>
+ <fileset file="${dist.dir}/src/c++/task-controller/configure"/>
</chmod>
<chmod perm="ugo+x" type="file" parallel="false">
<fileset dir="${dist.dir}/bin"/>
@@ -1279,6 +1296,7 @@
<exclude name="${final.name}/src/examples/pipes/configure"/>
<exclude name="${final.name}/src/c++/utils/configure"/>
<exclude name="${final.name}/src/c++/pipes/configure"/>
+ <exclude name="${final.name}/src/c++/task-controller/configure"/>
<include name="${final.name}/**" />
</tarfileset>
<tarfileset dir="${build.dir}" mode="755">
@@ -1287,6 +1305,7 @@
<include name="${final.name}/src/examples/pipes/configure"/>
<include name="${final.name}/src/c++/utils/configure"/>
<include name="${final.name}/src/c++/pipes/configure"/>
+ <include name="${final.name}/src/c++/task-controller/configure"/>
</tarfileset>
</param.listofitems>
</macro_tar>
@@ -1667,6 +1686,7 @@
<exclude name="src/c++/librecordio/*"/>
<exclude name="src/c++/pipes/*"/>
<exclude name="src/c++/utils/*"/>
+ <exclude name="src/c++/task-controller/*"/>
<exclude name="src/examples/pipes/*"/>
<exclude name="src/c++/pipes/debug/*"/>
</fileset>
@@ -1758,6 +1778,9 @@
<condition property="need.c++.examples.pipes.configure">
<not> <available file="${c++.examples.pipes.src}/configure"/> </not>
</condition>
+ <condition property="need.c++.task-controller.configure">
+ <not> <available file="${c++.task-controller.src}/configure"/> </not>
+ </condition>
</target>
<target name="create-c++-utils-configure" depends="check-c++-configure"
@@ -1787,9 +1810,19 @@
</exec>
</target>
+ <target name="create-c++-task-controller-configure" depends="check-c++-configure"
+ if="need.c++.task-controller.configure">
+ <exec executable="autoreconf" dir="${c++.task-controller.src}"
+ searchpath="yes" failonerror="yes">
+ <arg value="-i"/>
+ <arg value="-f"/>
+ </exec>
+ </target>
+
<target name="create-c++-configure" depends="create-c++-utils-configure,
create-c++-pipes-configure,
- create-c++-examples-pipes-configure"
+ create-c++-examples-pipes-configure,
+ create-c++-task-controller-configure"
if="compile.c++">
</target>
@@ -2229,6 +2262,67 @@
</echo>
</target>
+ <!-- taskcontroller targets -->
+ <target name="init-task-controller-build">
+ <antcall target="create-c++-task-controller-configure" inheritAll="true"/>
+ <mkdir dir="${build.c++.task-controller}" />
+ <copy todir="${build.c++.task-controller}">
+ <fileset dir="${c++.task-controller.src}" includes="*.c"/>
+ <fileset dir="${c++.task-controller.src}" includes="*.h"/>
+ </copy>
+ <chmod file="${c++.task-controller.src}/configure" perm="ugo+x"/>
+ <condition property="task-controller.conf.dir.passed">
+ <not>
+ <equals arg1="${hadoop.conf.dir}" arg2="$${hadoop.conf.dir}"/>
+ </not>
+ </condition>
+ </target>
+ <target name="configure-task-controller" depends="init,
+ init-task-controller-build,
+ task-controller-configuration-with-confdir,
+ task-controller-configuration-with-no-confdir">
+ </target>
+ <target name="task-controller-configuration-with-confdir"
+ if="task-controller.conf.dir.passed" >
+ <exec executable="${c++.task-controller.src}/configure"
+ dir="${build.c++.task-controller}" failonerror="yes">
+ <arg value="--prefix=${task-controller.install.dir}" />
+ <arg value="--with-confdir=${hadoop.conf.dir}" />
+ </exec>
+ </target>
+ <target name="task-controller-configuration-with-no-confdir"
+ unless="task-controller.conf.dir.passed">
+ <exec executable="${c++.task-controller.src}/configure"
+ dir="${build.c++.task-controller}" failonerror="yes">
+ <arg value="--prefix=${task-controller.install.dir}" />
+ </exec>
+ </target>
+ <!--
+ * Create the installation directory.
+ * Do a make install.
+ -->
+ <target name="task-controller" depends="configure-task-controller">
+ <mkdir dir="${task-controller.install.dir}" />
+ <exec executable="${make.cmd}" dir="${build.c++.task-controller}"
+ searchpath="yes" failonerror="yes">
+ <arg value="install" />
+ </exec>
+ </target>
+ <target name="test-task-controller" depends="task-controller">
+ <copy todir="${build.c++.task-controller}" verbose="true">
+ <fileset dir="${c++.task-controller.src}" includes="tests/"/>
+ </copy>
+ <exec executable="${make.cmd}" dir="${build.c++.task-controller}"
+ searchpath="yes" failonerror="yes">
+ <arg value="clean" />
+ <arg value="test" />
+ </exec>
+ <exec executable="${build.c++.task-controller}/tests/test-task-controller"
+ dir="${build.c++.task-controller}/tests/"
+ failonerror="yes">
+ </exec>
+ </target>
+ <!-- end of task-controller targets -->
<!-- Begining of fault-injection targets-->
<import file="${test.src.dir}/aop/build/aop.xml"/>
Added: hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/.autom4te.cfg
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/src/c%2B%2B/task-controller/.autom4te.cfg?rev=1346206&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/.autom4te.cfg (added)
+++ hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/.autom4te.cfg Tue Jun 5 01:02:16 2012
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# autom4te configuration for the hadoop task-controller
+#
+
+begin-language: "Autoheader-preselections"
+args: --no-cache
+end-language: "Autoheader-preselections"
+
+begin-language: "Automake-preselections"
+args: --no-cache
+end-language: "Automake-preselections"
+
+begin-language: "Autoreconf-preselections"
+args: --no-cache
+end-language: "Autoreconf-preselections"
+
+begin-language: "Autoconf-without-aclocal-m4"
+args: --no-cache
+end-language: "Autoconf-without-aclocal-m4"
+
+begin-language: "Autoconf"
+args: --no-cache
+end-language: "Autoconf"
+
Added: hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/Makefile.am
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/src/c%2B%2B/task-controller/Makefile.am?rev=1346206&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/Makefile.am (added)
+++ hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/Makefile.am Tue Jun 5 01:02:16 2012
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ACLOCAL_AMFLAGS = -I ../utils/m4
+AM_CFLAGS = -Wall
+
+bindir = $(exec_prefix)
+
+bin_PROGRAMS = task-controller
+check_PROGRAMS = tests/test-task-controller
+TESTS = $(check_PROGRAMS)
+
+task_controller_SOURCES = main.c task-controller.c configuration.c \
+ task-controller.h
+
+tests_test_task_controller_SOURCES = tests/test-task-controller.c \
+ task-controller.c configuration.c task-controller.h
+
+test: $(check_PROGRAMS)
+ @echo Done with $<
Added: hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configuration.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/src/c%2B%2B/task-controller/configuration.c?rev=1346206&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configuration.c (added)
+++ hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configuration.c Tue Jun 5 01:02:16 2012
@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration.h"
+
+
+char * hadoop_conf_dir;
+
+struct configuration config={.size=0, .confdetails=NULL};
+
+//clean up method for freeing configuration
+void free_configurations() {
+ int i = 0;
+ for (i = 0; i < config.size; i++) {
+ if (config.confdetails[i]->key != NULL) {
+ free((void *)config.confdetails[i]->key);
+ }
+ if (config.confdetails[i]->value != NULL) {
+ free((void *)config.confdetails[i]->value);
+ }
+ free(config.confdetails[i]);
+ }
+ if (config.size > 0) {
+ free(config.confdetails);
+ }
+ config.size = 0;
+}
+
+//function used to load the configurations present in the secure config
+void get_configs() {
+ FILE *conf_file;
+ char *line;
+ char *equaltok;
+ char *temp_equaltok;
+ size_t linesize = 1000;
+ int size_read = 0;
+ int str_len = 0;
+ char *file_name = NULL;
+
+#ifndef HADOOP_CONF_DIR
+ str_len = strlen(CONF_FILE_PATTERN) + strlen(hadoop_conf_dir);
+ file_name = (char *) malloc(sizeof(char) * (str_len + 1));
+#else
+ str_len = strlen(CONF_FILE_PATTERN) + strlen(HADOOP_CONF_DIR);
+ file_name = (char *) malloc(sizeof(char) * (str_len + 1));
+#endif
+
+ if (file_name == NULL) {
+ fprintf(LOGFILE, "Malloc failed :Out of memory \n");
+ return;
+ }
+ memset(file_name,'\0',str_len +1);
+#ifndef HADOOP_CONF_DIR
+ snprintf(file_name,str_len, CONF_FILE_PATTERN, hadoop_conf_dir);
+#else
+ snprintf(file_name, str_len, CONF_FILE_PATTERN, HADOOP_CONF_DIR);
+#endif
+
+#ifdef DEBUG
+ fprintf(LOGFILE, "get_configs :Conf file name is : %s \n", file_name);
+#endif
+
+ //allocate space for ten configuration items.
+ config.confdetails = (struct confentry **) malloc(sizeof(struct confentry *)
+ * MAX_SIZE);
+ config.size = 0;
+ conf_file = fopen(file_name, "r");
+ if (conf_file == NULL) {
+ fprintf(LOGFILE, "Invalid conf file provided : %s \n", file_name);
+ free(file_name);
+ return;
+ }
+ while(!feof(conf_file)) {
+ line = (char *) malloc(linesize);
+ if(line == NULL) {
+ fprintf(LOGFILE, "malloc failed while reading configuration file.\n");
+ goto cleanup;
+ }
+ size_read = getline(&line,&linesize,conf_file);
+ //feof returns true only after we read past EOF.
+ //so a file without a trailing newline on its last line can reach this place
+ //if size_read returns negative check for eof condition
+ if (size_read == -1) {
+ if(!feof(conf_file)){
+ fprintf(LOGFILE, "getline returned error.\n");
+ goto cleanup;
+ }else {
+ break;
+ }
+ }
+ //trim the ending new line
+ line[strlen(line)-1] = '\0';
+ //comment line
+ if(line[0] == '#') {
+ free(line);
+ continue;
+ }
+ //tokenize first to get key and list of values.
+ //if no equals is found ignore this line, can be an empty line also
+ equaltok = strtok_r(line, "=", &temp_equaltok);
+ if(equaltok == NULL) {
+ free(line);
+ continue;
+ }
+ config.confdetails[config.size] = (struct confentry *) malloc(
+ sizeof(struct confentry));
+ if(config.confdetails[config.size] == NULL) {
+ fprintf(LOGFILE,
+ "Failed allocating memory for single configuration item\n");
+ goto cleanup;
+ }
+
+#ifdef DEBUG
+ fprintf(LOGFILE, "get_configs : Adding conf key : %s \n", equaltok);
+#endif
+
+ memset(config.confdetails[config.size], 0, sizeof(struct confentry));
+ config.confdetails[config.size]->key = (char *) malloc(
+ sizeof(char) * (strlen(equaltok)+1));
+ strcpy((char *)config.confdetails[config.size]->key, equaltok);
+ equaltok = strtok_r(NULL, "=", &temp_equaltok);
+ if (equaltok == NULL) {
+ fprintf(LOGFILE, "configuration tokenization failed \n");
+ goto cleanup;
+ }
+ //means value is commented so don't store the key
+ if(equaltok[0] == '#') {
+ free(line);
+ free((void *)config.confdetails[config.size]->key);
+ free(config.confdetails[config.size]);
+ continue;
+ }
+
+#ifdef DEBUG
+ fprintf(LOGFILE, "get_configs : Adding conf value : %s \n", equaltok);
+#endif
+
+ config.confdetails[config.size]->value = (char *) malloc(
+ sizeof(char) * (strlen(equaltok)+1));
+ strcpy((char *)config.confdetails[config.size]->value, equaltok);
+ if((config.size + 1) % MAX_SIZE == 0) {
+ config.confdetails = (struct confentry **) realloc(config.confdetails,
+ sizeof(struct confentry **) * (MAX_SIZE + config.size));
+ if (config.confdetails == NULL) {
+ fprintf(LOGFILE,
+ "Failed re-allocating memory for configuration items\n");
+ goto cleanup;
+ }
+ }
+ if(config.confdetails[config.size] )
+ config.size++;
+ free(line);
+ }
+
+ //close the file
+ fclose(conf_file);
+ //clean up allocated file name
+ free(file_name);
+ return;
+ //free the memory allocated above.
+ cleanup:
+ if (line != NULL) {
+ free(line);
+ }
+ fclose(conf_file);
+ free(file_name);
+ free_configurations();
+ return;
+}
+
+/*
+ * function used to get a configuration value.
+ * The function for the first time populates the configuration details into
+ * array, next time onwards used the populated array.
+ *
+ */
+const char * get_value(const char* key) {
+ int count;
+ if (config.size == 0) {
+ get_configs();
+ }
+ if (config.size == 0) {
+ fprintf(LOGFILE, "Invalid configuration provided\n");
+ return NULL;
+ }
+ for (count = 0; count < config.size; count++) {
+ if (strcmp(config.confdetails[count]->key, key) == 0) {
+ return strdup(config.confdetails[count]->value);
+ }
+ }
+ return NULL;
+}
+
+/**
+ * Function to return an array of values for a key.
+ * Value delimiter is assumed to be a comma.
+ */
+const char ** get_values(const char * key) {
+ const char ** toPass = NULL;
+ const char *value = get_value(key);
+ char *tempTok = NULL;
+ char *tempstr = NULL;
+ int size = 0;
+ int len;
+ //first allocate an array of 10
+ if(value != NULL) {
+ toPass = (const char **) malloc(sizeof(char *) * MAX_SIZE);
+ tempTok = strtok_r((char *)value, ",", &tempstr);
+ if (tempTok != NULL) {
+ while (1) {
+ toPass[size++] = tempTok;
+ tempTok = strtok_r(NULL, ",", &tempstr);
+ if(tempTok == NULL){
+ break;
+ }
+ if((size % MAX_SIZE) == 0) {
+ toPass = (const char **) realloc(toPass,(sizeof(char *) *
+ (MAX_SIZE * ((size/MAX_SIZE) +1))));
+ }
+ }
+ } else {
+ toPass[size] = (char *)value;
+ }
+ }
+ if(size > 0) {
+ toPass[size] = NULL;
+ }
+ return toPass;
+}
+
Added: hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configuration.h
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/src/c%2B%2B/task-controller/configuration.h?rev=1346206&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configuration.h (added)
+++ hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configuration.h Tue Jun 5 01:02:16 2012
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#define INCREMENT_SIZE 1000
+#define MAX_SIZE 10
+
+struct confentry {
+ const char *key;
+ const char *value;
+};
+
+
+struct configuration {
+ int size;
+ struct confentry **confdetails;
+};
+
+FILE *LOGFILE;
+
+#ifdef HADOOP_CONF_DIR
+ #define CONF_FILE_PATTERN "%s/taskcontroller.cfg"
+#else
+ #define CONF_FILE_PATTERN "%s/conf/taskcontroller.cfg"
+#endif
+
+extern struct configuration config;
+//configuration file contents
+#ifndef HADOOP_CONF_DIR
+ extern char *hadoop_conf_dir;
+#endif
+//method exposed to get the configurations
+const char * get_value(const char* key);
+//method to free allocated configuration
+void free_configurations();
+
+//function to return the array of values for the key. Values are
+//comma separated strings.
+const char ** get_values(const char* key);
Added: hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configure.ac
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/src/c%2B%2B/task-controller/configure.ac?rev=1346206&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configure.ac (added)
+++ hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/configure.ac Tue Jun 5 01:02:16 2012
@@ -0,0 +1,68 @@
+# -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+AC_PREREQ(2.59)
+AC_INIT([task-controller],[0.1])
+
+#changing default prefix value to empty string, so that the binary does not
+#get installed within the system
+AC_PREFIX_DEFAULT(.)
+
+#add new argument called -with-confdir
+AC_ARG_WITH(confdir,[--with-confdir path to hadoop conf dir])
+AC_CONFIG_SRCDIR([task-controller.h])
+AC_CONFIG_AUX_DIR([config])
+AC_CONFIG_MACRO_DIR([../utils/m4])
+AM_INIT_AUTOMAKE([subdir-objects foreign no-dist])
+
+# Checks for programs.
+AC_PROG_CC
+
+# Checks for libraries.
+
+# Checks for header files.
+AC_HEADER_STDC
+AC_CHECK_HEADERS([stdlib.h string.h unistd.h fcntl.h])
+
+#check for HADOOP_CONF_DIR
+
+
+if test "$with_confdir" != ""
+then
+AC_DEFINE_UNQUOTED(HADOOP_CONF_DIR, ["$with_confdir"], [Location of Hadoop configuration])
+fi
+# Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_TYPE_PID_T
+AC_TYPE_MODE_T
+AC_TYPE_SIZE_T
+
+# Checks for library functions.
+AC_FUNC_MALLOC
+AC_FUNC_REALLOC
+AC_FUNC_CHOWN
+AC_CHECK_FUNCS([strerror memset mkdir rmdir strdup])
+
+AC_CONFIG_FILES([Makefile])
+AC_OUTPUT
+
+AC_HEADER_STDBOOL
+AC_PROG_MAKE_SET
Added: hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/main.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/src/c%2B%2B/task-controller/main.c?rev=1346206&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/main.c (added)
+++ hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/main.c Tue Jun 5 01:02:16 2012
@@ -0,0 +1,260 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "task-controller.h"
+
+void open_log_file(const char *log_file) {
+ if (log_file == NULL) {
+ LOGFILE = stdout;
+ } else {
+ LOGFILE = fopen(log_file, "a");
+ if (LOGFILE == NULL) {
+ fprintf(stdout, "Unable to open LOGFILE : %s \n", log_file);
+ LOGFILE = stdout;
+ }
+ if (LOGFILE != stdout) {
+ if (chmod(log_file, S_IREAD | S_IEXEC | S_IWRITE | S_IROTH | S_IWOTH
+ | S_IRGRP | S_IWGRP) < 0) {
+ fprintf(stdout, "Unable to change permission of the log file %s \n",
+ log_file);
+ fclose(LOGFILE);
+ fprintf(stdout, "changing log file to stdout");
+ LOGFILE = stdout;
+ }
+ }
+ }
+}
+
+void display_usage(FILE *stream) {
+ fprintf(stream,
+ "Usage: task-controller [-l logfile] user command command-args\n");
+}
+
+/**
+ * Check the permissions on taskcontroller to make sure that security is
+ * not compromised. For this, we need task-controller binary to
+ * * be user-owned by root
+ * * be group-owned by a configured special group.
+ * * others do not have write or execute permissions
+ * * be setuid
+ */
+int check_taskcontroller_permissions(char *executable_file) {
+
+ errno = 0;
+ char * resolved_path = (char *) canonicalize_file_name(executable_file);
+ if (resolved_path == NULL) {
+ fprintf(LOGFILE,
+ "Error resolving the canonical name for the executable : %s!",
+ strerror(errno));
+ return -1;
+ }
+
+ struct stat filestat;
+ errno = 0;
+ if (stat(resolved_path, &filestat) != 0) {
+ fprintf(LOGFILE, "Could not stat the executable : %s!.\n", strerror(errno));
+ return -1;
+ }
+
+ uid_t binary_euid = filestat.st_uid; // Binary's user owner
+ gid_t binary_egid = filestat.st_gid; // Binary's group owner
+
+ // Effective uid should be root
+ if (binary_euid != 0) {
+ fprintf(LOGFILE,
+ "The task-controller binary should be user-owned by root.\n");
+ return -1;
+ }
+
+ // Get the group entry for the special_group
+ errno = 0;
+ struct group *special_group_entry = getgrgid(binary_egid);
+ if (special_group_entry == NULL) {
+ fprintf(LOGFILE,
+ "Unable to get information for effective group of the binary : %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ char * binary_group = special_group_entry->gr_name;
+ // verify that the group name of the special group
+ // is same as the one in configuration
+ if (check_variable_against_config(TT_GROUP_KEY, binary_group) != 0) {
+ fprintf(LOGFILE,
+ "Group of the binary does not match with that in configuration\n");
+ return -1;
+ }
+
+ // check others do not have write/execute permissions
+ if ((filestat.st_mode & S_IWOTH) == S_IWOTH ||
+ (filestat.st_mode & S_IXOTH) == S_IXOTH) {
+ fprintf(LOGFILE,
+ "The task-controller binary should not have write or execute for others.\n");
+ return -1;
+ }
+
+ // Binary should be setuid executable
+ if ((filestat.st_mode & S_ISUID) != S_ISUID) {
+ fprintf(LOGFILE,
+ "The task-controller binary should be set setuid.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv) {
+ int command;
+ int next_option = 0;
+ const char * job_id = NULL;
+ const char * task_id = NULL;
+ const char * tt_root = NULL;
+ const char *log_dir = NULL;
+ const char * unique_string = NULL;
+ int exit_code = 0;
+ const char * task_pid = NULL;
+ const char* const short_options = "l:";
+ const struct option long_options[] = { { "log", 1, NULL, 'l' }, { NULL, 0,
+ NULL, 0 } };
+
+ const char* log_file = NULL;
+ char * dir_to_be_deleted = NULL;
+ int conf_dir_len = 0;
+
+ char *executable_file = argv[0];
+#ifndef HADOOP_CONF_DIR
+ conf_dir_len = (strlen(executable_file) - strlen(EXEC_PATTERN)) + 1;
+ if (conf_dir_len < 1) {
+ // We didn't get an absolute path to our executable_file; bail.
+ printf("Cannot find configuration directory.\n");
+ printf("This program must be run with its full absolute path.\n");
+ return INVALID_CONF_DIR;
+ } else {
+ hadoop_conf_dir = (char *) malloc (sizeof(char) * conf_dir_len);
+ strncpy(hadoop_conf_dir, executable_file,
+ (strlen(executable_file) - strlen(EXEC_PATTERN)));
+ hadoop_conf_dir[(strlen(executable_file) - strlen(EXEC_PATTERN))] = '\0';
+ }
+#endif
+ do {
+ next_option = getopt_long(argc, argv, short_options, long_options, NULL);
+ switch (next_option) {
+ case 'l':
+ log_file = optarg;
+ default:
+ break;
+ }
+ } while (next_option != -1);
+
+ open_log_file(log_file);
+
+ if (check_taskcontroller_permissions(executable_file) != 0) {
+ fprintf(LOGFILE, "Invalid permissions on task-controller binary.\n");
+ return INVALID_TASKCONTROLLER_PERMISSIONS;
+ }
+
+ //Minimum number of arguments required to run the task-controller
+ //command-name user command tt-root
+ if (argc < 3) {
+ display_usage(stdout);
+ return INVALID_ARGUMENT_NUMBER;
+ }
+
+ //checks done for user name
+ //checks done if the user is root or not.
+ if (argv[optind] == NULL) {
+ fprintf(LOGFILE, "Invalid user name \n");
+ return INVALID_USER_NAME;
+ }
+ if (get_user_details(argv[optind]) != 0) {
+ return INVALID_USER_NAME;
+ }
+ //implicit conversion to int instead of __gid_t and __uid_t
+ if (user_detail->pw_gid == 0 || user_detail->pw_uid == 0) {
+ fprintf(LOGFILE, "Cannot run tasks as super user\n");
+ return SUPER_USER_NOT_ALLOWED_TO_RUN_TASKS;
+ }
+ optind = optind + 1;
+ command = atoi(argv[optind++]);
+
+ fprintf(LOGFILE, "main : command provided %d\n",command);
+ fprintf(LOGFILE, "main : user is %s\n", user_detail->pw_name);
+
+ switch (command) {
+ case INITIALIZE_USER:
+ exit_code = initialize_user(user_detail->pw_name);
+ break;
+ case INITIALIZE_JOB:
+ job_id = argv[optind++];
+ exit_code = initialize_job(job_id, user_detail->pw_name);
+ break;
+ case INITIALIZE_DISTRIBUTEDCACHE_FILE:
+ tt_root = argv[optind++];
+ unique_string = argv[optind++];
+ exit_code = initialize_distributed_cache_file(tt_root, unique_string,
+ user_detail->pw_name);
+ break;
+ case LAUNCH_TASK_JVM:
+ tt_root = argv[optind++];
+ job_id = argv[optind++];
+ task_id = argv[optind++];
+ exit_code
+ = run_task_as_user(user_detail->pw_name, job_id, task_id, tt_root);
+ break;
+ case INITIALIZE_TASK:
+ job_id = argv[optind++];
+ task_id = argv[optind++];
+ exit_code = initialize_task(job_id, task_id, user_detail->pw_name);
+ break;
+ case TERMINATE_TASK_JVM:
+ task_pid = argv[optind++];
+ exit_code = kill_user_task(user_detail->pw_name, task_pid, SIGTERM);
+ break;
+ case KILL_TASK_JVM:
+ task_pid = argv[optind++];
+ exit_code = kill_user_task(user_detail->pw_name, task_pid, SIGKILL);
+ break;
+ case RUN_DEBUG_SCRIPT:
+ tt_root = argv[optind++];
+ job_id = argv[optind++];
+ task_id = argv[optind++];
+ exit_code
+ = run_debug_script_as_user(user_detail->pw_name, job_id, task_id, tt_root);
+ break;
+ case SIGQUIT_TASK_JVM:
+ task_pid = argv[optind++];
+ exit_code = kill_user_task(user_detail->pw_name, task_pid, SIGQUIT);
+ break;
+ case ENABLE_TASK_FOR_CLEANUP:
+ tt_root = argv[optind++];
+ job_id = argv[optind++];
+ dir_to_be_deleted = argv[optind++];
+ exit_code = enable_task_for_cleanup(tt_root, user_detail->pw_name, job_id,
+ dir_to_be_deleted);
+ break;
+ case ENABLE_JOB_FOR_CLEANUP:
+ tt_root = argv[optind++];
+ job_id = argv[optind++];
+ exit_code = enable_job_for_cleanup(tt_root, user_detail->pw_name, job_id);
+ break;
+ default:
+ exit_code = INVALID_COMMAND_PROVIDED;
+ }
+ fflush(LOGFILE);
+ fclose(LOGFILE);
+ return exit_code;
+}
Added: hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/task-controller.c
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/src/c%2B%2B/task-controller/task-controller.c?rev=1346206&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/task-controller.c (added)
+++ hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/task-controller.c Tue Jun 5 01:02:16 2012
@@ -0,0 +1,1300 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "task-controller.h"
+
+//struct to store the user details
+struct passwd *user_detail = NULL;
+
+//LOGFILE
+FILE *LOGFILE;
+
/**
 * Placeholder for global cleanup operations.
 * Currently only releases the memory held by the parsed task-controller
 * configuration (see free_configurations in configuration.c).
 */
void cleanup() {
  free_configurations();
}
+
+//change the user to passed user for executing/killing tasks
+int change_user(const char * user) {
+ if (get_user_details(user) < 0) {
+ return -1;
+ }
+
+ if(initgroups(user_detail->pw_name, user_detail->pw_gid) != 0) {
+ fprintf(LOGFILE, "unable to initgroups : %s\n", strerror(errno));
+ cleanup();
+ return SETUID_OPER_FAILED;
+ }
+
+ errno = 0;
+
+ setgid(user_detail->pw_gid);
+ if (errno != 0) {
+ fprintf(LOGFILE, "unable to setgid : %s\n", strerror(errno));
+ cleanup();
+ return SETUID_OPER_FAILED;
+ }
+
+ setegid(user_detail->pw_gid);
+ if (errno != 0) {
+ fprintf(LOGFILE, "unable to setegid : %s\n", strerror(errno));
+ cleanup();
+ return SETUID_OPER_FAILED;
+ }
+
+ setuid(user_detail->pw_uid);
+ if (errno != 0) {
+ fprintf(LOGFILE, "unable to setuid : %s\n", strerror(errno));
+ cleanup();
+ return SETUID_OPER_FAILED;
+ }
+
+ seteuid(user_detail->pw_uid);
+ if (errno != 0) {
+ fprintf(LOGFILE, "unable to seteuid : %s\n", strerror(errno));
+ cleanup();
+ return SETUID_OPER_FAILED;
+ }
+ return 0;
+}
+
+/**
+ * Checks the passed value for the variable config_key against the values in
+ * the configuration.
+ * Returns 0 if the passed value is found in the configuration,
+ * -1 otherwise
+ */
+int check_variable_against_config(const char *config_key,
+ const char *passed_value) {
+
+ if (config_key == NULL || passed_value == NULL) {
+ return -1;
+ }
+
+ int found = -1;
+
+ const char **config_value = get_values(config_key);
+
+ if (config_value == NULL) {
+ fprintf(LOGFILE, "%s is not configured.\n", config_key);
+ return -1;
+ }
+
+ char *full_config_value = (char *)get_value(config_key);
+
+ char **config_val_ptr = (char **) config_value;
+ while (*config_val_ptr != NULL) {
+ if (strcmp(*config_val_ptr, passed_value) == 0) {
+ found = 0;
+ break;
+ }
+ config_val_ptr++;
+ }
+
+ if (found != 0) {
+ fprintf(
+ LOGFILE,
+ "Invalid value passed: \
+ Configured value of %s is %s. \
+ Passed value is %s.\n",
+ config_key, full_config_value, passed_value);
+ }
+ free(full_config_value);
+ free(config_value);
+ return found;
+}
+
+/**
+ * Utility function to concatenate argB to argA using the concat_pattern
+ */
+char *concatenate(char *concat_pattern, char *return_path_name, int numArgs,
+ ...) {
+ va_list ap;
+ va_start(ap, numArgs);
+ int strlen_args = 0;
+ char *arg = NULL;
+ int j;
+ for (j = 0; j < numArgs; j++) {
+ arg = va_arg(ap, char*);
+ if (arg == NULL) {
+ fprintf(LOGFILE, "One of the arguments passed for %s in null.\n",
+ return_path_name);
+ return NULL;
+ }
+ strlen_args += strlen(arg);
+ }
+ va_end(ap);
+
+ char *return_path = NULL;
+ int str_len = strlen(concat_pattern) + strlen_args;
+
+ return_path = (char *) malloc(sizeof(char) * (str_len + 1));
+ if (return_path == NULL) {
+ fprintf(LOGFILE, "Unable to allocate memory for %s.\n", return_path_name);
+ return NULL;
+ }
+ memset(return_path, '\0', str_len + 1);
+ va_start(ap, numArgs);
+ vsnprintf(return_path, str_len, concat_pattern, ap);
+ va_end(ap);
+ return return_path;
+}
+
+/**
+ * Get the job-directory path from tt_root, user name and job-id
+ */
+char *get_job_directory(const char * tt_root, const char *user,
+ const char *jobid) {
+ return concatenate(TT_JOB_DIR_PATTERN, "job_dir_path", 3, tt_root, user,
+ jobid);
+}
+
+/**
+ * Get the user directory of a particular user
+ */
+char *get_user_directory(const char *tt_root, const char *user) {
+ return concatenate(USER_DIR_PATTERN, "user_dir_path", 2, tt_root, user);
+}
+
+/**
+ * Get the distributed cache directory for a particular user
+ */
+char *get_distributed_cache_directory(const char *tt_root, const char *user,
+ const char* unique_string) {
+ return concatenate(USER_DISTRIBUTED_CACHE_DIR_PATTERN,
+ "dist_cache_unique_path", 3, tt_root, user, unique_string);
+}
+
+char *get_job_work_directory(const char *job_dir) {
+ return concatenate(JOB_DIR_TO_JOB_WORK_PATTERN, "job_work_dir_path", 2,
+ job_dir, "");
+}
+/**
+ * Get the attempt directory for the given attempt_id
+ */
+char *get_attempt_directory(const char *job_dir, const char *attempt_id) {
+ return concatenate(JOB_DIR_TO_ATTEMPT_DIR_PATTERN, "attempt_dir_path", 2,
+ job_dir, attempt_id);
+}
+
+/*
+ * Get the path to the task launcher file which is created by the TT
+ */
+char *get_task_launcher_file(const char *job_dir, const char *attempt_dir) {
+ return concatenate(TASK_SCRIPT_PATTERN, "task_script_path", 2, job_dir,
+ attempt_dir);
+}
+
+/*
+ * Builds the full path of the dir(localTaskDir or localWorkDir)
+ * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
+ * dir_to_be_deleted : is either taskDir($taskId) OR taskWorkDir($taskId/work)
+ */
+char *get_task_dir_path(const char *tt_root, const char *user,
+ const char *jobid, const char *dir_to_be_deleted) {
+ return concatenate(TT_LOCAL_TASK_DIR_PATTERN, "task_dir_full_path", 4,
+ tt_root, user, jobid, dir_to_be_deleted);
+}
+
+/**
+ * Get the log directory for the given attempt.
+ */
+char *get_task_log_dir(const char *log_dir, const char *job_id,
+ const char *attempt_id) {
+ return concatenate(ATTEMPT_LOG_DIR_PATTERN, "task_log_dir", 3, log_dir,
+ job_id, attempt_id);
+}
+
+/**
+ * Get the log directory for the given job.
+ */
+char *get_job_log_dir(const char *log_dir, const char *job_id) {
+ return concatenate(JOB_LOG_DIR_PATTERN, "job_log_dir", 2, log_dir, job_id);
+}
+
+/**
+ * Get the job ACLs file for the given job log dir.
+ */
+char *get_job_acls_file(const char *log_dir) {
+ return concatenate(JOB_LOG_DIR_TO_JOB_ACLS_FILE_PATTERN, "job_acls_file",
+ 1, log_dir);
+}
+
+/**
+ * Function to check if the passed tt_root is present in mapreduce.cluster.local.dir
+ * the task-controller is configured with.
+ */
+int check_tt_root(const char *tt_root) {
+ return check_variable_against_config(TT_SYS_DIR_KEY, tt_root);
+}
+
+/**
+ * Function to check if the constructed path and absolute path of the task
+ * launcher file resolve to one and same. This is done so as to avoid
+ * security pitfalls because of relative path components in the file name.
+ */
+int check_path_for_relative_components(char *path) {
+ char * resolved_path = (char *) canonicalize_file_name(path);
+ if (resolved_path == NULL) {
+ fprintf(LOGFILE,
+ "Error resolving the path: %s. Passed path: %s\n",
+ strerror(errno), path);
+ return ERROR_RESOLVING_FILE_PATH;
+ }
+ if (strcmp(resolved_path, path) != 0) {
+ fprintf(LOGFILE,
+ "Relative path components in the path: %s. Resolved path: %s\n",
+ path, resolved_path);
+ free(resolved_path);
+ return RELATIVE_PATH_COMPONENTS_IN_FILE_PATH;
+ }
+ free(resolved_path);
+ return 0;
+}
+
+/**
+ * Function to change the owner/group of a given path.
+ */
+static int change_owner(const char *path, uid_t uid, gid_t gid) {
+ int exit_code = chown(path, uid, gid);
+ if (exit_code != 0) {
+ fprintf(LOGFILE, "chown %d:%d for path %s failed: %s.\n", uid, gid, path,
+ strerror(errno));
+ }
+ return exit_code;
+}
+
+/**
+ * Function to change the mode of a given path.
+ */
+static int change_mode(const char *path, mode_t mode) {
+ int exit_code = chmod(path, mode);
+ if (exit_code != 0) {
+ fprintf(LOGFILE, "chmod %d of path %s failed: %s.\n", mode, path,
+ strerror(errno));
+ }
+ return exit_code;
+}
+
/**
 * Function to change permissions of the given path. It does the following
 * recursively:
 *    1) changes the owner/group of the paths to the passed owner/group
 *    2) changes the file permission to the passed file_mode and directory
 *       permission to the passed dir_mode
 *
 * should_check_ownership : boolean to enable checking of ownership of each path
 *
 * Returns 0 on success; -1 when a directory/file cannot be read or stat'ed
 * or when the walk hits an error; the secure_single_path error codes
 * (-1/-3) when a single entry cannot be secured. The walk is aborted on the
 * first error (the error-check at the bottom of the loop runs before the
 * next entry is processed).
 */
static int secure_path(const char *path, uid_t uid, gid_t gid,
    mode_t file_mode, mode_t dir_mode, int should_check_ownership) {
  FTS *tree = NULL; // the file hierarchy
  FTSENT *entry = NULL; // a file in the hierarchy
  char *paths[] = { (char *) path, NULL };//array needs to be NULL-terminated
  int process_path = 0;
  int dir = 0;
  int error_code = 0;
  int done = 0;

  // Get physical locations and don't resolve the symlinks.
  // Don't change directory while walking the directory.
  int ftsoptions = FTS_PHYSICAL | FTS_NOCHDIR;

  tree = fts_open(paths, ftsoptions, NULL);
  if (tree == NULL) {
    fprintf(LOGFILE,
        "Cannot open file traversal structure for the path %s:%s.\n", path,
        strerror(errno));
    return -1;
  }

  // For each entry, decide via the fts_info classification whether it
  // should be secured (process_path), whether directory mode applies (dir),
  // and whether the walk is finished (done).
  while (((entry = fts_read(tree)) != NULL) && !done) {
    dir = 0;
    switch (entry->fts_info) {
    case FTS_D:
      // A directory being visited in pre-order.
      // We change ownership of directories in post-order.
      // so ignore the pre-order visit.
      process_path = 0;
      break;
    case FTS_DC:
      // A directory that causes a cycle in the tree
      // We don't expect cycles, ignore.
      process_path = 0;
      break;
    case FTS_DNR:
      // A directory which cannot be read
      // Ignore and set error code.
      process_path = 0;
      error_code = -1;
      break;
    case FTS_DOT:
      // "." or ".."
      process_path = 0;
      break;
    case FTS_F:
      // A regular file
      process_path = 1;
      break;
    case FTS_DP:
      // A directory being visited in post-order
      if (entry->fts_level == 0) {
        // root directory. Done with traversing.
        done = 1;
      }
      process_path = 1;
      dir = 1;
      break;
    case FTS_SL:
      // A symbolic link
      // We don't want to change-ownership(and set-permissions) for the file/dir
      // pointed to by any symlink.
      process_path = 0;
      break;
    case FTS_SLNONE:
      // A symbolic link with a nonexistent target
      process_path = 0;
      break;
    case FTS_NS:
      // A file for which no stat(2) information was available
      // Ignore and set error code
      process_path = 0;
      error_code = -1;
      break;
    case FTS_ERR:
      // An error return. Ignore and set error code.
      process_path = 0;
      error_code = -1;
      break;
    case FTS_DEFAULT:
      // File that doesn't belong to any of the above type. Ignore.
      process_path = 0;
      break;
    default:
      // None of the above. Ignore and set error code
      process_path = 0;
      error_code = -1;
    }

    if (error_code != 0) {
      break;
    }
    if (!process_path) {
      continue;
    }
    error_code = secure_single_path(entry->fts_path, uid, gid,
      (dir ? dir_mode : file_mode), should_check_ownership);

  }
  if (fts_close(tree) != 0) {
    fprintf(LOGFILE, "couldn't close file traversal structure:%s.\n",
        strerror(errno));
  }
  return error_code;
}
+
+/**
+ * Function to change ownership and permissions of the given path.
+ * This call sets ownership and permissions just for the path, not recursive.
+ */
+int secure_single_path(char *path, uid_t uid, gid_t gid,
+ mode_t perm, int should_check_ownership) {
+ int error_code = 0;
+ if (should_check_ownership &&
+ (check_ownership(path, uid, gid) != 0)) {
+ fprintf(LOGFILE,
+ "Invalid file path. %s not user/group owned by the tasktracker.\n", path);
+ error_code = -1;
+ } else if (change_owner(path, uid, gid) != 0) {
+ fprintf(LOGFILE, "couldn't change the ownership of %s\n", path);
+ error_code = -3;
+ } else if (change_mode(path, perm) != 0) {
+ fprintf(LOGFILE, "couldn't change the permissions of %s\n", path);
+ error_code = -3;
+ }
+ return error_code;
+}
+
/**
 * Function to prepare the attempt directories for the task JVM.
 * This is done by changing the ownership of the attempt directory recursively
 * to the job owner. We do the following:
 *     *  sudo chown user:mapred -R taskTracker/$user/jobcache/$jobid/$attemptid/
 *     *  sudo chmod 2770 -R taskTracker/$user/jobcache/$jobid/$attemptid/
 *
 * Iterates over every configured local dir; a missing attempt dir in a
 * local dir is skipped silently (ENOENT), any other failure aborts the
 * whole loop. Returns 0 on success, INVALID_ARGUMENT_NUMBER /
 * INVALID_USER_NAME / PREPARE_ATTEMPT_DIRECTORIES_FAILED on error.
 */
int prepare_attempt_directories(const char *job_id, const char *attempt_id,
    const char *user) {
  if (job_id == NULL || attempt_id == NULL || user == NULL) {
    fprintf(LOGFILE, "Either attempt_id is null or the user passed is null.\n");
    return INVALID_ARGUMENT_NUMBER;
  }

  gid_t tasktracker_gid = getegid(); // the group permissions of the binary.

  if (get_user_details(user) < 0) {
    fprintf(LOGFILE, "Couldn't get the user details of %s.\n", user);
    return INVALID_USER_NAME;
  }

  char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);

  if (local_dir == NULL) {
    fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
    cleanup();
    return PREPARE_ATTEMPT_DIRECTORIES_FAILED;
  }

  char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
#ifdef DEBUG
  fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
      full_local_dir_str);
#endif

  char *job_dir;
  char *attempt_dir;
  char **local_dir_ptr = local_dir;
  int failed = 0;
  while (*local_dir_ptr != NULL) {
    job_dir = get_job_directory(*local_dir_ptr, user, job_id);
    if (job_dir == NULL) {
      fprintf(LOGFILE, "Couldn't get job directory for %s.\n", job_id);
      failed = 1;
      break;
    }

    // prepare attempt-dir in each of the mapreduce.cluster.local.dir
    attempt_dir = get_attempt_directory(job_dir, attempt_id);
    if (attempt_dir == NULL) {
      fprintf(LOGFILE, "Couldn't get attempt directory for %s.\n", attempt_id);
      failed = 1;
      free(job_dir);
      break;
    }

    struct stat filestat;
    if (stat(attempt_dir, &filestat) != 0) {
      if (errno == ENOENT) {
#ifdef DEBUG
        fprintf(LOGFILE,
            "attempt_dir %s doesn't exist. Not doing anything.\n", attempt_dir);
#endif
      } else {
        // stat failed because of something else!
        fprintf(LOGFILE, "Failed to stat the attempt_dir %s\n", attempt_dir);
        failed = 1;
        free(attempt_dir);
        free(job_dir);
        break;
      }
    } else if (secure_path(attempt_dir, user_detail->pw_uid,
        tasktracker_gid, S_IRWXU | S_IRWXG, S_ISGID | S_IRWXU | S_IRWXG,
        1) != 0) {
      // No setgid on files and setgid on dirs, 770
      fprintf(LOGFILE, "Failed to secure the attempt_dir %s\n", attempt_dir);
      failed = 1;
      free(attempt_dir);
      free(job_dir);
      break;
    }

    local_dir_ptr++;
    free(attempt_dir);
    free(job_dir);
  }
  free(local_dir);
  free(full_local_dir_str);

  cleanup();
  if (failed) {
    return PREPARE_ATTEMPT_DIRECTORIES_FAILED;
  }
  return 0;
}
+
+/**
+ * Function to prepare the job log dir(and job acls file in it) for the child.
+ * It gives the user ownership of the job's log-dir to the user and
+ * group ownership to the user running tasktracker(i.e. tt_user).
+ *
+ * * sudo chown user:mapred log-dir/userlogs/$jobid
+ * * if user is not $tt_user,
+ * * sudo chmod 2570 log-dir/userlogs/$jobid
+ * * else
+ * * sudo chmod 2770 log-dir/userlogs/$jobid
+ * * sudo chown user:mapred log-dir/userlogs/$jobid/job-acls.xml
+ * * if user is not $tt_user,
+ * * sudo chmod 2570 log-dir/userlogs/$jobid/job-acls.xml
+ * * else
+ * * sudo chmod 2770 log-dir/userlogs/$jobid/job-acls.xml
+ */
+int prepare_job_logs(const char *log_dir, const char *job_id,
+ mode_t permissions) {
+
+ char *job_log_dir = get_job_log_dir(log_dir, job_id);
+ if (job_log_dir == NULL) {
+ fprintf(LOGFILE, "Couldn't get job log directory %s.\n", job_log_dir);
+ return -1;
+ }
+
+ struct stat filestat;
+ if (stat(job_log_dir, &filestat) != 0) {
+ if (errno == ENOENT) {
+#ifdef DEBUG
+ fprintf(LOGFILE, "job_log_dir %s doesn't exist. Not doing anything.\n",
+ job_log_dir);
+#endif
+ free(job_log_dir);
+ return 0;
+ } else {
+ // stat failed because of something else!
+ fprintf(LOGFILE, "Failed to stat the job log dir %s\n", job_log_dir);
+ free(job_log_dir);
+ return -1;
+ }
+ }
+
+ gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
+ // job log directory should not be set permissions recursively
+ // because, on tt restart/reinit, it would contain directories of earlier run
+ if (secure_single_path(job_log_dir, user_detail->pw_uid, tasktracker_gid,
+ S_ISGID | permissions, 1) != 0) {
+ fprintf(LOGFILE, "Failed to secure the log_dir %s\n", job_log_dir);
+ free(job_log_dir);
+ return -1;
+ }
+
+ //set ownership and permissions for job_log_dir/job-acls.xml, if exists.
+ char *job_acls_file = get_job_acls_file(job_log_dir);
+ if (job_acls_file == NULL) {
+ fprintf(LOGFILE, "Couldn't get job acls file %s.\n", job_acls_file);
+ free(job_log_dir);
+ return -1;
+ }
+
+ struct stat filestat1;
+ if (stat(job_acls_file, &filestat1) != 0) {
+ if (errno == ENOENT) {
+#ifdef DEBUG
+ fprintf(LOGFILE, "job_acls_file %s doesn't exist. Not doing anything.\n",
+ job_acls_file);
+#endif
+ free(job_acls_file);
+ free(job_log_dir);
+ return 0;
+ } else {
+ // stat failed because of something else!
+ fprintf(LOGFILE, "Failed to stat the job_acls_file %s\n", job_acls_file);
+ free(job_acls_file);
+ free(job_log_dir);
+ return -1;
+ }
+ }
+
+ if (secure_single_path(job_acls_file, user_detail->pw_uid, tasktracker_gid,
+ permissions, 1) != 0) {
+ fprintf(LOGFILE, "Failed to secure the job acls file %s\n", job_acls_file);
+ free(job_acls_file);
+ free(job_log_dir);
+ return -1;
+ }
+ free(job_acls_file);
+ free(job_log_dir);
+ return 0;
+}
+
+/**
+ * Function to prepare the task logs for the child. It gives the user
+ * ownership of the attempt's log-dir to the user and group ownership to the
+ * user running tasktracker.
+ * * sudo chown user:mapred log-dir/userlogs/$jobid/$attemptid
+ * * sudo chmod -R 2770 log-dir/userlogs/$jobid/$attemptid
+ */
+int prepare_task_logs(const char *log_dir, const char *job_id,
+ const char *task_id) {
+
+ char *task_log_dir = get_task_log_dir(log_dir, job_id, task_id);
+ if (task_log_dir == NULL) {
+ fprintf(LOGFILE, "Couldn't get task_log directory %s.\n", task_log_dir);
+ return -1;
+ }
+
+ struct stat filestat;
+ if (stat(task_log_dir, &filestat) != 0) {
+ if (errno == ENOENT) {
+ // See TaskRunner.java to see that an absent log-dir doesn't fail the task.
+#ifdef DEBUG
+ fprintf(LOGFILE, "task_log_dir %s doesn't exist. Not doing anything.\n",
+ task_log_dir);
+#endif
+ free(task_log_dir);
+ return 0;
+ } else {
+ // stat failed because of something else!
+ fprintf(LOGFILE, "Failed to stat the task_log_dir %s\n", task_log_dir);
+ free(task_log_dir);
+ return -1;
+ }
+ }
+
+ gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
+ if (secure_path(task_log_dir, user_detail->pw_uid, tasktracker_gid,
+ S_IRWXU | S_IRWXG, S_ISGID | S_IRWXU | S_IRWXG, 1) != 0) {
+ // setgid on dirs but not files, 770. As of now, there are no files though
+ fprintf(LOGFILE, "Failed to secure the log_dir %s\n", task_log_dir);
+ free(task_log_dir);
+ return -1;
+ }
+ free(task_log_dir);
+ return 0;
+}
+
+//function used to populate and user_details structure.
+int get_user_details(const char *user) {
+ if (user_detail == NULL) {
+ user_detail = getpwnam(user);
+ if (user_detail == NULL) {
+ fprintf(LOGFILE, "Invalid user\n");
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Function to check if the TaskTracker actually owns the file.
+ * Or it has right ownership already.
+ */
+int check_ownership(char *path, uid_t uid, gid_t gid) {
+ struct stat filestat;
+ if (stat(path, &filestat) != 0) {
+ return UNABLE_TO_STAT_FILE;
+ }
+ // check user/group. User should be TaskTracker user, group can either be
+ // TaskTracker's primary group or the special group to which binary's
+ // permissions are set.
+ // Or it can be the user/group owned by uid and gid passed.
+ if ((getuid() != filestat.st_uid || (getgid() != filestat.st_gid && getegid()
+ != filestat.st_gid)) &&
+ ((uid != filestat.st_uid) || (gid != filestat.st_gid))) {
+ return FILE_NOT_OWNED_BY_TASKTRACKER;
+ }
+ return 0;
+}
+
/**
 * Function to initialize the user directories of a user.
 * It does the following:
 *     *  sudo chown user:mapred -R taskTracker/$user
 *     *  if user is not $tt_user,
 *     *    sudo chmod 2570 -R taskTracker/$user
 *     *  else // user is tt_user
 *     *    sudo chmod 2770 -R taskTracker/$user
 * This is done once per every user on the TaskTracker.
 *
 * Iterates over all configured local dirs; a missing user dir in a local
 * dir is skipped (ENOENT), any other failure aborts the loop. Returns 0 on
 * success, INVALID_ARGUMENT_NUMBER / INVALID_USER_NAME / INVALID_TT_ROOT /
 * INITIALIZE_USER_FAILED on error. Frees the configuration before return.
 */
int initialize_user(const char *user) {

  if (user == NULL) {
    fprintf(LOGFILE, "user passed is null.\n");
    return INVALID_ARGUMENT_NUMBER;
  }

  if (get_user_details(user) < 0) {
    fprintf(LOGFILE, "Couldn't get the user details of %s", user);
    return INVALID_USER_NAME;
  }

  gid_t tasktracker_gid = getegid(); // the group permissions of the binary.

  char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);
  if (local_dir == NULL) {
    fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
    cleanup();
    return INVALID_TT_ROOT;
  }

  char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
#ifdef DEBUG
  fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
      full_local_dir_str);
#endif

  // The task-controller runs as the TT user when invoked by the TT itself.
  int is_tt_user = (user_detail->pw_uid == getuid());

  // for tt_user, set 770 permissions; otherwise set 570
  mode_t permissions = is_tt_user ? (S_IRWXU | S_IRWXG)
      : (S_IRUSR | S_IXUSR | S_IRWXG);
  char *user_dir;
  char **local_dir_ptr = local_dir;
  int failed = 0;
  while (*local_dir_ptr != NULL) {
    user_dir = get_user_directory(*local_dir_ptr, user);
    if (user_dir == NULL) {
      fprintf(LOGFILE, "Couldn't get userdir directory for %s.\n", user);
      failed = 1;
      break;
    }

    struct stat filestat;
    if (stat(user_dir, &filestat) != 0) {
      if (errno == ENOENT) {
#ifdef DEBUG
        fprintf(LOGFILE, "user_dir %s doesn't exist. Not doing anything.\n",
            user_dir);
#endif
      } else {
        // stat failed because of something else!
        fprintf(LOGFILE, "Failed to stat the user_dir %s\n",
            user_dir);
        failed = 1;
        free(user_dir);
        break;
      }
    } else if (secure_path(user_dir, user_detail->pw_uid,
        tasktracker_gid, permissions, S_ISGID | permissions, 1) != 0) {
      // No setgid on files and setgid on dirs,
      // 770 for tt_user and 570 for any other user
      fprintf(LOGFILE, "Failed to secure the user_dir %s\n",
          user_dir);
      failed = 1;
      free(user_dir);
      break;
    }

    local_dir_ptr++;
    free(user_dir);
  }
  free(local_dir);
  free(full_local_dir_str);
  cleanup();
  if (failed) {
    return INITIALIZE_USER_FAILED;
  }
  return 0;
}
+
+/**
+ * Function to prepare the job directories for the task JVM.
+ * We do the following:
+ * * sudo chown user:mapred -R taskTracker/$user/jobcache/$jobid
+ * * sudo chown user:mapred -R logs/userlogs/$jobid
+ * * if user is not $tt_user,
+ * * sudo chmod 2570 -R taskTracker/$user/jobcache/$jobid
+ * * sudo chmod 2570 -R logs/userlogs/$jobid
+ * * else // user is tt_user
+ * * sudo chmod 2770 -R taskTracker/$user/jobcache/$jobid
+ * * sudo chmod 2770 -R logs/userlogs/$jobid
+ * *
+ * * For any user, sudo chmod 2770 taskTracker/$user/jobcache/$jobid/work
+ */
+int initialize_job(const char *jobid, const char *user) {
+ if (jobid == NULL || user == NULL) {
+ fprintf(LOGFILE, "Either jobid is null or the user passed is null.\n");
+ return INVALID_ARGUMENT_NUMBER;
+ }
+
+ if (get_user_details(user) < 0) {
+ fprintf(LOGFILE, "Couldn't get the user details of %s", user);
+ return INVALID_USER_NAME;
+ }
+
+ gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
+
+ char **local_dir = (char **) get_values(TT_SYS_DIR_KEY);
+ if (local_dir == NULL) {
+ fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
+ cleanup();
+ return INVALID_TT_ROOT;
+ }
+
+ char *full_local_dir_str = (char *) get_value(TT_SYS_DIR_KEY);
+#ifdef DEBUG
+ fprintf(LOGFILE, "Value from config for %s is %s.\n", TT_SYS_DIR_KEY,
+ full_local_dir_str);
+#endif
+
+ int is_tt_user = (user_detail->pw_uid == getuid());
+
+ // for tt_user, set 770 permissions; for any other user, set 570 for job-dir
+ mode_t permissions = is_tt_user ? (S_IRWXU | S_IRWXG)
+ : (S_IRUSR | S_IXUSR | S_IRWXG);
+ char *job_dir, *job_work_dir;
+ char **local_dir_ptr = local_dir;
+ int failed = 0;
+ while (*local_dir_ptr != NULL) {
+ job_dir = get_job_directory(*local_dir_ptr, user, jobid);
+ if (job_dir == NULL) {
+ fprintf(LOGFILE, "Couldn't get job directory for %s.\n", jobid);
+ failed = 1;
+ break;
+ }
+
+ struct stat filestat;
+ if (stat(job_dir, &filestat) != 0) {
+ if (errno == ENOENT) {
+#ifdef DEBUG
+ fprintf(LOGFILE, "job_dir %s doesn't exist. Not doing anything.\n",
+ job_dir);
+#endif
+ } else {
+ // stat failed because of something else!
+ fprintf(LOGFILE, "Failed to stat the job_dir %s\n", job_dir);
+ failed = 1;
+ free(job_dir);
+ break;
+ }
+ } else if (secure_path(job_dir, user_detail->pw_uid, tasktracker_gid,
+ permissions, S_ISGID | permissions, 1) != 0) {
+ // No setgid on files and setgid on dirs,
+ // 770 for tt_user and 570 for any other user
+ fprintf(LOGFILE, "Failed to secure the job_dir %s\n", job_dir);
+ failed = 1;
+ free(job_dir);
+ break;
+ } else if (!is_tt_user) {
+ // For tt_user, we don't need this as we already set 2770 for
+ // job-work-dir because of "chmod -R" done above
+ job_work_dir = get_job_work_directory(job_dir);
+ if (job_work_dir == NULL) {
+ fprintf(LOGFILE, "Couldn't get job-work directory for %s.\n", jobid);
+ failed = 1;
+ break;
+ }
+
+ // Set 2770 on the job-work directory
+ if (stat(job_work_dir, &filestat) != 0) {
+ if (errno == ENOENT) {
+#ifdef DEBUG
+ fprintf(LOGFILE,
+ "job_work_dir %s doesn't exist. Not doing anything.\n",
+ job_work_dir);
+#endif
+ free(job_work_dir);
+ } else {
+ // stat failed because of something else!
+ fprintf(LOGFILE, "Failed to stat the job_work_dir %s\n",
+ job_work_dir);
+ failed = 1;
+ free(job_work_dir);
+ free(job_dir);
+ break;
+ }
+ } else if (change_mode(job_work_dir, S_ISGID | S_IRWXU | S_IRWXG) != 0) {
+ fprintf(LOGFILE,
+ "couldn't change the permissions of job_work_dir %s\n",
+ job_work_dir);
+ failed = 1;
+ free(job_work_dir);
+ free(job_dir);
+ break;
+ }
+ }
+
+ local_dir_ptr++;
+ free(job_dir);
+ }
+ free(local_dir);
+ free(full_local_dir_str);
+ int exit_code = 0;
+ if (failed) {
+ exit_code = INITIALIZE_JOB_FAILED;
+ goto cleanup;
+ }
+
+ char *log_dir = (char *) get_value(TT_LOG_DIR_KEY);
+ if (log_dir == NULL) {
+ fprintf(LOGFILE, "Log directory is not configured.\n");
+ exit_code = INVALID_TT_LOG_DIR;
+ goto cleanup;
+ }
+
+ if (prepare_job_logs(log_dir, jobid, permissions) != 0) {
+ fprintf(LOGFILE, "Couldn't prepare job logs directory %s for %s.\n",
+ log_dir, jobid);
+ exit_code = PREPARE_JOB_LOGS_FAILED;
+ }
+
+ cleanup:
+ // free configurations
+ cleanup();
+ if (log_dir != NULL) {
+ free(log_dir);
+ }
+ return exit_code;
+}
+
/**
 * Function to initialize the distributed cache file for a user.
 * It does the following:
 *     *  sudo chown user:mapred -R taskTracker/$user/distcache/<randomdir>
 *     *  if user is not $tt_user,
 *     *    sudo chmod 2570 -R taskTracker/$user/distcache/<randomdir>
 *     *  else // user is tt_user
 *     *    sudo chmod 2770 -R taskTracker/$user/distcache/<randomdir>
 * This is done once per localization. Tasks reusing JVMs just create
 * symbolic links themselves and so there isn't anything specific to do in
 * that case.
 *
 * Unlike the job/user initializers, an absent distcache dir here is an
 * error (stat failure of any kind fails the call). Returns 0 on success or
 * INVALID_ARGUMENT_NUMBER / INVALID_USER_NAME / INVALID_TT_ROOT /
 * INITIALIZE_DISTCACHEFILE_FAILED. Frees the configuration before return.
 */
int initialize_distributed_cache_file(const char *tt_root,
    const char *unique_string, const char *user) {
  if (tt_root == NULL) {
    fprintf(LOGFILE, "tt_root passed is null.\n");
    return INVALID_ARGUMENT_NUMBER;
  }
  if (unique_string == NULL) {
    fprintf(LOGFILE, "unique_string passed is null.\n");
    return INVALID_ARGUMENT_NUMBER;
  }

  if (user == NULL) {
    fprintf(LOGFILE, "user passed is null.\n");
    return INVALID_ARGUMENT_NUMBER;
  }

  if (get_user_details(user) < 0) {
    fprintf(LOGFILE, "Couldn't get the user details of %s", user);
    return INVALID_USER_NAME;
  }
  //Check tt_root
  if (check_tt_root(tt_root) < 0) {
    fprintf(LOGFILE, "invalid tt root passed %s\n", tt_root);
    cleanup();
    return INVALID_TT_ROOT;
  }

  // set permission on the unique directory
  char *localized_unique_dir = get_distributed_cache_directory(tt_root, user,
      unique_string);
  if (localized_unique_dir == NULL) {
    fprintf(LOGFILE, "Couldn't get unique distcache directory for %s.\n", user);
    cleanup();
    return INITIALIZE_DISTCACHEFILE_FAILED;
  }

  gid_t binary_gid = getegid(); // the group permissions of the binary.

  // The task-controller runs as the TT user when invoked by the TT itself.
  int is_tt_user = (user_detail->pw_uid == getuid());

  // for tt_user, set 770 permissions; for any other user, set 570
  mode_t permissions = is_tt_user ? (S_IRWXU | S_IRWXG)
      : (S_IRUSR | S_IXUSR | S_IRWXG);
  int failed = 0;
  struct stat filestat;
  if (stat(localized_unique_dir, &filestat) != 0) {
    // stat on distcache failed because of something
    fprintf(LOGFILE, "Failed to stat the localized_unique_dir %s\n",
        localized_unique_dir);
    failed = INITIALIZE_DISTCACHEFILE_FAILED;
  } else if (secure_path(localized_unique_dir, user_detail->pw_uid,
      binary_gid, permissions, S_ISGID | permissions, 1) != 0) {
    // No setgid on files and setgid on dirs,
    // 770 for tt_user and 570 for any other user
    fprintf(LOGFILE, "Failed to secure the localized_unique_dir %s\n",
        localized_unique_dir);
    failed = INITIALIZE_DISTCACHEFILE_FAILED;
  }
  free(localized_unique_dir);
  cleanup();
  return failed;
}
+
+/**
+ * Function used to initialize task. Prepares attempt_dir, jars_dir and
+ * log_dir to be accessible by the child
+ */
+int initialize_task(const char *jobid, const char *taskid, const char *user) {
+ int exit_code = 0;
+#ifdef DEBUG
+ fprintf(LOGFILE, "job-id passed to initialize_task : %s.\n", jobid);
+ fprintf(LOGFILE, "task-d passed to initialize_task : %s.\n", taskid);
+#endif
+
+ if (prepare_attempt_directories(jobid, taskid, user) != 0) {
+ fprintf(LOGFILE,
+ "Couldn't prepare the attempt directories for %s of user %s.\n",
+ taskid, user);
+ exit_code = PREPARE_ATTEMPT_DIRECTORIES_FAILED;
+ goto cleanup;
+ }
+
+ char *log_dir = (char *) get_value(TT_LOG_DIR_KEY);
+ if (log_dir == NULL) {
+ fprintf(LOGFILE, "Log directory is not configured.\n");
+ exit_code = INVALID_TT_LOG_DIR;
+ goto cleanup;
+ }
+
+ if (prepare_task_logs(log_dir, jobid, taskid) != 0) {
+ fprintf(LOGFILE, "Couldn't prepare task logs directory %s for %s.\n",
+ log_dir, taskid);
+ exit_code = PREPARE_TASK_LOGS_FAILED;
+ }
+
+ cleanup:
+ // free configurations
+ cleanup();
+ if (log_dir != NULL) {
+ free(log_dir);
+ }
+ return exit_code;
+}
+
/*
 * Function used to launch a task as the provided user.
 * Thin wrapper over run_process_as_user with the LAUNCH_TASK_JVM command;
 * on success the process image is replaced by the task script (execlp in
 * run_process_as_user), so this only returns on failure.
 */
int run_task_as_user(const char * user, const char *jobid, const char *taskid,
    const char *tt_root) {
  return run_process_as_user(user, jobid, taskid, tt_root, LAUNCH_TASK_JVM);
}
+
+/*
+ * Function that is used as a helper to launch task JVMs and debug scripts.
+ * Not meant for launching any other process. It does the following :
+ * 1) Checks if the tt_root passed is found in mapreduce.cluster.local.dir
+ * 2) Prepares attempt_dir and log_dir to be accessible by the task JVMs
+ * 3) Uses get_task_launcher_file to fetch the task script file path
+ * 4) Does an execlp on the same in order to replace the current image with
+ * task image.
+ * Returns 0 only in theory: on success execlp never returns; otherwise an
+ * errorcodes value is returned.
+ */
+int run_process_as_user(const char * user, const char * jobid,
+const char *taskid, const char *tt_root, int command) {
+  // Only the two launch commands may go through this helper.
+  if (command != LAUNCH_TASK_JVM && command != RUN_DEBUG_SCRIPT) {
+    return INVALID_COMMAND_PROVIDED;
+  }
+  if (jobid == NULL || taskid == NULL || tt_root == NULL) {
+    return INVALID_ARGUMENT_NUMBER;
+  }
+
+  if (command == LAUNCH_TASK_JVM) {
+    fprintf(LOGFILE, "run_process_as_user launching a JVM for task :%s.\n", taskid);
+  } else if (command == RUN_DEBUG_SCRIPT) {
+    fprintf(LOGFILE, "run_process_as_user launching a debug script for task :%s.\n", taskid);
+  }
+
+#ifdef DEBUG
+  fprintf(LOGFILE, "Job-id passed to run_process_as_user : %s.\n", jobid);
+  fprintf(LOGFILE, "task-d passed to run_process_as_user : %s.\n", taskid);
+  fprintf(LOGFILE, "tt_root passed to run_process_as_user : %s.\n", tt_root);
+#endif
+
+  //Check tt_root before switching the user, as reading configuration
+  //file requires privileged access.
+  if (check_tt_root(tt_root) < 0) {
+    fprintf(LOGFILE, "invalid tt root passed %s\n", tt_root);
+    cleanup();
+    return INVALID_TT_ROOT;
+  }
+
+  int exit_code = 0;
+  char *job_dir = NULL, *task_script_path = NULL;
+
+  // Task directories are only set up when launching the JVM; a debug
+  // script runs against an attempt that was already initialized.
+  if (command == LAUNCH_TASK_JVM &&
+      (exit_code = initialize_task(jobid, taskid, user)) != 0) {
+    fprintf(LOGFILE, "Couldn't initialise the task %s of user %s.\n", taskid,
+        user);
+    goto cleanup;
+  }
+
+  job_dir = get_job_directory(tt_root, user, jobid);
+  if (job_dir == NULL) {
+    fprintf(LOGFILE, "Couldn't obtain job_dir for %s in %s.\n", jobid, tt_root);
+    exit_code = OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  task_script_path = get_task_launcher_file(job_dir, taskid);
+  if (task_script_path == NULL) {
+    fprintf(LOGFILE, "Couldn't obtain task_script_path in %s.\n", job_dir);
+    exit_code = OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  errno = 0;
+  exit_code = check_path_for_relative_components(task_script_path);
+  if(exit_code != 0) {
+    goto cleanup;
+  }
+
+  //change the user
+  fcloseall();
+  free(job_dir);
+  // NULL the pointer so that the goto below cannot double-free it at the
+  // cleanup label (previously a failed change_user freed job_dir twice).
+  job_dir = NULL;
+  umask(0007);
+  if (change_user(user) != 0) {
+    exit_code = SETUID_OPER_FAILED;
+    goto cleanup;
+  }
+
+  errno = 0;
+  cleanup();
+  // The varargs terminator of execlp must be a (char *) null pointer;
+  // a bare NULL may be a plain int 0 and is not guaranteed pointer-sized.
+  execlp(task_script_path, task_script_path, (char *) NULL);
+  // execlp only returns on failure.
+  if (errno != 0) {
+    free(task_script_path);
+    if (command == LAUNCH_TASK_JVM) {
+      fprintf(LOGFILE, "Couldn't execute the task jvm file: %s", strerror(errno));
+      exit_code = UNABLE_TO_EXECUTE_TASK_SCRIPT;
+    } else if (command == RUN_DEBUG_SCRIPT) {
+      fprintf(LOGFILE, "Couldn't execute the task debug script file: %s", strerror(errno));
+      exit_code = UNABLE_TO_EXECUTE_DEBUG_SCRIPT;
+    }
+  }
+
+  return exit_code;
+
+cleanup:
+  if (job_dir != NULL) {
+    free(job_dir);
+  }
+  if (task_script_path != NULL) {
+    free(task_script_path);
+  }
+  // free configurations
+  cleanup();
+  return exit_code;
+}
+/*
+ * Launch the debug script for the given task attempt as 'user'.
+ * Thin wrapper: delegates to run_process_as_user with the
+ * RUN_DEBUG_SCRIPT command.
+ */
+int run_debug_script_as_user(const char * user, const char *jobid, const char *taskid,
+                             const char *tt_root) {
+  const int cmd = RUN_DEBUG_SCRIPT;
+  return run_process_as_user(user, jobid, taskid, tt_root, cmd);
+}
+/**
+ * Function used to terminate/kill a task launched by the user,
+ * or dump the process' stack (by sending SIGQUIT).
+ * The function sends appropriate signal to the process group
+ * specified by the task_pid.
+ * Returns 0 on success (including "process group already gone"),
+ * or an errorcodes value on failure.
+ */
+int kill_user_task(const char *user, const char *task_pid, int sig) {
+  int pid = 0;
+
+  if(task_pid == NULL) {
+    return INVALID_ARGUMENT_NUMBER;
+  }
+
+#ifdef DEBUG
+  fprintf(LOGFILE, "user passed to kill_user_task : %s.\n", user);
+  fprintf(LOGFILE, "task-pid passed to kill_user_task : %s.\n", task_pid);
+  fprintf(LOGFILE, "signal passed to kill_user_task : %d.\n", sig);
+#endif
+
+  // Parse with strtol rather than atoi: atoi gives no error indication and
+  // silently accepts trailing garbage ("123abc"), which is too lax for a
+  // setuid binary that is about to signal a whole process group.
+  errno = 0;
+  char *parse_end = NULL;
+  long parsed_pid = strtol(task_pid, &parse_end, 10);
+  pid = (int) parsed_pid;
+
+  // Reject empty/non-numeric input, trailing characters, non-positive pids
+  // and values that do not survive the round-trip into an int.
+  if(errno != 0 || parse_end == task_pid || *parse_end != '\0'
+     || parsed_pid <= 0 || (long) pid != parsed_pid) {
+    return INVALID_TASK_PID;
+  }
+
+  fcloseall();
+  if (change_user(user) != 0) {
+    cleanup();
+    return SETUID_OPER_FAILED;
+  }
+
+  //Don't continue if the process-group is not alive anymore.
+  if(kill(-pid,0) < 0) {
+    errno = 0;
+    cleanup();
+    return 0;
+  }
+
+  if (kill(-pid, sig) < 0) {
+    // ESRCH means the group died between the probe above and this call;
+    // treat that as success like the probe does.
+    if(errno != ESRCH) {
+      fprintf(LOGFILE, "Error is %s\n", strerror(errno));
+      cleanup();
+      return UNABLE_TO_KILL_TASK;
+    }
+    errno = 0;
+  }
+  cleanup();
+  return 0;
+}
+
+/**
+ * Enables the path for deletion by changing the owner, group and permissions
+ * of the specified path and all the files/directories in the path recursively.
+ * * sudo chown user:mapred -R full_path
+ * * sudo chmod 2770 -R full_path
+ * Before changing permissions, makes sure that the given path doesn't contain
+ * any relative components.
+ * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
+ * full_path : is either jobLocalDir, taskDir OR taskWorkDir that is to be
+ * deleted. Ownership transfers to this function: full_path is freed before
+ * returning.
+ */
+static int enable_path_for_cleanup(const char *tt_root, const char *user,
+                                   char *full_path) {
+  int exit_code = 0;
+  gid_t tasktracker_gid = getegid(); // the group permissions of the binary.
+
+  if (check_tt_root(tt_root) < 0) {
+    fprintf(LOGFILE, "invalid tt root passed %s\n", tt_root);
+    cleanup();
+    return INVALID_TT_ROOT;
+  }
+
+  if (full_path == NULL) {
+    // Do not pass the NULL full_path to a %s specifier: printing a null
+    // pointer with %s is undefined behavior.
+    fprintf(LOGFILE,
+        "Could not build the full path. Not deleting the dir\n");
+    exit_code = UNABLE_TO_BUILD_PATH; // may be malloc failed
+  }
+  // Make sure that the path given is not having any relative components
+  else if ((exit_code = check_path_for_relative_components(full_path)) != 0) {
+    // Format now contains the %s that the full_path argument was always
+    // meant to fill (previously the argument had no matching specifier).
+    fprintf(LOGFILE,
+        "Not changing permissions of %s. Path may contain relative components.\n",
+        full_path);
+  }
+  else if (get_user_details(user) < 0) {
+    fprintf(LOGFILE, "Couldn't get the user details of %s.\n", user);
+    exit_code = INVALID_USER_NAME;
+  }
+  // Parentheses around the assignment are required: '!=' binds tighter
+  // than '=', so without them exit_code was only ever 0 or 1 instead of
+  // the real error code returned by secure_path().
+  else if ((exit_code = secure_path(full_path, user_detail->pw_uid,
+            tasktracker_gid,
+            S_IRWXU | S_IRWXG, S_ISGID | S_IRWXU | S_IRWXG, 0)) != 0) {
+    // No setgid on files and setgid on dirs, 770.
+    // set 770 permissions for user, TTgroup for all files/directories in
+    // 'full_path' recursively sothat deletion of path by TaskTracker succeeds.
+
+    fprintf(LOGFILE, "Failed to set permissions for %s\n", full_path);
+  }
+
+  if (full_path != NULL) {
+    free(full_path);
+  }
+  // free configurations
+  cleanup();
+  return exit_code;
+}
+
+/**
+ * Enables the task work-dir/local-dir path for deletion.
+ * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
+ * dir_to_be_deleted : is either taskDir OR taskWorkDir that is to be deleted
+ */
+int enable_task_for_cleanup(const char *tt_root, const char *user,
+    const char *jobid, const char *dir_to_be_deleted) {
+  // Build the absolute task directory path; enable_path_for_cleanup takes
+  // ownership of the returned buffer and frees it (NULL is handled there).
+  char *task_dir_path = get_task_dir_path(tt_root, user, jobid,
+                                          dir_to_be_deleted);
+  return enable_path_for_cleanup(tt_root, user, task_dir_path);
+}
+
+/**
+ * Enables the jobLocalDir for deletion.
+ * tt_root : is the base path(i.e. mapred-local-dir) sent to task-controller
+ * user : owner of the job
+ * jobid : id of the job for which the cleanup is needed.
+ */
+int enable_job_for_cleanup(const char *tt_root, const char *user,
+    const char *jobid) {
+  // enable_path_for_cleanup takes ownership of the returned buffer and
+  // frees it (a NULL result is handled there as UNABLE_TO_BUILD_PATH).
+  char *job_dir_path = get_job_directory(tt_root, user, jobid);
+  return enable_path_for_cleanup(tt_root, user, job_dir_path);
+}
Added: hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/task-controller.h
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/src/c%2B%2B/task-controller/task-controller.h?rev=1346206&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/task-controller.h (added)
+++ hadoop/common/branches/branch-0.22/mapreduce/src/c++/task-controller/task-controller.h Tue Jun 5 01:02:16 2012
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// NOTE(review): this header has no include guard; the end of the file is
+// not visible here — confirm and add #ifndef TASK_CONTROLLER_H if missing.
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <pwd.h>
+#include <assert.h>
+#include <getopt.h>
+#include <sys/stat.h>
+#include <sys/signal.h>
+#include <grp.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <fts.h>
+
+#include "configuration.h"
+
+//command definitions
+// NOTE(review): the enumerator ordinals appear to be an external protocol
+// (command codes handed to the task-controller binary) — presumably they
+// must match the Java LinuxTaskController counterpart. Confirm before
+// reordering or inserting new commands.
+enum command {
+  INITIALIZE_USER,
+  INITIALIZE_JOB,
+  INITIALIZE_DISTRIBUTEDCACHE_FILE,
+  LAUNCH_TASK_JVM,
+  INITIALIZE_TASK,
+  TERMINATE_TASK_JVM,
+  KILL_TASK_JVM,
+  RUN_DEBUG_SCRIPT,
+  SIGQUIT_TASK_JVM,
+  ENABLE_TASK_FOR_CLEANUP,
+  ENABLE_JOB_FOR_CLEANUP
+};
+
+// Error codes returned by the task-controller entry points (and used as
+// process exit codes). Values start at 1 so 0 can mean success; each
+// enumerator carries its numeric value in a trailing comment.
+// NOTE(review): the numeric values look like an external contract with the
+// TaskTracker — do not renumber.
+enum errorcodes {
+  INVALID_ARGUMENT_NUMBER = 1,
+  INVALID_USER_NAME, //2
+  INVALID_COMMAND_PROVIDED, //3
+  SUPER_USER_NOT_ALLOWED_TO_RUN_TASKS, //4
+  INVALID_TT_ROOT, //5
+  SETUID_OPER_FAILED, //6
+  UNABLE_TO_EXECUTE_TASK_SCRIPT, //7
+  UNABLE_TO_KILL_TASK, //8
+  INVALID_TASK_PID, //9
+  ERROR_RESOLVING_FILE_PATH, //10
+  RELATIVE_PATH_COMPONENTS_IN_FILE_PATH, //11
+  UNABLE_TO_STAT_FILE, //12
+  FILE_NOT_OWNED_BY_TASKTRACKER, //13
+  PREPARE_ATTEMPT_DIRECTORIES_FAILED, //14
+  INITIALIZE_JOB_FAILED, //15
+  PREPARE_TASK_LOGS_FAILED, //16
+  INVALID_TT_LOG_DIR, //17
+  OUT_OF_MEMORY, //18
+  INITIALIZE_DISTCACHEFILE_FAILED, //19
+  INITIALIZE_USER_FAILED, //20
+  UNABLE_TO_EXECUTE_DEBUG_SCRIPT, //21
+  INVALID_CONF_DIR, //22
+  UNABLE_TO_BUILD_PATH, //23
+  INVALID_TASKCONTROLLER_PERMISSIONS, //24
+  PREPARE_JOB_LOGS_FAILED, //25
+};
+
+// printf-style patterns used to build paths under the TaskTracker's local
+// directories; %s placeholders are filled at the call sites in
+// task-controller.c (typically local-dir, user, job-id, attempt-id order).
+#define USER_DIR_PATTERN "%s/taskTracker/%s"
+
+#define TT_JOB_DIR_PATTERN USER_DIR_PATTERN"/jobcache/%s"
+
+#define USER_DISTRIBUTED_CACHE_DIR_PATTERN USER_DIR_PATTERN"/distcache/%s"
+
+#define JOB_DIR_TO_JOB_WORK_PATTERN "%s/work"
+
+#define JOB_DIR_TO_ATTEMPT_DIR_PATTERN "%s/%s"
+
+#define JOB_LOG_DIR_PATTERN "%s/userlogs/%s"
+
+#define JOB_LOG_DIR_TO_JOB_ACLS_FILE_PATTERN "%s/job-acls.xml"
+
+#define ATTEMPT_LOG_DIR_PATTERN JOB_LOG_DIR_PATTERN"/%s"
+
+#define TASK_SCRIPT_PATTERN "%s/%s/taskjvm.sh"
+
+#define TT_LOCAL_TASK_DIR_PATTERN "%s/taskTracker/%s/jobcache/%s/%s"
+
+// Configuration keys looked up via get_value() (see configuration.h).
+#define TT_SYS_DIR_KEY "mapreduce.cluster.local.dir"
+
+#define TT_LOG_DIR_KEY "hadoop.log.dir"
+
+#define TT_GROUP_KEY "mapreduce.tasktracker.group"
+
+#ifndef HADOOP_CONF_DIR
+  // NOTE(review): when no conf dir is compiled in, hadoop_conf_dir is
+  // presumably derived from the executable's path using EXEC_PATTERN —
+  // confirm against main.c.
+  #define EXEC_PATTERN "/bin/task-controller"
+  extern char * hadoop_conf_dir;
+#endif
+
+// Cached passwd entry of the user currently being operated on; filled in
+// by get_user_details() and read e.g. by enable_path_for_cleanup().
+extern struct passwd *user_detail;
+
+// Stream all diagnostics are written to (not necessarily stderr).
+extern FILE *LOGFILE;
+
+// Launch the task JVM for the given attempt as 'user'.
+int run_task_as_user(const char * user, const char *jobid, const char *taskid,
+    const char *tt_root);
+
+// Launch the debug script for the given attempt as 'user'.
+int run_debug_script_as_user(const char * user, const char *jobid, const char *taskid,
+    const char *tt_root);
+
+// Set up the per-user directories on the TaskTracker.
+int initialize_user(const char *user);
+
+// Prepare the attempt and log directories for one task attempt.
+int initialize_task(const char *jobid, const char *taskid, const char *user);
+
+// Localize job files and make them accessible to the user's tasks.
+int initialize_job(const char *jobid, const char *user);
+
+// Fix ownership/permissions of a localized distributed-cache file.
+int initialize_distributed_cache_file(const char *tt_root,
+    const char* unique_string, const char *user);
+
+// Send 'sig' to the process group of task_pid as 'user' (0 if group gone).
+int kill_user_task(const char *user, const char *task_pid, int sig);
+
+// Make a task dir (or its work dir) deletable by the TaskTracker.
+int enable_task_for_cleanup(const char *tt_root, const char *user,
+    const char *jobid, const char *dir_to_be_deleted);
+
+// Make the job-local dir deletable by the TaskTracker.
+int enable_job_for_cleanup(const char *tt_root, const char *user,
+    const char *jobid);
+
+int prepare_attempt_directory(const char *attempt_dir, const char *user);
+
+// The following functions are exposed for testing
+
+int check_variable_against_config(const char *config_key,
+    const char *passed_value);
+
+// Populate the user_detail global from the passwd database.
+int get_user_details(const char *user);
+
+char *get_task_launcher_file(const char *job_dir, const char *attempt_dir);
|