knox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
Subject [3/4] POC work and related changes to support a Knox SSO solution
Date Wed, 10 Jul 2013 14:25:22 GMT
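diff --git a/gateway-spi/src/main/java/org/apache/hadoop/gateway/services/security/token/impl/JWTToken.java b/gateway-spi/src/main/java/org/apache/hadoop/gateway/services/security/token/impl/JWTToken.java
new file mode 100644
index 0000000..cb0836d
--- /dev/null
+++ b/gateway-spi/src/main/java/org/apache/hadoop/gateway/services/security/token/impl/JWTToken.java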
@@ -0,0 +1,135 @@
+  /**
+   * Licensed to the Apache Software Foundation (ASF) under one
+   * or more contributor license agreements.  See the NOTICE file
+   * distributed with this work for additional information
+   * regarding copyright ownership.  The ASF licenses this file
+   * to you under the Apache License, Version 2.0 (the
+   * "License"); you may not use this file except in compliance
+   * with the License.  You may obtain a copy of the License at
+   *
+   *     http://www.apache.org/licenses/LICENSE-2.0
+   *
+   * Unless required by applicable law or agreed to in writing, software
+   * distributed under the License is distributed on an "AS IS" BASIS,
+   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   * See the License for the specific language governing permissions and
+   * limitations under the License.
+   */
+import java.text.MessageFormat;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.gateway.i18n.messages.MessagesFactory;
+import com.jayway.jsonpath.JsonPath;
+public class JWTToken {
+  // MessageFormat patterns: a single-quoted '{' or '}' is a literal brace, {n} is an argument.
+  private static final String headerTemplate = "'{'\"alg\": \"{0}\"'}'";
+  private static final String claimTemplate = "'{'\"iss\": \"{0}\", \"prn\": \"{1}\", \"aud\": \"{2}\", \"exp\": \"{3}\"'}'";
+  // Names of the claims written by claimTemplate and read back through getClaim().
+  public static final String PRINCIPAL = "prn";
+  public static final String ISSUER = "iss";
+  public static final String AUDIENCE = "aud";
+  public static final String EXPIRES = "exp";
+  private static JWTProviderMessages log = MessagesFactory.get( JWTProviderMessages.class );
+  // JSON text of the header and claims segments.
+  public String header = null;
+  public String claims = null;
+  // Signature bytes; rendered as the third segment by toString().
+  byte[] payload = null;
+  /**
+   * Builds a token from the three already-decoded segments of a wire token.
+   * If UTF-8 is reported as unsupported the problem is logged and the
+   * remaining fields are left unset.
+   *
+   * @param header    bytes of the header JSON, decoded as UTF-8
+   * @param claims    bytes of the claims JSON, decoded as UTF-8
+   * @param signature signature bytes, stored as the signature payload
+   */
+  private JWTToken(byte[] header, byte[] claims, byte[] signature) {
+    try {
+      this.header = new String(header, "UTF-8");
+      // Restored assignment target: the archived patch had lost "this.claims".
+      this.claims = new String(claims, "UTF-8");
+      this.payload = signature;
+    } catch (UnsupportedEncodingException e) {
+      log.unsupportedEncoding( e );
+    }
+  }
+  /**
+   * Builds an unsigned token by filling the header and claims templates.
+   *
+   * @param alg         value substituted for the "alg" entry of the header
+   * @param claimsArray values substituted, in order, for the "iss", "prn",
+   *                    "aud" and "exp" entries of the claims
+   */
+  public JWTToken(String alg, String[] claimsArray) {
+    String[] headerArgs = new String[] { alg };
+    this.header = new MessageFormat(headerTemplate).format(headerArgs);
+    this.claims = new MessageFormat(claimTemplate).format(claimsArray);
+  }
+  /**
+   * Returns the text to be signed: the URL-safe Base64 encoding of the header,
+   * a '.', and the URL-safe Base64 encoding of the claims.
+   *
+   * @return the signing input; whatever was appended before an encoding
+   *         failure if UTF-8 is reported as unsupported (the failure is logged)
+   */
+  public String getPayloadToSign() {
+    StringBuffer signingInput = new StringBuffer();
+    try {
+      String encodedHeader = Base64.encodeBase64URLSafeString(header.getBytes("UTF-8"));
+      signingInput.append(encodedHeader).append(".");
+      String encodedClaims = Base64.encodeBase64URLSafeString(claims.getBytes("UTF-8"));
+      signingInput.append(encodedClaims);
+    } catch (UnsupportedEncodingException e) {
+      log.unsupportedEncoding( e );
+    }
+    return signingInput.toString();
+  }
+  /**
+   * Renders the token in wire form: the URL-safe Base64 encodings of the
+   * header, the claims and the signature payload, joined by '.'.
+   * The rendered value is also passed to the message logger.
+   *
+   * @return the wire representation of this token
+   */
+  public String toString() {
+    StringBuffer wire = new StringBuffer();
+    try {
+      wire.append(Base64.encodeBase64URLSafeString(header.getBytes("UTF-8")))
+          .append(".")
+          .append(Base64.encodeBase64URLSafeString(claims.getBytes("UTF-8")))
+          .append(".")
+          .append(Base64.encodeBase64URLSafeString(payload));
+    } catch (UnsupportedEncodingException e) {
+      log.unsupportedEncoding( e );
+    }
+    String rendered = wire.toString();
+    log.renderingJWTTokenForTheWire(rendered);
+    return rendered;
+  }
+  /**
+   * Stores the signature bytes for this token.
+   *
+   * @param payload signature bytes; the array is kept by reference, not copied
+   */
+  public void setSignaturePayload(byte[] payload) {
+    this.payload = payload;
+  }
+  /**
+   * Returns the signature bytes held by this token.
+   *
+   * @return the stored array itself (no copy is made); null if none was set
+   */
+  public byte[] getSignaturePayload() {
+    return payload;
+  }
+  /**
+   * Parses a token from its wire form. The input is split on '.', and the
+   * first three segments are Base64-decoded as header, claims and signature.
+   * A token with fewer than three segments fails on the segment lookup with
+   * an ArrayIndexOutOfBoundsException.
+   *
+   * @param wireToken the dot-separated wire representation
+   * @return the parsed token
+   */
+  public static JWTToken parseToken(String wireToken) {
+    log.parsingToken(wireToken);
+    String[] segments = wireToken.split("\\.");
+    byte[] headerBytes = Base64.decodeBase64(segments[0]);
+    byte[] claimsBytes = Base64.decodeBase64(segments[1]);
+    byte[] signatureBytes = Base64.decodeBase64(segments[2]);
+    return new JWTToken(headerBytes, claimsBytes, signatureBytes);
+  }
+  /**
+   * Looks up a single top-level claim in the claims JSON.
+   *
+   * @param claimName name of the claim, e.g. one of the PRINCIPAL, ISSUER,
+   *                  AUDIENCE or EXPIRES constants
+   * @return the value found at the JSON path "$.&lt;claimName&gt;"
+   */
+  public String getClaim(String claimName) {
+    // Restored call: the archived patch had lost "JsonPath.read(this.claims".
+    String claim = JsonPath.read(this.claims, "$." + claimName);
+    return claim;
+  }
+  /** Returns the value of the "prn" claim. */
+  public String getPrincipal() {
+    return getClaim(PRINCIPAL);
+  }
+  /** Returns the value of the "iss" claim. */
+  public String getIssuer() {
+    return getClaim(ISSUER);
+  }
+  /** Returns the value of the "aud" claim. */
+  public String getAudience() {
+    return getClaim(AUDIENCE);
+  }
+  /** Returns the value of the "exp" claim. */
+  public String getExpires() {
+    return getClaim(EXPIRES);
+  }
diff --git a/hsso-release/home/CHANGES b/hsso-release/home/CHANGES
new file mode 100644
index 0000000..9fdb658
--- /dev/null
+++ b/hsso-release/home/CHANGES
@@ -0,0 +1,15 @@
+Changes v0.2.0 - v0.3.0
+Changes v0.1.0 - v0.2.0
+HTTPS Support (Client side)
+Oozie Support
+Protected DataNode URL query strings
+Pluggable Identity Asserters
+Principal Mapping
+URL Rewriting Enhancements
+KnoxShell Client DSL
diff --git a/hsso-release/home/DISCLAIMER b/hsso-release/home/DISCLAIMER
new file mode 100644
index 0000000..e6af5c0
--- /dev/null
+++ b/hsso-release/home/DISCLAIMER
@@ -0,0 +1,15 @@
+Apache Knox is an effort undergoing incubation at the Apache Software
+Foundation (ASF), sponsored by the Apache Incubator PMC.
+Incubation is required of all newly accepted projects until a further review
+indicates that the infrastructure, communications, and decision making process
+have stabilized in a manner consistent with other successful ASF projects.
+While incubation status is not necessarily a reflection of the completeness
+or stability of the code, it does indicate that the project has yet to be
+fully endorsed by the ASF.
+For more information about the incubation status of the Apache Knox project you
+can go to the following page:
\ No newline at end of file
diff --git a/hsso-release/home/INSTALL b/hsso-release/home/INSTALL
new file mode 100644
index 0000000..9cab07b
--- /dev/null
+++ b/hsso-release/home/INSTALL
@@ -0,0 +1,251 @@
+  Java 1.6 or later
+Hadoop Cluster:
+  A local installation of a Hadoop Cluster is required at this time.  Hadoop
+  EC2 cluster and/or Sandbox installations are currently difficult to access
+  remotely via the Gateway. The EC2 and Sandbox limitation is caused by
+  Hadoop services running with internal IP addresses.  For the Gateway to work
+  in these cases it will need to be deployed on the EC2 cluster or Sandbox, at
+  this time.
+  The instructions that follow assume that the Gateway is *not* collocated
+  with the Hadoop clusters themselves and (most importantly) that the
+  hostnames and IP addresses of the cluster services are accessible by the
+  gateway wherever it happens to be running.
+  The Hadoop cluster should be ensured to have WebHDFS, WebHCat
+  (i.e. Templeton) and Oozie configured, deployed and running.
+Installation and Deployment Instructions
+1. Install
+     Download and extract the knox-{VERSION}.zip file into the
+     installation directory that will contain your GATEWAY_HOME
+       jar xf knox-{VERSION}.zip
+     This will create a directory 'gateway' in your current directory.
+2. Enter Gateway Home directory
+     cd gateway
+   The fully qualified name of this directory will be referenced as
+   {GATEWAY_HOME} throughout the remainder of this document.
+3. Start the demo LDAP server (ApacheDS)
+   a. First, understand that the LDAP server provided here is for demonstration
+      purposes. You may configure the LDAP specifics within the topology
+      descriptor for the cluster as described in step 5 below, in order to
+      customize what LDAP instance to use. The assumption is that most users
+      will leverage the demo LDAP server while evaluating this release and
+      should therefore continue with the instructions here in step 3.
+   b. Edit {GATEWAY_HOME}/conf/users.ldif if required and add your users and
+      groups to the file.  A number of normal Hadoop users
+      (e.g. hdfs, mapred, hcat, hive) have already been included.  Note that
+      the passwords in this file are "fictitious" and have nothing to do with
+      the actual accounts on the Hadoop cluster you are using.  There is also
+      a copy of this file in the templates directory that you can use to start
+      over if necessary.
+   c. Start the LDAP server - pointing it to the config dir where it will find
+      the users.ldif file in the conf directory.
+        java -jar bin/ldap.jar conf &
+      There are a number of log messages of the form "Created null." that can
+      safely be ignored.  Take note of the port on which it was started as this
+      needs to match later configuration.  This will create a directory named
+      'org.apache.hadoop.gateway.security.EmbeddedApacheDirectoryServer' that
+      can safely be ignored.
+4. Start the Gateway server
+     java -jar bin/server.jar
+   a. Take note of the port identified in the logging output as you will need this for
+      accessing the gateway.
+   b. The server will prompt you for the master secret (password). This secret is used
+      to secure artifacts used by the gateway server for
+      things like SSL, credential/password aliasing. This secret will have to be entered
+      at startup unless you choose to persist it. Remember this secret and keep it safe.
+      It represents the keys to the kingdom. See the Persisting the Master section for
+      more information.
+5. Configure the Gateway with the topology of your Hadoop cluster
+   a. Edit the file {GATEWAY_HOME}/deployments/sample.xml
+   b. Change the host and port in the urls of the <service> elements for
+      NAMENODE, TEMPLETON and OOZIE services to match your Hadoop cluster
+      deployment.
+   c. The default configuration contains the LDAP URL for a LDAP server.  By
+      default that file is configured to access the demo ApacheDS based LDAP
+      server and its default configuration. By default, this server listens on
+      port 33389.  Optionally, you can change the LDAP URL for the LDAP server
+      to be used for authentication.  This is set via the
+      main.ldapRealm.contextFactory.url property in the
+      <gateway><provider><authentication> section.
+   d. Save the file.  The directory {GATEWAY_HOME}/deployments is monitored
+      by the Gateway server and reacts to the discovery of a new or changed
+      cluster topology descriptor by provisioning the endpoints and required
+      filter chains to serve the needs of each cluster as described by the
+      topology file.  Note that the name of the file excluding the extension
+      is also used as the path for that cluster in the URL.  So for example
+      the sample.xml file will result in Gateway URLs of the form
+        http://{gateway-host}:{gateway-port}/gateway/sample/namenode/api/v1
+6. Test the installation and configuration of your Gateway
+   Invoke the LISTSTATUS operation on HDFS represented by your configured
+   NAMENODE by using your web browser or curl:
+   curl -i -k -u hdfs:hdfs-password -X GET \
+     'https://localhost:8443/gateway/sample/namenode/api/v1/?op=LISTSTATUS'
+   The results of the above command should result in something along the
+   lines of the output below.  The exact information returned is subject to
+   the content within HDFS in your Hadoop cluster.
+     HTTP/1.1 200 OK
+       Content-Type: application/json
+       Content-Length: 760
+       Server: Jetty(6.1.26)
+     {"FileStatuses":{"FileStatus":[
+     {"accessTime":0,"blockSize":0,"group":"hdfs","length":0,"modificationTime":1350595859762,"owner":"hdfs","pathSuffix":"apps","permission":"755","replication":0,"type":"DIRECTORY"},
+     {"accessTime":0,"blockSize":0,"group":"mapred","length":0,"modificationTime":1350595874024,"owner":"mapred","pathSuffix":"mapred","permission":"755","replication":0,"type":"DIRECTORY"},
+     {"accessTime":0,"blockSize":0,"group":"hdfs","length":0,"modificationTime":1350596040075,"owner":"hdfs","pathSuffix":"tmp","permission":"777","replication":0,"type":"DIRECTORY"},
+     {"accessTime":0,"blockSize":0,"group":"hdfs","length":0,"modificationTime":1350595857178,"owner":"hdfs","pathSuffix":"user","permission":"755","replication":0,"type":"DIRECTORY"}
+     ]}}
+   For additional information on WebHDFS, Templeton/WebHCat and Oozie
+   REST APIs, see the following URLs respectively:
+Persisting the Master
+The master secret is required to start the server. This secret is used to access secured
artifacts by the gateway
+instance. Keystore, trust stores and credential stores are all protected with the master secret.
+You may persist the master secret by supplying the *-persist-master* switch at startup. This
will result in a
+warning indicating that persisting the secret is less secure than providing it at startup.
We do make some provisions in
+order to protect the persisted password.
+It is encrypted with AES 128 bit encryption and where possible the file permissions are set
to only be accessible by
+the user that the gateway is running as.
+After persisting the secret, ensure that the file at config/security/master has the appropriate
permissions set for your
+environment. This is probably the most important layer of defense for master secret. Do not
assume that the encryption is
+sufficient protection.
+A specific user should be created to run the gateway; this will protect a persisted master secret.
+Management of Security Artifacts
+There are a number of artifacts that are used by the gateway in ensuring the security of
wire level communications,
+access to protected resources and the encryption of sensitive data. These artifacts can be
managed from outside of
+the gateway instances or generated and populated by the gateway instance itself.
+The following is a description of how this is coordinated with both standalone (development,
demo, etc) gateway
+instances and instances as part of a cluster of gateways in mind.
+Upon start of the gateway server we:
+1. Look for an identity store at conf/security/keystores/gateway.jks. The identity store
contains the certificate
+   and private key used to represent the identity of the server for SSL connections and signature creation.
+	a. If there is no identity store we create one and generate a self-signed certificate for
use in standalone/demo
+   	   mode. The certificate is stored with an alias of gateway-identity.
+   	b. If there is an identity store found then we ensure that it can be loaded using the
provided master secret and
+   	   that there is an alias called gateway-identity.
+2. Look for a credential store at conf/security/keystores/__gateway-credentials.jceks. This
credential store is used
+   to store secrets/passwords that are used by the gateway. For instance, this is where the
passphrase for accessing
+   the gateway-identity certificate is kept.
+   a. If there is no credential store found then we create one and populate it with a generated
passphrase for the alias
+      gateway-identity-passphrase. This is coordinated with the population of the self-signed
cert into the identity-store.
+   b. If a credential store is found then we ensure that it can be loaded using the provided
master secret and that the
+      expected aliases have been populated with secrets.
+Upon deployment of a Hadoop cluster topology within the gateway we:
+1. Look for a credential store for the topology. For instance, we have a sample topology
that gets deployed out of the box.
+   We look for conf/security/keystores/sample-credentials.jceks. This topology specific credential
store is used for storing
+   secrets/passwords that are used for encrypting sensitive data with topology specific keys.
+   a. If no credential store is found for the topology being deployed then one is created
for it. Population of the aliases
+      is delegated to the configured providers within the system that will require the use
of a secret for a particular
+      task. They may programmatically set the value of the secret or choose to have the value
for the specified alias
+      generated through the AliasService.
+   b. If a credential store is found then we ensure that it can be loaded with the provided
master secret and the configured
+      providers have the opportunity to ensure that the aliases are populated and if not
to populate them.
+ By leveraging the algorithm described above we can provide a window of opportunity for management
of these artifacts in a
+ number of ways.
+ 1. Using a single gateway instance as a master instance the artifacts can be generated or
placed into the expected location
+    and then replicated across all of the slave instances before startup.
+ 2. Using an NFS mount as a central location for the artifacts would provide a single source
of truth without the need to
+    replicate them over the network. Of course, NFS mounts have their own challenges.
+Summary of Secrets to be Managed:
+1. Master secret - the same for all gateway instances in a cluster of gateways
+2. All security related artifacts are protected with the master secret
+3. Secrets used by the gateway itself are stored within the gateway credential store and
are the same across all gateway
+   instances in the cluster of gateways
+4. Secrets used by providers within cluster topologies are stored in topology specific credential
stores and are the same
+   for the same topology across the cluster of gateway instances. However, they are specific
to the topology - so secrets
+   for one hadoop cluster are different from those of another. This allows for failover from
one gateway instance to another
+   even when encryption is being used while not allowing the compromise of one encryption
key to expose the data for all clusters.
+NOTE: the SSL certificate will need special consideration depending on the type of certificate.
Wildcard certs may be able
+to be shared across all gateway instances in a cluster. When certs are dedicated to specific
machines the gateway identity
+store will not be able to be blindly replicated as hostname verification problems will ensue.
Obviously, truststores will
+need to be taken into account as well.
+Mapping Gateway URLs to Hadoop cluster URLs
+The Gateway functions much like a reverse proxy.  As such it maintains a
+mapping of URLs that are exposed externally by the Gateway to URLs that are
+provided by the Hadoop cluster.  Examples of mappings for the NameNode and
+Templeton are shown below.  These mappings are generated from the combination
+of the Gateway configuration file (i.e. {GATEWAY_HOME}/gateway-site.xml)
+and the cluster topology descriptors
+(e.g. {GATEWAY_HOME}/deployments/<cluster-name>.xml).
+  HDFS (NameNode)
+    Gateway: http://<gateway-host>:<gateway-port>/<gateway-path>/<cluster-name>/namenode/api/v1
+    Cluster: http://<namenode-host>:50070/webhdfs/v1
+  WebHCat (Templeton)
+    Gateway: http://<gateway-host>:<gateway-port>/<gateway-path>/<cluster-name>/templeton/api/v1
+    Cluster: http://<templeton-host>:50111/templeton/v1
+  Oozie
+    Gateway: http://<gateway-host>:<gateway-port>/<gateway-path>/<cluster-name>/oozie/api/v1
+    Cluster: http://<templeton-host>:11000/oozie/v1
+The values for <gateway-host>, <gateway-port>, <gateway-path> are provided via
+the Gateway configuration file (i.e. {GATEWAY_HOME}/gateway-site.xml).
+The value for <cluster-name> is derived from the name of the cluster topology
+descriptor (e.g. {GATEWAY_HOME}/deployments/<cluster-name>.xml).
+The value for <namenode-host> and <templeton-host> is provided via the cluster
+topology descriptor (e.g. {GATEWAY_HOME}/deployments/<cluster-name>.xml).
+Note: The ports 50070, 50111 and 11000 are the defaults for NameNode,
+      Templeton and Oozie respectively. Their values can also be provided via
+      the cluster topology descriptor if your Hadoop cluster uses different
+      ports.
+Usage Examples
+Please see the Apache Knox Gateway website for detailed examples.
+Enabling logging
+If necessary you can enable additional logging by editing the
+log4j.properties file in the conf directory.  Changing the rootLogger value from ERROR to DEBUG
+will generate a large amount of debug logging.  A number of useful, more
+fine-grained loggers are also provided in the file.
diff --git a/hsso-release/home/ISSUES b/hsso-release/home/ISSUES
new file mode 100644
index 0000000..6f43c5d
--- /dev/null
+++ b/hsso-release/home/ISSUES
@@ -0,0 +1,10 @@
+Known Issues
+The Gateway cannot be used against an EC2 cluster unless the gateway
+is deployed within EC2.
+If the cluster deployment descriptors in {GATEWAY_HOME}/deployments are
+incorrect, the errors logged by the gateway are overly detailed and not
+diagnostic enough.

View raw message