any23-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lewi...@apache.org
Subject [2/4] git commit: Fix URL encoding issues
Date Thu, 29 May 2014 20:07:10 GMT
Fix URL encoding issues


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/4249ef32
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/4249ef32
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/4249ef32

Branch: refs/heads/master
Commit: 4249ef3229565cd810eff2f79c1c6b06013d96a0
Parents: c224e26
Author: Eugene Dzhurinsky <jdevelop@gmail.com>
Authored: Sun Dec 22 23:37:04 2013 -0500
Committer: Eugene Dzhurinsky <jdevelop@gmail.com>
Committed: Thu May 8 23:03:21 2014 -0400

----------------------------------------------------------------------
 .../apache/any23/source/HTTPDocumentSource.java | 19 +++++++--
 .../java/org/apache/any23/servlet/Servlet.java  | 41 ++++++++++----------
 2 files changed, 36 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/4249ef32/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java b/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
index 6ea2cc8..709bf5a 100644
--- a/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
+++ b/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
@@ -18,10 +18,13 @@
 package org.apache.any23.source;
 
 import org.apache.any23.http.HTTPClient;
+import org.apache.commons.httpclient.URI;
+import org.apache.commons.httpclient.URIException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.net.URI;
 import java.net.URISyntaxException;
 
 /**
@@ -29,6 +32,8 @@ import java.net.URISyntaxException;
  */
 public class HTTPDocumentSource implements DocumentSource {
 
+    private static final Logger LOG = LoggerFactory.getLogger(HTTPDocumentSource.class);
+
     private final HTTPClient client;
 
     private String uri;
@@ -43,7 +48,15 @@ public class HTTPDocumentSource implements DocumentSource {
     }
 
     private String normalize(String uri) throws URISyntaxException {
-        return new URI(uri).normalize().toString();
+        try {
+            URI normalized = new URI(uri, false);
+            normalized.normalize();
+            return normalized.toString();
+        } catch (URIException e) {
+            LOG.warn("Invalid uri: {}", uri);
+            LOG.error("Can not convert URL", e);
+            throw new URISyntaxException(uri, e.getMessage());
+        }
     }
 
     private void ensureOpen() throws IOException {
@@ -80,5 +93,5 @@ public class HTTPDocumentSource implements DocumentSource {
     public boolean isLocal() {
         return false;
     }
-    
+
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/4249ef32/service/src/main/java/org/apache/any23/servlet/Servlet.java
----------------------------------------------------------------------
diff --git a/service/src/main/java/org/apache/any23/servlet/Servlet.java b/service/src/main/java/org/apache/any23/servlet/Servlet.java
index 0a968de..31f104e 100644
--- a/service/src/main/java/org/apache/any23/servlet/Servlet.java
+++ b/service/src/main/java/org/apache/any23/servlet/Servlet.java
@@ -26,14 +26,16 @@ import org.apache.any23.source.ByteArrayDocumentSource;
 import org.apache.any23.source.DocumentSource;
 import org.apache.any23.source.HTTPDocumentSource;
 import org.apache.any23.source.StringDocumentSource;
+import org.apache.commons.httpclient.URI;
 import org.openrdf.rio.RDFFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServlet;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
 import java.io.IOException;
-import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.regex.Pattern;
 
@@ -48,6 +50,8 @@ import static org.apache.any23.extractor.ExtractionParameters.ValidationMode;
  */
 public class Servlet extends HttpServlet {
 
+    private static final Logger LOG = LoggerFactory.getLogger(Servlet.class);
+
     public static final String DEFAULT_BASE_URI = "http://any23.org/tmp/";
 
     private static final long serialVersionUID = 8207685628715421336L;
@@ -135,23 +139,17 @@ public class Servlet extends HttpServlet {
         MediaRangeSpec result = Any23Negotiator.getNegotiator().getBestMatch(request.getHeader("Accept"));
         if (result == null) {
             return null;
-        }
-        else if (RDFFormat.TURTLE.hasMIMEType(result.getMediaType())) {
+        } else if (RDFFormat.TURTLE.hasMIMEType(result.getMediaType())) {
             return "turtle";
-        }
-        else if (RDFFormat.N3.hasMIMEType(result.getMediaType())) {
+        } else if (RDFFormat.N3.hasMIMEType(result.getMediaType())) {
             return "n3";
-        }
-        else if (RDFFormat.NQUADS.hasMIMEType(result.getMediaType())) {
+        } else if (RDFFormat.NQUADS.hasMIMEType(result.getMediaType())) {
             return "nq";
-        }
-        else if (RDFFormat.RDFXML.hasMIMEType(result.getMediaType())) {
+        } else if (RDFFormat.RDFXML.hasMIMEType(result.getMediaType())) {
             return "rdf";
-        }
-        else if (RDFFormat.NTRIPLES.hasMIMEType(result.getMediaType())) {
+        } else if (RDFFormat.NTRIPLES.hasMIMEType(result.getMediaType())) {
             return "nt";
-        }
-        else {
+        } else {
             return "turtle";    // shouldn't happen
         }
     }
@@ -220,13 +218,14 @@ public class Servlet extends HttpServlet {
     }
 
     private DocumentSource createHTTPDocumentSource(WebResponder responder, String uri, boolean report)
-    throws IOException {
+            throws IOException {
         try {
             if (!isValidURI(uri)) {
                 throw new URISyntaxException(uri, "@@@");
             }
             return createHTTPDocumentSource(responder.getRunner().getHTTPClient(), uri);
         } catch (URISyntaxException ex) {
+            LOG.error("Invalid URI detected", ex);
             responder.sendError(400, "Invalid input URI " + uri, report);
             return null;
         }
@@ -239,11 +238,11 @@ public class Servlet extends HttpServlet {
 
     private boolean isValidURI(String s) {
         try {
-            URI uri = new URI(s);
+            URI uri = new URI(s, false);
             if (!"http".equals(uri.getScheme()) && !"https".equals(uri.getScheme())) {
                 return false;
             }
-        } catch (URISyntaxException e) {
+        } catch (Exception e) {
             return false;
         }
         return true;
@@ -252,15 +251,15 @@ public class Servlet extends HttpServlet {
     private ValidationMode getValidationMode(HttpServletRequest request) {
         final String PARAMETER = "validation-mode";
         final String validationMode = request.getParameter(PARAMETER);
-        if(validationMode == null) return ValidationMode.None;
-        if("none".equalsIgnoreCase(validationMode)) return ValidationMode.None;
-        if("validate".equalsIgnoreCase(validationMode)) return ValidationMode.Validate;
-        if("validate-fix".equalsIgnoreCase(validationMode)) return ValidationMode.ValidateAndFix;
+        if (validationMode == null) return ValidationMode.None;
+        if ("none".equalsIgnoreCase(validationMode)) return ValidationMode.None;
+        if ("validate".equalsIgnoreCase(validationMode)) return ValidationMode.Validate;
+        if ("validate-fix".equalsIgnoreCase(validationMode)) return ValidationMode.ValidateAndFix;
         throw new IllegalArgumentException(
                 String.format("Invalid value '%s' for '%s' parameter.", validationMode, PARAMETER)
         );
     }
-    
+
     private ExtractionParameters getExtractionParameters(HttpServletRequest request) {
         final ValidationMode mode = getValidationMode(request);
         return new ExtractionParameters(DefaultConfiguration.singleton(), mode);


Mime
View raw message