Fix URL encoding issues
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/4249ef32
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/4249ef32
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/4249ef32
Branch: refs/heads/master
Commit: 4249ef3229565cd810eff2f79c1c6b06013d96a0
Parents: c224e26
Author: Eugene Dzhurinsky <jdevelop@gmail.com>
Authored: Sun Dec 22 23:37:04 2013 -0500
Committer: Eugene Dzhurinsky <jdevelop@gmail.com>
Committed: Thu May 8 23:03:21 2014 -0400
----------------------------------------------------------------------
.../apache/any23/source/HTTPDocumentSource.java | 19 +++++++--
.../java/org/apache/any23/servlet/Servlet.java | 41 ++++++++++----------
2 files changed, 36 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/4249ef32/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java b/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
index 6ea2cc8..709bf5a 100644
--- a/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
+++ b/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
@@ -18,10 +18,13 @@
package org.apache.any23.source;
import org.apache.any23.http.HTTPClient;
+import org.apache.commons.httpclient.URI;
+import org.apache.commons.httpclient.URIException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
-import java.net.URI;
import java.net.URISyntaxException;
/**
@@ -29,6 +32,8 @@ import java.net.URISyntaxException;
*/
public class HTTPDocumentSource implements DocumentSource {
+ private static final Logger LOG = LoggerFactory.getLogger(HTTPDocumentSource.class);
+
private final HTTPClient client;
private String uri;
@@ -43,7 +48,15 @@ public class HTTPDocumentSource implements DocumentSource {
}
private String normalize(String uri) throws URISyntaxException {
- return new URI(uri).normalize().toString();
+ try {
+ URI normalized = new URI(uri, false);
+ normalized.normalize();
+ return normalized.toString();
+ } catch (URIException e) {
+ LOG.warn("Invalid uri: {}", uri);
+ LOG.error("Can not convert URL", e);
+ throw new URISyntaxException(uri, e.getMessage());
+ }
}
private void ensureOpen() throws IOException {
@@ -80,5 +93,5 @@ public class HTTPDocumentSource implements DocumentSource {
public boolean isLocal() {
return false;
}
-
+
}
http://git-wip-us.apache.org/repos/asf/any23/blob/4249ef32/service/src/main/java/org/apache/any23/servlet/Servlet.java
----------------------------------------------------------------------
diff --git a/service/src/main/java/org/apache/any23/servlet/Servlet.java b/service/src/main/java/org/apache/any23/servlet/Servlet.java
index 0a968de..31f104e 100644
--- a/service/src/main/java/org/apache/any23/servlet/Servlet.java
+++ b/service/src/main/java/org/apache/any23/servlet/Servlet.java
@@ -26,14 +26,16 @@ import org.apache.any23.source.ByteArrayDocumentSource;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.source.StringDocumentSource;
+import org.apache.commons.httpclient.URI;
import org.openrdf.rio.RDFFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
-import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Pattern;
@@ -48,6 +50,8 @@ import static org.apache.any23.extractor.ExtractionParameters.ValidationMode;
*/
public class Servlet extends HttpServlet {
+ private static final Logger LOG = LoggerFactory.getLogger(Servlet.class);
+
public static final String DEFAULT_BASE_URI = "http://any23.org/tmp/";
private static final long serialVersionUID = 8207685628715421336L;
@@ -135,23 +139,17 @@ public class Servlet extends HttpServlet {
MediaRangeSpec result = Any23Negotiator.getNegotiator().getBestMatch(request.getHeader("Accept"));
if (result == null) {
return null;
- }
- else if (RDFFormat.TURTLE.hasMIMEType(result.getMediaType())) {
+ } else if (RDFFormat.TURTLE.hasMIMEType(result.getMediaType())) {
return "turtle";
- }
- else if (RDFFormat.N3.hasMIMEType(result.getMediaType())) {
+ } else if (RDFFormat.N3.hasMIMEType(result.getMediaType())) {
return "n3";
- }
- else if (RDFFormat.NQUADS.hasMIMEType(result.getMediaType())) {
+ } else if (RDFFormat.NQUADS.hasMIMEType(result.getMediaType())) {
return "nq";
- }
- else if (RDFFormat.RDFXML.hasMIMEType(result.getMediaType())) {
+ } else if (RDFFormat.RDFXML.hasMIMEType(result.getMediaType())) {
return "rdf";
- }
- else if (RDFFormat.NTRIPLES.hasMIMEType(result.getMediaType())) {
+ } else if (RDFFormat.NTRIPLES.hasMIMEType(result.getMediaType())) {
return "nt";
- }
- else {
+ } else {
return "turtle"; // shouldn't happen
}
}
@@ -220,13 +218,14 @@ public class Servlet extends HttpServlet {
}
private DocumentSource createHTTPDocumentSource(WebResponder responder, String uri, boolean report)
- throws IOException {
+ throws IOException {
try {
if (!isValidURI(uri)) {
throw new URISyntaxException(uri, "@@@");
}
return createHTTPDocumentSource(responder.getRunner().getHTTPClient(), uri);
} catch (URISyntaxException ex) {
+ LOG.error("Invalid URI detected", ex);
responder.sendError(400, "Invalid input URI " + uri, report);
return null;
}
@@ -239,11 +238,11 @@ public class Servlet extends HttpServlet {
private boolean isValidURI(String s) {
try {
- URI uri = new URI(s);
+ URI uri = new URI(s, false);
if (!"http".equals(uri.getScheme()) && !"https".equals(uri.getScheme())) {
return false;
}
- } catch (URISyntaxException e) {
+ } catch (Exception e) {
return false;
}
return true;
@@ -252,15 +251,15 @@ public class Servlet extends HttpServlet {
private ValidationMode getValidationMode(HttpServletRequest request) {
final String PARAMETER = "validation-mode";
final String validationMode = request.getParameter(PARAMETER);
- if(validationMode == null) return ValidationMode.None;
- if("none".equalsIgnoreCase(validationMode)) return ValidationMode.None;
- if("validate".equalsIgnoreCase(validationMode)) return ValidationMode.Validate;
- if("validate-fix".equalsIgnoreCase(validationMode)) return ValidationMode.ValidateAndFix;
+ if (validationMode == null) return ValidationMode.None;
+ if ("none".equalsIgnoreCase(validationMode)) return ValidationMode.None;
+ if ("validate".equalsIgnoreCase(validationMode)) return ValidationMode.Validate;
+ if ("validate-fix".equalsIgnoreCase(validationMode)) return ValidationMode.ValidateAndFix;
throw new IllegalArgumentException(
String.format("Invalid value '%s' for '%s' parameter.", validationMode, PARAMETER)
);
}
-
+
private ExtractionParameters getExtractionParameters(HttpServletRequest request) {
final ValidationMode mode = getValidationMode(request);
return new ExtractionParameters(DefaultConfiguration.singleton(), mode);
|