any23-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hansbre...@apache.org
Subject any23 git commit: ANY23-420 don't let IAE in jsonld-java crash entire extraction process
Date Thu, 22 Nov 2018 16:06:11 GMT
Repository: any23
Updated Branches:
  refs/heads/master b37d08621 -> c9e4dbe22


ANY23-420 don't let IAE in jsonld-java crash entire extraction process


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/c9e4dbe2
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/c9e4dbe2
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/c9e4dbe2

Branch: refs/heads/master
Commit: c9e4dbe226fe08f80343f6cdc810afb7aeec2754
Parents: b37d086
Author: Hans <firedrake93@gmail.com>
Authored: Thu Nov 22 10:03:53 2018 -0600
Committer: Hans <firedrake93@gmail.com>
Committed: Thu Nov 22 10:03:53 2018 -0600

----------------------------------------------------------------------
 .../any23/extractor/rdf/BaseRDFExtractor.java   | 25 +++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/c9e4dbe2/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index 767f6ee..c11aa8c 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -28,9 +28,7 @@ import org.apache.any23.extractor.IssueReport;
 import org.apache.any23.extractor.html.JsoupUtils;
 import org.eclipse.rdf4j.common.net.ParsedIRI;
 import org.eclipse.rdf4j.rio.RDFFormat;
-import org.eclipse.rdf4j.rio.RDFParseException;
 import org.eclipse.rdf4j.rio.RDFParser;
-import org.eclipse.rdf4j.rio.RDFHandlerException;
 import org.jsoup.nodes.Attribute;
 import org.jsoup.nodes.Comment;
 import org.jsoup.nodes.DataNode;
@@ -45,6 +43,8 @@ import org.jsoup.select.NodeTraversor;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.HashSet;
@@ -224,9 +224,10 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
             }
 
             parser.parse(in, iri);
-        } catch (RDFHandlerException ex) {
-            throw new IllegalStateException("Unexpected exception.", ex);
-        } catch (RDFParseException ex) {
+        } catch (Exception ex) {
+            // ANY23-420: jsonld-java can sometimes throw IllegalArgumentException,
+            // so don't limit catch block to RDFParseExceptions
+
             Throwable cause = ex.getCause();
             if (cause instanceof JsonProcessingException) {
                 JsonProcessingException err = (JsonProcessingException)cause;
@@ -237,9 +238,21 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
                     extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, err.getOriginalMessage(), loc.getLineNr(), loc.getColumnNr());
                 }
             } else {
-                throw new ExtractionException("Error while parsing RDF document.", ex, extractionResult);
+                extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, toString(ex), -1, -1);
             }
         }
     }
 
+    private static String toString(Throwable th) {
+        StringWriter writer = new StringWriter();
+        try (PrintWriter pw = new PrintWriter(writer)) {
+            th.printStackTrace(pw);
+        }
+        String string = writer.toString();
+        if (string.length() > 1024) {
+            return string.substring(0, 1021) + "...";
+        }
+        return string;
+    }
+
 }


Mime
View raw message