any23-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lewi...@apache.org
Subject [1/9] git commit: ANY23-137 : Initial replacement of Any23 RDFA with Semargl
Date Fri, 09 May 2014 01:59:53 GMT
Repository: any23
Updated Branches:
  refs/heads/master 7934f79da -> c224e2658


ANY23-137 : Initial replacement of Any23 RDFA with Semargl

Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/9f60d325
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/9f60d325
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/9f60d325

Branch: refs/heads/master
Commit: 9f60d3252fbd39cd6ea7670b43deeff0045d2b18
Parents: 43743fd
Author: Peter Ansell <p_ansell@yahoo.com>
Authored: Mon Sep 2 09:48:24 2013 +1000
Committer: Peter Ansell <p_ansell@yahoo.com>
Committed: Mon Sep 2 09:48:24 2013 +1000

----------------------------------------------------------------------
 core/pom.xml                                    |   4 +
 .../any23/extractor/rdf/RDFParserFactory.java   |  54 ++-
 .../any23/extractor/rdfa/RDFa11Extractor.java   |  78 +---
 .../any23/extractor/rdfa/RDFaExtractor.java     | 133 +-----
 .../any23/filter/IgnoreAccidentalRDFa.java      |   3 +-
 .../test/java/org/apache/any23/Any23Test.java   | 454 ++++++++++---------
 .../extractor/rdfa/XSLTStylesheetTest.java      |  84 ----
 pom.xml                                         |   6 +
 8 files changed, 314 insertions(+), 502 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 41978f9..7e83f5b 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -133,6 +133,10 @@
       <groupId>org.openrdf.sesame</groupId>
       <artifactId>sesame-repository-api</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.semarglproject</groupId>
+      <artifactId>semargl-sesame</artifactId>
+    </dependency>
     <!-- END: Sesame -->
 
     <!-- BEGIN:  Apache Commons, this version is hosted in the 

http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
index 606364b..423f64f 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
@@ -28,6 +28,8 @@ import org.openrdf.rio.RDFHandlerException;
 import org.openrdf.rio.RDFParseException;
 import org.openrdf.rio.RDFParser;
 import org.openrdf.rio.Rio;
+import org.openrdf.rio.helpers.RDFaParserSettings;
+import org.openrdf.rio.helpers.RDFaVersion;
 import org.openrdf.rio.turtle.TurtleParser;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -56,7 +58,7 @@ public class RDFParserFactory {
     }
 
     /**
-     * Returns a new instance of a configured {@link org.openrdf.rio.turtle.TurtleParser}.
+     * Returns a new instance of a configured TurtleParser.
      *
      * @param verifyDataType data verification enable if <code>true</code>.
      * @param stopAtFirstError the parser stops at first error if <code>true</code>.
@@ -79,7 +81,49 @@ public class RDFParserFactory {
     }
 
     /**
-     * Returns a new instance of a configured {@link org.openrdf.rio.rdfxml.RDFXMLParser}.
+     * Returns a new instance of a configured RDFaParser, set to RDFa-1.0 compatibility mode.
+     *
+     * @param verifyDataType data verification enable if <code>true</code>.
+     * @param stopAtFirstError the parser stops at first error if <code>true</code>.
+     * @param extractionContext the extraction context where the parser is used.
+     * @param extractionResult the output extraction result.
+     * @return a new instance of a configured RDFa parser.
+     */
+    public RDFParser getRDFa10Parser(
+            final boolean verifyDataType,
+            final boolean stopAtFirstError,
+            final ExtractionContext extractionContext,
+            final ExtractionResult extractionResult
+    ) {
+        final RDFParser parser = Rio.createParser(RDFFormat.RDFA);
+        parser.getParserConfig().set(RDFaParserSettings.RDFA_COMPATIBILITY, RDFaVersion.RDFA_1_0);
+        configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
+        return parser;
+    }
+
+    /**
+     * Returns a new instance of a configured RDFaParser, set to RDFa-1.1 compatibility mode.
+     *
+     * @param verifyDataType data verification enable if <code>true</code>.
+     * @param stopAtFirstError the parser stops at first error if <code>true</code>.
+     * @param extractionContext the extraction context where the parser is used.
+     * @param extractionResult the output extraction result.
+     * @return a new instance of a configured RDFa parser.
+     */
+    public RDFParser getRDFa11Parser(
+            final boolean verifyDataType,
+            final boolean stopAtFirstError,
+            final ExtractionContext extractionContext,
+            final ExtractionResult extractionResult
+    ) {
+        final RDFParser parser = Rio.createParser(RDFFormat.RDFA);
+        parser.getParserConfig().set(RDFaParserSettings.RDFA_COMPATIBILITY, RDFaVersion.RDFA_1_1);
+        configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
+        return parser;
+    }
+
+    /**
+     * Returns a new instance of a configured RDFXMLParser.
      *
      * @param verifyDataType data verification enable if <code>true</code>.
      * @param stopAtFirstError the parser stops at first error if <code>true</code>.
@@ -99,7 +143,7 @@ public class RDFParserFactory {
     }
 
     /**
-     * Returns a new instance of a configured {@link org.openrdf.rio.ntriples.NTriplesParser}.
+     * Returns a new instance of a configured NTriplesParser.
      *
      * @param verifyDataType data verification enable if <code>true</code>.
      * @param stopAtFirstError the parser stops at first error if <code>true</code>.
@@ -119,7 +163,7 @@ public class RDFParserFactory {
     }
 
     /**
-     * Returns a new instance of a configured {@link org.apache.any23.io.nquads.NQuadsParser}.
+     * Returns a new instance of a configured NQuadsParser.
      *
      * @param verifyDataType data verification enable if <code>true</code>.
      * @param stopAtFirstError the parser stops at first error if <code>true</code>.
@@ -139,7 +183,7 @@ public class RDFParserFactory {
     }
 
     /**
-     * Returns a new instance of a configured {@link TriXParser}.
+     * Returns a new instance of a configured TriXParser.
      *
      * @param verifyDataType data verification enable if <code>true</code>.
      * @param stopAtFirstError the parser stops at first error if <code>true</code>.

http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
index 0a37adc..76d3fa3 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
@@ -18,91 +18,37 @@
 package org.apache.any23.extractor.rdfa;
 
 import org.apache.any23.extractor.ExtractionContext;
-import org.apache.any23.extractor.ExtractionException;
-import org.apache.any23.extractor.ExtractionParameters;
 import org.apache.any23.extractor.ExtractionResult;
-import org.apache.any23.extractor.Extractor;
 import org.apache.any23.extractor.ExtractorDescription;
-import org.w3c.dom.Document;
-
-import java.io.IOException;
-import java.net.URL;
+import org.apache.any23.extractor.rdf.BaseRDFExtractor;
+import org.apache.any23.extractor.rdf.RDFParserFactory;
+import org.openrdf.rio.RDFParser;
 
 /**
  * {@link org.apache.any23.extractor.Extractor} implementation for
- * <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa 1.1</a> specification.
+ * <a href="http://www.w3.org/TR/rdfa-core/">RDFa 1.1</a> specification.
  *
  * @author Michele Mostarda (mostarda@fbk.eu)
  */
-public class RDFa11Extractor implements Extractor.TagSoupDOMExtractor {
-
-    private final RDFa11Parser parser;
-
-    private boolean verifyDataType;
+public class RDFa11Extractor extends BaseRDFExtractor {
 
-    private boolean stopAtFirstError;
-
-    /**
-     * Constructor, allows to specify the validation and error handling
-     * policies.
-     * 
-     * @param verifyDataType
-     *            if <code>true</code> the data types will be verified, if
-     *            <code>false</code> will be ignored.
-     * @param stopAtFirstError
-     *            if <code>true</code> the parser will stop at first parsing
-     *            error, if <code>false</code> will ignore non blocking errors.
-     */
     public RDFa11Extractor(boolean verifyDataType, boolean stopAtFirstError) {
-        this.parser = new RDFa11Parser();
-        this.verifyDataType = verifyDataType;
-        this.stopAtFirstError = stopAtFirstError;
+        super(verifyDataType, stopAtFirstError);
     }
 
-    /**
-     * Default constructor, with no verification of data types and not stop at
-     * first error.
-     */
     public RDFa11Extractor() {
         this(false, false);
     }
 
-    public boolean isVerifyDataType() {
-        return verifyDataType;
-    }
-
-    public void setVerifyDataType(boolean verifyDataType) {
-        this.verifyDataType = verifyDataType;
-    }
-
-    public boolean isStopAtFirstError() {
-        return stopAtFirstError;
-    }
-
-    public void setStopAtFirstError(boolean stopAtFirstError) {
-        this.stopAtFirstError = stopAtFirstError;
-    }
-
-    @Override
-    public void run(ExtractionParameters extractionParameters,
-            ExtractionContext extractionContext, Document in,
-            ExtractionResult out) throws IOException, ExtractionException {
-        try {
-            parser.processDocument(new URL(extractionContext.getDocumentURI()
-                    .toString()), in, out);
-        } catch (RDFa11ParserException rpe) {
-            throw new ExtractionException("Error while performing extraction.",
-                    rpe);
-        }
-    }
-
-    /**
-     * @return the {@link org.apache.any23.extractor.ExtractorDescription} of
-     *         this extractor
-     */
     @Override
     public ExtractorDescription getDescription() {
         return RDFa11ExtractorFactory.getDescriptionInstance();
     }
 
+    @Override
+    protected RDFParser getParser(ExtractionContext extractionContext, ExtractionResult extractionResult) {
+        return RDFParserFactory.getInstance().getRDFa11Parser(
+                isVerifyDataType(), isStopAtFirstError(), extractionContext, extractionResult
+        );
+    }
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
index aec0866..fc11ba8 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
@@ -17,147 +17,38 @@
 
 package org.apache.any23.extractor.rdfa;
 
-import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.any23.extractor.ExtractionContext;
-import org.apache.any23.extractor.ExtractionException;
-import org.apache.any23.extractor.ExtractionParameters;
 import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.rdf.BaseRDFExtractor;
 import org.apache.any23.extractor.rdf.RDFParserFactory;
-import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
-import org.openrdf.rio.RDFHandlerException;
-import org.openrdf.rio.RDFParseException;
 import org.openrdf.rio.RDFParser;
-import org.w3c.dom.Document;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.StringReader;
-import java.io.StringWriter;
 
 /**
- * Extractor for RDFa in HTML, based on Fabien Gadon's XSLT transform, found
- * <a href="http://ns.inria.fr/grddl/rdfa/">here</a>. It works by first
- * parsing the HTML using a tagsoup parser, then applies the XSLT to the
- * DOM tree, then parses the resulting RDF/XML.
+ * {@link org.apache.any23.extractor.Extractor} implementation for
+ * <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa 1.0</a> specification.
  *
- * @author Gabriele Renzi
- * @author Richard Cyganiak (richard@cyganiak.de)
+ * @author Michele Mostarda (mostarda@fbk.eu)
  */
-public class RDFaExtractor implements TagSoupDOMExtractor {
-
-    public final static String NAME = "html-rdfa";
-
-    public final static String xsltFilename =
-            DefaultConfiguration.singleton().getPropertyOrFail("any23.rdfa.extractor.xslt");
-
-    private static XSLTStylesheet xslt = null;
-
-    /**
-     * Returns a {@link XSLTStylesheet} able to distill RDFa from
-     * HTML pages.
-     *
-     * @return returns a not <code>null</code> XSLT instance.
-     */
-    public static synchronized XSLTStylesheet getXSLT() {
-        // Lazily initialized static instance, so we don't parse
-        // the XSLT unless really necessary, and only once
-        if (xslt == null) {
-            InputStream in = RDFaExtractor.class.getResourceAsStream(xsltFilename);
-            if (in == null) {
-                throw new RuntimeException("Couldn't load '" + xsltFilename +
-                        "', maybe the file is not bundled in the jar?");
-            }
-            xslt = new XSLTStylesheet(in);
-        }
-        return xslt;
-    }
-
-    private boolean verifyDataType;
+public class RDFaExtractor extends BaseRDFExtractor {
 
-    private boolean stopAtFirstError;
-
-    /**
-     * Constructor, allows to specify the validation and error handling policies.
-     *
-     * @param verifyDataType if <code>true</code> the data types will be verified,
-     *         if <code>false</code> will be ignored.
-     * @param stopAtFirstError if <code>true</code> the parser will stop at first parsing error,
-     *        if <code>false</code> will ignore non blocking errors.
-     */
     public RDFaExtractor(boolean verifyDataType, boolean stopAtFirstError) {
-        this.verifyDataType   = verifyDataType;
-        this.stopAtFirstError = stopAtFirstError;
+        super(verifyDataType, stopAtFirstError);
     }
 
-    /**
-     * Default constructor, with no verification of data types and not stop at first error.
-     */    
     public RDFaExtractor() {
         this(false, false);
     }
 
-    public boolean isVerifyDataType() {
-        return verifyDataType;
-    }
-
-    public void setVerifyDataType(boolean verifyDataType) {
-        this.verifyDataType = verifyDataType;
-    }
-
-    public boolean isStopAtFirstError() {
-        return stopAtFirstError;
-    }
-
-    public void setStopAtFirstError(boolean stopAtFirstError) {
-        this.stopAtFirstError = stopAtFirstError;
-    }
-
-    @Override
-    public void run(
-            ExtractionParameters extractionParameters,
-            ExtractionContext extractionContext,
-            Document in,
-            ExtractionResult out
-    ) throws IOException, ExtractionException {
-
-        StringWriter buffer = new StringWriter();
-        try {
-            getXSLT().applyTo(in, buffer);
-        } catch (XSLTStylesheetException xslte) {
-            throw new ExtractionException("An error occurred during the XSLT application.", xslte);
-        }
-
-        try {
-            RDFParser parser
-                    = RDFParserFactory.getInstance().getRDFXMLParser(
-                        verifyDataType, stopAtFirstError, extractionContext, out
-                    );
-            parser.parse(
-                    new StringReader(buffer.getBuffer().toString()),
-                    extractionContext.getDocumentURI().stringValue()
-            );
-        } catch (RDFHandlerException ex) {
-            throw new IllegalStateException(
-                    "Should not happen, RDFHandlerAdapter does not throw RDFHandlerException", ex
-            );
-        } catch (RDFParseException ex) {
-            throw new ExtractionException(
-                    "Invalid RDF/XML produced by RDFa transform.", ex, out
-            );
-        }
-    }
-
-    private String getDocType(Document in) {
-        return in.getDoctype().getPublicId();
-    }
-
-    /**
-     * @return the {@link org.apache.any23.extractor.ExtractorDescription} of this extractor
-     */
     @Override
     public ExtractorDescription getDescription() {
         return RDFaExtractorFactory.getDescriptionInstance();
     }
 
+    @Override
+    protected RDFParser getParser(ExtractionContext extractionContext, ExtractionResult extractionResult) {
+        return RDFParserFactory.getInstance().getRDFa10Parser(
+                isVerifyDataType(), isStopAtFirstError(), extractionContext, extractionResult
+        );
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/main/java/org/apache/any23/filter/IgnoreAccidentalRDFa.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/filter/IgnoreAccidentalRDFa.java b/core/src/main/java/org/apache/any23/filter/IgnoreAccidentalRDFa.java
index 6fbd073..9c14744 100644
--- a/core/src/main/java/org/apache/any23/filter/IgnoreAccidentalRDFa.java
+++ b/core/src/main/java/org/apache/any23/filter/IgnoreAccidentalRDFa.java
@@ -19,6 +19,7 @@ package org.apache.any23.filter;
 
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.rdfa.RDFaExtractor;
+import org.apache.any23.extractor.rdfa.RDFaExtractorFactory;
 import org.apache.any23.vocab.XHTML;
 import org.apache.any23.writer.TripleHandler;
 import org.apache.any23.writer.TripleHandlerException;
@@ -95,7 +96,7 @@ public class IgnoreAccidentalRDFa implements TripleHandler {
     }
 
     private boolean isRDFaContext(ExtractionContext context) {
-        return context.getExtractorName().equals(RDFaExtractor.NAME);
+        return context.getExtractorName().equals(RDFaExtractorFactory.NAME);
     }
 
     public void endDocument(URI documentURI) throws TripleHandlerException {

http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/test/java/org/apache/any23/Any23Test.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/Any23Test.java b/core/src/test/java/org/apache/any23/Any23Test.java
index 13ba903..ae6c13f 100644
--- a/core/src/test/java/org/apache/any23/Any23Test.java
+++ b/core/src/test/java/org/apache/any23/Any23Test.java
@@ -17,7 +17,7 @@
 
 package org.apache.any23;
 
-import junit.framework.Assert;
+import org.junit.Assert;
 import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.any23.configuration.ModifiableConfiguration;
 import org.apache.any23.extractor.ExtractionException;
@@ -47,6 +47,7 @@ import org.apache.commons.io.IOUtils;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.openrdf.model.Statement;
+import org.openrdf.repository.Repository;
 import org.openrdf.repository.RepositoryConnection;
 import org.openrdf.repository.RepositoryException;
 import org.openrdf.repository.RepositoryResult;
@@ -67,6 +68,7 @@ import static org.apache.any23.extractor.ExtractionParameters.ValidationMode;
 
 /**
  * Test case for {@link Any23} facade.
+ * 
  * @author Davide Palmisano ( dpalmisano@gmail.com )
  * @author Michele Mostarda ( michele.mostarda@gmail.com )
  */
@@ -77,7 +79,8 @@ public class Any23Test extends Any23OnlineTestBase {
 
     private static final String PAGE_URL = "http://bob.com";
 
-    private static final Logger logger = LoggerFactory.getLogger(Any23Test.class);
+    private static final Logger logger = LoggerFactory
+            .getLogger(Any23Test.class);
 
     @Test
     public void testTTLDetection() throws Exception {
@@ -93,8 +96,7 @@ public class Any23Test extends Any23OnlineTestBase {
     public void testN3Detection2() throws Exception {
         assertDetection(
                 "<http://example.org/path> <http://foo.com> <http://example.org/Document/foo#> .",
-                "rdf-nt"
-        );
+                "rdf-nt");
     }
 
     @Test
@@ -103,28 +105,25 @@ public class Any23Test extends Any23OnlineTestBase {
     }
 
     /**
-     * This tests the behavior of <i>Any23</i> to execute the extraction explicitly specifying the charset
-     * encoding of the input.
-     *
+     * This tests the behavior of <i>Any23</i> to execute the extraction
+     * explicitly specifying the charset encoding of the input.
+     * 
      * @throws org.apache.any23.extractor.ExtractionException
      * @throws IOException
      * @throws SailException
      * @throws RepositoryException
      */
     @Test
-    public void testExplicitEncoding()
-    throws Exception {
-        assertEncodingDetection(
-                "UTF-8",
-                "/html/encoding-test.html",
-                "Knud M\u00F6ller"
-        );
+    public void testExplicitEncoding() throws Exception {
+        assertEncodingDetection("UTF-8", "/html/encoding-test.html",
+                "Knud M\u00F6ller");
     }
 
     /**
-     * This tests the behavior of <i>Any23</i> to perform the extraction without passing it any charset encoding.
-     * The encoding is therefore guessed using {@link org.apache.any23.encoding.TikaEncodingDetector} class.
-     *
+     * This tests the behavior of <i>Any23</i> to perform the extraction without
+     * passing it any charset encoding. The encoding is therefore guessed using
+     * {@link org.apache.any23.encoding.TikaEncodingDetector} class.
+     * 
      * @throws org.apache.any23.extractor.ExtractionException
      * @throws IOException
      * @throws SailException
@@ -132,26 +131,19 @@ public class Any23Test extends Any23OnlineTestBase {
      * @throws org.apache.any23.writer.TripleHandlerException
      */
     @Test
-    public void testImplicitEncoding()
-    throws Exception {
-        assertEncodingDetection(
-                null, // The encoding will be auto detected.
-                "/html/encoding-test.html",
-                "Knud M\u00F6ller"
-        );
+    public void testImplicitEncoding() throws Exception {
+        assertEncodingDetection(null, // The encoding will be auto detected.
+                "/html/encoding-test.html", "Knud M\u00F6ller");
     }
 
     @Test
-    public void testRDFXMLDetectionAndExtraction()
-    throws Exception {
-        String rdfXML =
-                "<?xml version='1.0'?> " +
-                "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' " +
-                        "xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
-                "<rdf:Description rdf:about='http://www.example.com'>" +
-                "<dc:title>x</dc:title>" +
-                "</rdf:Description>" +
-                "</rdf:RDF>";
+    public void testRDFXMLDetectionAndExtraction() throws Exception {
+        String rdfXML = "<?xml version='1.0'?> "
+                + "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' "
+                + "xmlns:dc='http://purl.org/dc/elements/1.1/'>"
+                + "<rdf:Description rdf:about='http://www.example.com'>"
+                + "<dc:title>x</dc:title>" + "</rdf:Description>"
+                + "</rdf:RDF>";
         assertDetectionAndExtraction(rdfXML);
     }
 
@@ -163,47 +155,47 @@ public class Any23Test extends Any23OnlineTestBase {
 
     @Test
     public void testNturtleDetectionAndExtraction() throws Exception {
-        String nTurtle =
-                "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n" +
-                "@prefix dc: <http://purl.org/dc/elements/1.1/> .\n" +
-                "@prefix ex: <http://example.org/stuff/1.0/> .\n" +
-                "\n" +
-                "<http://www.w3.org/TR/rdf-syntax-grammar>\n" +
-                "  dc:title \"RDF/XML Syntax Specification (Revised)\" ;\n" +
-                "  ex:editor [\n" +
-                "    ex:fullname \"Dave Beckett\";\n" +
-                "    ex:homePage <http://purl.org/net/dajobe/>\n" +
-                "  ] .";
+        String nTurtle = "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n"
+                + "@prefix dc: <http://purl.org/dc/elements/1.1/> .\n"
+                + "@prefix ex: <http://example.org/stuff/1.0/> .\n"
+                + "\n"
+                + "<http://www.w3.org/TR/rdf-syntax-grammar>\n"
+                + "  dc:title \"RDF/XML Syntax Specification (Revised)\" ;\n"
+                + "  ex:editor [\n"
+                + "    ex:fullname \"Dave Beckett\";\n"
+                + "    ex:homePage <http://purl.org/net/dajobe/>\n" + "  ] .";
         assertDetectionAndExtraction(nTurtle);
     }
 
     /**
      * Tests out the first code snipped used in <i>Developer Manual</i>.
-     *
+     * 
      * @throws IOException
      * @throws org.apache.any23.extractor.ExtractionException
      */
     @Test
     public void testDemoCodeSnippet1() throws Exception {
-        /*1*/ Any23 runner = new Any23();
-        /*2*/ final String content = "@prefix foo: <http://example.org/ns#> .   " +
-                                     "@prefix : <http://other.example.org/ns#> ." +
-                                     "foo:bar foo: : .                          " +
-                                     ":bar : foo:bar .                           ";
-        //    The second argument of StringDocumentSource() must be a valid URI.
-        /*3*/ DocumentSource source = new StringDocumentSource(content, "http://host.com/service");
-        /*4*/ ByteArrayOutputStream out = new ByteArrayOutputStream();
-        /*5*/ TripleHandler handler = new NTriplesWriter(out);
-              try {
-        /*6*/     runner.extract(source, handler);
-              } finally {
-        /*7*/     handler.close();
-              }
-        /*8*/ String nt = out.toString("UTF-8");
+        /* 1 */Any23 runner = new Any23();
+        /* 2 */final String content = "@prefix foo: <http://example.org/ns#> .   "
+                + "@prefix : <http://other.example.org/ns#> ."
+                + "foo:bar foo: : .                          "
+                + ":bar : foo:bar .                           ";
+        // The second argument of StringDocumentSource() must be a valid URI.
+        /* 3 */DocumentSource source = new StringDocumentSource(content,
+                "http://host.com/service");
+        /* 4 */ByteArrayOutputStream out = new ByteArrayOutputStream();
+        /* 5 */TripleHandler handler = new NTriplesWriter(out);
+        try {
+            /* 6 */runner.extract(source, handler);
+        } finally {
+            /* 7 */handler.close();
+        }
+        /* 8 */String nt = out.toString("UTF-8");
 
         /*
-            <http://example.org/ns#bar> <http://example.org/ns#> <http://other.example.org/ns#> .
-            <http://other.example.org/ns#bar> <http://other.example.org/ns#> <http://example.org/ns#bar> .
+         * <http://example.org/ns#bar> <http://example.org/ns#>
+         * <http://other.example.org/ns#> . <http://other.example.org/ns#bar>
+         * <http://other.example.org/ns#> <http://example.org/ns#bar> .
          */
         logger.debug("nt: " + nt);
         Assert.assertTrue(nt.length() > 0);
@@ -211,52 +203,57 @@ public class Any23Test extends Any23OnlineTestBase {
 
     /**
      * Tests out the second code snipped used in <i>Developer Manual</i>.
-     *
+     * 
      * @throws IOException
      * @throws org.apache.any23.extractor.ExtractionException
      */
     @Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
     @Test
-    public void testDemoCodeSnippet2() throws Exception{
+    public void testDemoCodeSnippet2() throws Exception {
         assumeOnlineAllowed();
 
-        /*1*/ Any23 runner = new Any23();
-        /*2*/ runner.setHTTPUserAgent("test-user-agent");
-        /*3*/ HTTPClient httpClient = runner.getHTTPClient();
-        /*4*/ DocumentSource source = new HTTPDocumentSource(
-                 httpClient,
-                 "http://dbpedia.org/resource/Trento"
-              );
-        /*5*/ ByteArrayOutputStream out = new ByteArrayOutputStream();
-        /*6*/ TripleHandler handler = new NTriplesWriter(out);
-              try {
-        /*7*/     runner.extract(source, handler);
-              } finally {
-        /*8*/     handler.close();
-              }
-        /*9*/ String n3 = out.toString("UTF-8");
+        /* 1 */Any23 runner = new Any23();
+        /* 2 */runner.setHTTPUserAgent("test-user-agent");
+        /* 3 */HTTPClient httpClient = runner.getHTTPClient();
+        /* 4 */DocumentSource source = new HTTPDocumentSource(httpClient,
+                "http://dbpedia.org/resource/Trento");
+        /* 5 */ByteArrayOutputStream out = new ByteArrayOutputStream();
+        /* 6 */TripleHandler handler = new NTriplesWriter(out);
+        try {
+            /* 7 */runner.extract(source, handler);
+        } finally {
+            /* 8 */handler.close();
+        }
+        /* 9 */String n3 = out.toString("UTF-8");
 
         /*
-            <http://dbpedia.org/resource/Trent> <http://dbpedia.org/ontology/wikiPageDisambiguates> <http://dbpedia.org/resource/Trento> .
-            <http://dbpedia.org/resource/Andrea_Pozzo> <http://dbpedia.org/ontology/birthPlace> <http://dbpedia.org/resource/Trento> .
-            <http://dbpedia.org/resource/Union_for_Trentino> <http://dbpedia.org/ontology/headquarter> <http://dbpedia.org/resource/Trento> .
-            [...]
+         * <http://dbpedia.org/resource/Trent>
+         * <http://dbpedia.org/ontology/wikiPageDisambiguates>
+         * <http://dbpedia.org/resource/Trento> .
+         * <http://dbpedia.org/resource/Andrea_Pozzo>
+         * <http://dbpedia.org/ontology/birthPlace>
+         * <http://dbpedia.org/resource/Trento> .
+         * <http://dbpedia.org/resource/Union_for_Trentino>
+         * <http://dbpedia.org/ontology/headquarter>
+         * <http://dbpedia.org/resource/Trento> . [...]
          */
         logger.debug("n3: " + n3);
         Assert.assertTrue(n3.length() > 0);
     }
 
     /**
-     * This test checks the extraction behavior when the library is used programatically.
-     * This test is related to the issue #45, to verify the different behaviors between Maven and Ant.
-     * The behavior was related to a 2nd-level dependency introduced by Maven.
-     *
+     * This test checks the extraction behavior when the library is used
+     * programmatically. This test is related to the issue #45, to verify the
+     * different behaviors between Maven and Ant. The behavior was related to a
+     * 2nd-level dependency introduced by Maven.
+     * 
      * @throws org.apache.any23.extractor.ExtractionException
      * @throws IOException
      * @throws URISyntaxException
      */
     @Test
-    public void testProgrammaticExtraction() throws ExtractionException, IOException, URISyntaxException {
+    public void testProgrammaticExtraction() throws ExtractionException,
+            IOException, URISyntaxException {
         Any23 any23 = new Any23();
         any23.setHTTPUserAgent("Any23-Servlet");
         any23.setHTTPClient(new DefaultHTTPClient() {
@@ -276,10 +273,11 @@ public class Any23Test extends Any23OnlineTestBase {
         ReportingTripleHandler reporting = new ReportingTripleHandler(rdfWriter);
 
         DocumentSource source = getDocumentSourceFromResource(
-                    "/html/rdfa/ansa_2010-02-26_12645863.html",
-                    "http://host.com/service");
+                "/html/rdfa/ansa_2010-02-26_12645863.html",
+                "http://host.com/service");
 
-        Assert.assertTrue( any23.extract(source, reporting).hasMatchingExtractors() );
+        Assert.assertTrue(any23.extract(source, reporting)
+                .hasMatchingExtractors());
         try {
             handler.close();
         } catch (TripleHandlerException e) {
@@ -288,30 +286,30 @@ public class Any23Test extends Any23OnlineTestBase {
 
         final String bufferContent = byteArrayOutputStream.toString();
         logger.debug(bufferContent);
-        Assert.assertSame("Unexpected number of triples.", 60, StringUtils.countNL(bufferContent));
-        
+        Assert.assertSame("Unexpected number of triples.", 60,
+                StringUtils.countNL(bufferContent));
+
     }
 
     /**
-     * This test checks if a URL that is supposed to be GZIPPED is correctly opened and parsed with
-     * the {@link Any23} facade.
-     *
+     * This test checks if a URL that is supposed to be GZIPPED is correctly
+     * opened and parsed with the {@link Any23} facade.
+     * 
      * @throws IOException
      * @throws URISyntaxException
      * @throws ExtractionException
      */
     @Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
     @Test
-    public void testGZippedContent() throws IOException, URISyntaxException, ExtractionException {
+    public void testGZippedContent() throws IOException, URISyntaxException,
+            ExtractionException {
         assumeOnlineAllowed();
 
         Any23 runner = new Any23();
         runner.setHTTPUserAgent("test-user-agent");
         HTTPClient httpClient = runner.getHTTPClient();
-        DocumentSource source = new HTTPDocumentSource(
-                httpClient,
-                "http://products.semweb.bestbuy.com/y/products/7590289/"
-        );
+        DocumentSource source = new HTTPDocumentSource(httpClient,
+                "http://products.semweb.bestbuy.com/y/products/7590289/");
         ByteArrayOutputStream out = new ByteArrayOutputStream();
         TripleHandler handler = new NTriplesWriter(out);
         runner.extract(source, handler);
@@ -323,13 +321,13 @@ public class Any23Test extends Any23OnlineTestBase {
     }
 
     @Test
-    public void testExtractionParameters() throws IOException, ExtractionException, TripleHandlerException {
-        final int EXPECTED_TRIPLES  = 6;
+    public void testExtractionParameters() throws IOException,
+            ExtractionException, TripleHandlerException {
+        final int EXPECTED_TRIPLES = 6;
         Any23 runner = new Any23();
         DocumentSource source = getDocumentSourceFromResource(
                 "/org/apache/any23/validator/missing-og-namespace.html",
-                "http://www.test.com"
-        );
+                "http://www.test.com");
 
         ByteArrayOutputStream baos = new ByteArrayOutputStream();
 
@@ -340,18 +338,14 @@ public class Any23Test extends Any23OnlineTestBase {
         compositeTH1.addChild(ctw1);
         try {
             runner.extract(
-                    new ExtractionParameters(
-                            DefaultConfiguration.singleton(),
-                            ValidationMode.None
-                    ),
-                    source,
-                    compositeTH1
-            );
+                    new ExtractionParameters(DefaultConfiguration.singleton(),
+                            ValidationMode.None), source, compositeTH1);
         } finally {
             compositeTH1.close();
         }
         logger.info(baos.toString());
-        Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES, cth1.getCount() );
+        Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES,
+                cth1.getCount());
 
         baos.reset();
         CountingTripleHandler cth2 = new CountingTripleHandler();
@@ -360,26 +354,21 @@ public class Any23Test extends Any23OnlineTestBase {
         compositeTH2.addChild(cth2);
         compositeTH2.addChild(ctw2);
         runner.extract(
-                new ExtractionParameters(
-                        DefaultConfiguration.singleton(),
-                        ValidationMode.ValidateAndFix
-                ),
-                source,
-                compositeTH2
-        );
-        logger.debug( baos.toString() );
-        Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES + 5, cth2.getCount() );
+                new ExtractionParameters(DefaultConfiguration.singleton(),
+                        ValidationMode.ValidateAndFix), source, compositeTH2);
+        logger.debug(baos.toString());
+        Assert.assertEquals("Unexpected number of triples.",
+                EXPECTED_TRIPLES + 5, cth2.getCount());
     }
 
     @Test
     public void testExtractionParametersWithNestingDisabled()
-    throws IOException, ExtractionException, TripleHandlerException {
+            throws IOException, ExtractionException, TripleHandlerException {
         final int EXPECTED_TRIPLES = 19;
         Any23 runner = new Any23();
         DocumentSource source = getDocumentSourceFromResource(
                 "/microformats/nested-microformats-a1.html",
-                "http://www.test.com"
-        );
+                "http://www.test.com");
 
         ByteArrayOutputStream baos = new ByteArrayOutputStream();
 
@@ -389,16 +378,12 @@ public class Any23Test extends Any23OnlineTestBase {
         compositeTH1.addChild(cth1);
         compositeTH1.addChild(ctw1);
         runner.extract(
-                new ExtractionParameters(
-                        DefaultConfiguration.singleton(),
-                        ValidationMode.None, true
-                ),
-                source,
-                compositeTH1
-        );
+                new ExtractionParameters(DefaultConfiguration.singleton(),
+                        ValidationMode.None, true), source, compositeTH1);
         compositeTH1.close();
         logger.debug("Out1: " + baos.toString());
-        Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES + 3, cth1.getCount() );
+        Assert.assertEquals("Unexpected number of triples.",
+                EXPECTED_TRIPLES + 3, cth1.getCount());
 
         baos.reset();
         CountingTripleHandler cth2 = new CountingTripleHandler();
@@ -407,24 +392,20 @@ public class Any23Test extends Any23OnlineTestBase {
         compositeTH2.addChild(cth2);
         compositeTH2.addChild(ctw2);
         runner.extract(
-                new ExtractionParameters(
-                        DefaultConfiguration.singleton(),
-                        ValidationMode.ValidateAndFix, false),
-                source,
-                compositeTH2
-        );
+                new ExtractionParameters(DefaultConfiguration.singleton(),
+                        ValidationMode.ValidateAndFix, false), source,
+                compositeTH2);
         compositeTH2.close();
         logger.debug("Out2: " + baos.toString());
-        Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES, cth2.getCount() );
+        Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES,
+                cth2.getCount());
     }
 
     @Test
     public void testExceptionPropagation() throws IOException {
         Any23 any23 = new Any23();
         DocumentSource source = getDocumentSourceFromResource(
-                "/application/turtle/geolinkeddata.ttl",
-                "http://www.test.com"
-        );
+                "/application/turtle/geolinkeddata.ttl", "http://www.test.com");
         CountingTripleHandler cth1 = new CountingTripleHandler();
         try {
             any23.extract(source, cth1);
@@ -436,16 +417,19 @@ public class Any23Test extends Any23OnlineTestBase {
 
     /**
      * Test correct management of general <i>XML</i> content.
-     *
+     * 
      * @throws IOException
      * @throws ExtractionException
      */
     @Test
-    public void testXMLMimeTypeManagement() throws IOException, ExtractionException {
+    public void testXMLMimeTypeManagement() throws IOException,
+            ExtractionException {
         final String documentURI = "http://www.test.com/resource.xml";
         final String contentType = "application/xml";
-        final String in = StreamUtils.asString( this.getClass().getResourceAsStream("any23-xml-mimetype.xml") );
-        final DocumentSource doc = new StringDocumentSource(in, documentURI, contentType);
+        final String in = StreamUtils.asString(this.getClass()
+                .getResourceAsStream("any23-xml-mimetype.xml"));
+        final DocumentSource doc = new StringDocumentSource(in, documentURI,
+                contentType);
         final Any23 any23 = new Any23();
         final CountingTripleHandler cth = new CountingTripleHandler(false);
         final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
@@ -455,20 +439,23 @@ public class Any23Test extends Any23OnlineTestBase {
     }
 
     /**
-     * Test correct management of general <i>XML</i> content from <i>URL</i> source.
-     *
+     * Test correct management of general <i>XML</i> content from <i>URL</i>
+     * source.
+     * 
      * @throws IOException
      * @throws ExtractionException
      */
     @Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
     @Test
-    public void testXMLMimeTypeManagementViaURL() throws IOException, ExtractionException {
+    public void testXMLMimeTypeManagementViaURL() throws IOException,
+            ExtractionException {
         assumeOnlineAllowed();
         final Any23 any23 = new Any23();
         any23.setHTTPUserAgent("test-user-agent");
         final CountingTripleHandler cth = new CountingTripleHandler(false);
         final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
-        final ExtractionReport report = any23.extract("http://www.nativeremedies.com/XML/combos.xml", rth);
+        final ExtractionReport report = any23.extract(
+                "http://www.nativeremedies.com/XML/combos.xml", rth);
         Assert.assertFalse(report.hasMatchingExtractors());
         Assert.assertEquals(0, cth.getCount());
     }
@@ -481,23 +468,26 @@ public class Any23Test extends Any23OnlineTestBase {
         any23.setHTTPUserAgent("test-user-agent");
         final CountingTripleHandler cth = new CountingTripleHandler(false);
         final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
-        final ExtractionReport report = any23.extract("http://www.usarab.org/news/?tag=england", rth);
-        Assert.assertTrue( report.hasMatchingExtractors() );
+        final ExtractionReport report = any23.extract(
+                "http://www.usarab.org/news/?tag=england", rth);
+        Assert.assertTrue(report.hasMatchingExtractors());
     }
 
     @Test
     public void testMicrodataSupport() throws Exception {
-        final String htmlWithMicrodata = IOUtils.toString(
-                this.getClass().getResourceAsStream("/microdata/microdata-basic.html")
-        );
+        final String htmlWithMicrodata = IOUtils.toString(this.getClass()
+                .getResourceAsStream("/microdata/microdata-basic.html"));
         assertExtractorActivation(htmlWithMicrodata, MicrodataExtractor.class);
     }
 
     @Test
-    public void testAbstractMethodErrorIssue186_1() throws IOException, ExtractionException{
+    public void testAbstractMethodErrorIssue186_1() throws IOException,
+            ExtractionException {
         final Any23 runner = new Any23();
-        final String content = FileUtils.readResourceContent("/html/rdfa/rdfa-issue186-1.xhtml");
-        final DocumentSource source = new StringDocumentSource(content, "http://base.com");
+        final String content = FileUtils
+                .readResourceContent("/html/rdfa/rdfa-issue186-1.xhtml");
+        final DocumentSource source = new StringDocumentSource(content,
+                "http://base.com");
         final ByteArrayOutputStream out = new ByteArrayOutputStream();
         final TripleHandler handler = new NTriplesWriter(out);
         runner.extract(source, handler);
@@ -506,10 +496,13 @@ public class Any23Test extends Any23OnlineTestBase {
     }
 
     @Test
-    public void testAbstractMethodErrorIssue186_2() throws IOException, ExtractionException{
+    public void testAbstractMethodErrorIssue186_2() throws IOException,
+            ExtractionException {
         final Any23 runner = new Any23();
-        final String content = FileUtils.readResourceContent("/html/rdfa/rdfa-issue186-2.xhtml");
-        final DocumentSource source = new StringDocumentSource(content, "http://richard.cyganiak.de/");
+        final String content = FileUtils
+                .readResourceContent("/html/rdfa/rdfa-issue186-2.xhtml");
+        final DocumentSource source = new StringDocumentSource(content,
+                "http://richard.cyganiak.de/");
         final ByteArrayOutputStream out = new ByteArrayOutputStream();
         final TripleHandler handler = new NTriplesWriter(out);
         runner.extract(source, handler);
@@ -519,12 +512,15 @@ public class Any23Test extends Any23OnlineTestBase {
 
     @Test
     public void testModifiableConfiguration_issue183() throws Exception {
-        final ModifiableConfiguration modifiableConf = DefaultConfiguration.copy();
+        final ModifiableConfiguration modifiableConf = DefaultConfiguration
+                .copy();
         modifiableConf.setProperty("any23.extraction.metadata.timesize", "off");
         final Any23 any23 = new Any23(modifiableConf);
 
-        final String content = FileUtils.readResourceContent("/rdf/rdf-issue183.ttl");
-        final DocumentSource source = new StringDocumentSource(content, "http://base.com");
+        final String content = FileUtils
+                .readResourceContent("/rdf/rdf-issue183.ttl");
+        final DocumentSource source = new StringDocumentSource(content,
+                "http://base.com");
         final ByteArrayOutputStream out = new ByteArrayOutputStream();
         final TripleHandler handler = new NTriplesWriter(out);
         any23.extract(source, handler);
@@ -534,19 +530,18 @@ public class Any23Test extends Any23OnlineTestBase {
         logger.debug(n3);
         Assert.assertFalse(
                 "Should not contain triple with http://vocab.sindice.net/date",
-                n3.contains("http://vocab.sindice.net/date")
-        );
+                n3.contains("http://vocab.sindice.net/date"));
         Assert.assertFalse(
                 "Should not contain triple with http://vocab.sindice.net/size",
-                n3.contains("http://vocab.sindice.net/size")
-        );
+                n3.contains("http://vocab.sindice.net/size"));
     }
 
     /**
-     * Performs detection and extraction on the given input string
-     * and return the {@link ExtractionReport}.
-     *
-     * @param in input string.
+     * Performs detection and extraction on the given input string and returns
+     * the {@link ExtractionReport}.
+     * 
+     * @param in
+     *            input string.
      * @return
      * @throws IOException
      * @throws ExtractionException
@@ -555,19 +550,17 @@ public class Any23Test extends Any23OnlineTestBase {
         Any23 any23 = new Any23();
         ByteArrayOutputStream out = new ByteArrayOutputStream();
         ReportingTripleHandler outputHandler = new ReportingTripleHandler(
-                new IgnoreAccidentalRDFa(
-                        new IgnoreTitlesOfEmptyDocuments(
-                                new NTriplesWriter(out)
-                        )
-                )
-        );
+                new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(
+                        new NTriplesWriter(out))));
         return any23.extract(in, "http://host.com/path", outputHandler);
     }
 
     /**
-     * Asserts that a list an {@link Extractor} has been activated for the given input data.
-     *
-     * @param in input data as string.
+     * Asserts that at least one {@link Extractor} has been activated for the
+     * given input data.
+     * 
+     * @param in
+     *            input data as string.
      * @throws IOException
      * @throws ExtractionException
      */
@@ -575,36 +568,38 @@ public class Any23Test extends Any23OnlineTestBase {
         final ExtractionReport extractionReport = detectAndExtract(in);
         Assert.assertTrue(
                 "Detection and extraction failed, no matching extractors.",
-                extractionReport.hasMatchingExtractors()
-        );
+                extractionReport.hasMatchingExtractors());
     }
 
     /**
-     * Assert the correct activation of the given list of {@link Extractor}s for the given input string.
-     *
-     * @param in input data as string.
+     * Assert the correct activation of the given list of {@link Extractor}s for
+     * the given input string.
+     * 
+     * @param in
+     *            input data as string.
      * @param expectedExtractors
      * @throws IOException
      * @throws ExtractionException
      */
-    private void assertExtractorActivation(String in, Class<? extends Extractor>... expectedExtractors)
-    throws Exception {
+    private void assertExtractorActivation(String in,
+            Class<? extends Extractor>... expectedExtractors) throws Exception {
         final ExtractionReport extractionReport = detectAndExtract(in);
         for (Class<? extends Extractor> expectedExtractorClass : expectedExtractors) {
             Assert.assertTrue(
                     String.format(
                             "Detection and extraction failed, expected extractor [%s] not found.",
-                            expectedExtractorClass
-                    ),
-                    containsClass( extractionReport.getMatchingExtractors(), expectedExtractorClass )
-            );
+                            expectedExtractorClass),
+                    containsClass(extractionReport.getMatchingExtractors(),
+                            expectedExtractorClass));
         }
     }
 
     /**
      * Asserts the correct encoding detection for a specified data.
-     *
-     * @param encoding the expected specified encoding, if <code>null</code> will be auto detected.
+     * 
+     * @param encoding
+     *            the expected specified encoding, if <code>null</code> will be
+     *            auto detected.
      * @param input
      * @param expectedContent
      * @throws Exception
@@ -613,49 +608,60 @@ public class Any23Test extends Any23OnlineTestBase {
     throws Exception {
         DocumentSource fileDocumentSource = getDocumentSourceFromResource(input);
         Any23 any23;
-        RepositoryConnection conn;
-        RepositoryWriter repositoryWriter;
+        RepositoryConnection conn = null;
+        RepositoryWriter repositoryWriter = null;
         
         any23 = new Any23();
-        Sail store = new MemoryStore();
+        Repository store = new SailRepository(new MemoryStore());
         store.initialize();
-        conn = new SailRepository(store).getConnection();
-        repositoryWriter = new RepositoryWriter(conn);
-        Assert.assertTrue( any23.extract(fileDocumentSource, repositoryWriter, encoding).hasMatchingExtractors() );
-
-        RepositoryResult<Statement> statements = conn.getStatements(null, vDCTERMS.title, null, false);
-        try {
-            while (statements.hasNext()) {
-                Statement statement = statements.next();
-                printStatement(statement);
-                org.junit.Assert.assertTrue(statement.getObject().stringValue().contains(expectedContent));
+        try
+        {
+            conn = store.getConnection();
+            repositoryWriter = new RepositoryWriter(conn);
+            Assert.assertTrue( any23.extract(fileDocumentSource, repositoryWriter, encoding).hasMatchingExtractors() );
+    
+            RepositoryResult<Statement> statements = conn.getStatements(null, vDCTERMS.title, null, false);
+            try {
+                while (statements.hasNext()) {
+                    Statement statement = statements.next();
+                    printStatement(statement);
+                    Assert.assertTrue(statement.getObject().stringValue().contains(expectedContent));
+                }
+            } finally {
+                statements.close();
+            }
+        }
+        finally {
+            if(conn != null) {
+                conn.close();
+            }
+            if(repositoryWriter != null) {
+                repositoryWriter.close();
             }
-        } finally {
-            statements.close();
         }
-
         fileDocumentSource = null;
         any23 = null;
-        conn.close();
-        repositoryWriter.close();
     }
 
     /**
      * Will try to detect the <i>content</i> trying sequentially with all
      * specified parser.
-     *
+     * 
      * @param content
      * @param parsers
      * @throws Exception
      */
-    private void assertDetection(String content, String... parsers) throws Exception {
+    private void assertDetection(String content, String... parsers)
+            throws Exception {
         ByteArrayOutputStream out = new ByteArrayOutputStream();
         Any23 runner = new Any23(parsers.length == 0 ? null : parsers);
         if (parsers.length != 0) {
-            runner.setMIMETypeDetector(null);   // Use all the provided extractors.
+            runner.setMIMETypeDetector(null); // Use all the provided
+                                              // extractors.
         }
         final NTriplesWriter tripleHandler = new NTriplesWriter(out);
-        runner.extract(new StringDocumentSource(content, PAGE_URL), tripleHandler);
+        runner.extract(new StringDocumentSource(content, PAGE_URL),
+                tripleHandler);
         tripleHandler.close();
         String result = out.toString("us-ascii");
         Assert.assertNotNull(result);
@@ -663,19 +669,17 @@ public class Any23Test extends Any23OnlineTestBase {
     }
 
     private void printStatement(Statement statement) {
-        logger.debug(String.format("%s\t%s\t%s",
-                statement.getSubject(),
-                statement.getPredicate(),
-                statement.getObject()));
+        logger.debug(String.format("%s\t%s\t%s", statement.getSubject(),
+                statement.getPredicate(), statement.getObject()));
     }
 
     private boolean containsClass(List<?> list, Class clazz) {
-        for(Object o : list) {
-            if(o.getClass().equals(clazz)) {
+        for (Object o : list) {
+            if (o.getClass().equals(clazz)) {
                 return true;
             }
         }
         return false;
     }
-    
+
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/test/java/org/apache/any23/extractor/rdfa/XSLTStylesheetTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/rdfa/XSLTStylesheetTest.java b/core/src/test/java/org/apache/any23/extractor/rdfa/XSLTStylesheetTest.java
deleted file mode 100644
index c8052c7..0000000
--- a/core/src/test/java/org/apache/any23/extractor/rdfa/XSLTStylesheetTest.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.extractor.rdfa;
-
-import org.apache.any23.extractor.html.TagSoupParser;
-import org.junit.Assert;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.StringWriter;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Test case for {@link XSLTStylesheet} class.
- * Through this test we verify regressions on the <i>RDFa XSLT transformer</i> for <i>HTML/XHTML</i>
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-public class XSLTStylesheetTest {
-
-    private static final Logger logger = LoggerFactory.getLogger(XSLTStylesheetTest.class);
-
-    /**
-     * This test verifies the correct handling of base management for an <i>HTML</i> input.
-     *
-     * @throws java.io.IOException
-     * @throws XSLTStylesheetException
-     */
-    @Test
-    public void testHTMLRDFaBaseHanding() throws IOException, XSLTStylesheetException {
-        final String[] vars = checkPageBaseHandling("/html/rdfa/base-handling.html");
-        Assert.assertEquals("Unexpected value for this_location", "http://di2.deri.ie/people/", vars[0]);
-        Assert.assertEquals("Unexpected value for this_root"    , "http://di2.deri.ie/"      , vars[1]);
-        Assert.assertEquals("Unexpected value for html_base"    , "http://di2.deri.ie/people/", vars[2]);
-    }
-
-    /**
-     * This test verifies the correct handling of base management for an <i>XHTML</i> input.
-     *
-     * @throws java.io.IOException
-     * @throws XSLTStylesheetException
-     */
-    @Test
-    public void testXHTMLRDFaBaseHanding() throws IOException, XSLTStylesheetException {
-        final String[] vars = checkPageBaseHandling("/html/rdfa/base-handling.xhtml");
-        Assert.assertEquals("Unexpected value for this_location", "http://example.org/john-d/", vars[0]);
-        Assert.assertEquals("Unexpected value for this_root"    , "http://example.org/"       , vars[1]);
-        Assert.assertEquals("Unexpected value for html_base"    , "http://example.org/john-d/", vars[2]);
-    }
-
-    private String[] checkPageBaseHandling(String testFile) throws IOException, XSLTStylesheetException {
-        final TagSoupParser tagSoupParser = new TagSoupParser(
-                this.getClass().getResourceAsStream(testFile),
-                "http://test/document/uri"
-        );
-        final StringWriter sw = new StringWriter();
-        RDFaExtractor.getXSLT().applyTo(tagSoupParser.getDOM(), sw);
-        final String content = sw.toString();
-        logger.debug(content);
-        final Pattern pattern = Pattern.compile("<!--this_location: '(.+)' this_root: '(.+)' html_base: '(.+)'-->");
-        final Matcher matcher = pattern.matcher(content);
-        Assert.assertTrue("Cannot find comment matching within generated output.", matcher.find());
-        return new String[]{ matcher.group(1), matcher.group(2), matcher.group(3) };
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 60ec1e7..43dfb39 100644
--- a/pom.xml
+++ b/pom.xml
@@ -228,6 +228,7 @@
     <implementation.build.tstamp>${implementation.build}; ${maven.build.timestamp}</implementation.build.tstamp>
     <slf4j.logger.version>1.7.5</slf4j.logger.version>
     <sesame.version>2.7.5</sesame.version>
+    <semargl.version>0.6</semargl.version>
     <latest.stable.released>0.8.0</latest.stable.released>
 
     <!-- Maven Plugin Versions -->
@@ -385,6 +386,11 @@
         <artifactId>sesame-repository-api</artifactId>
         <version>${sesame.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.semarglproject</groupId>
+        <artifactId>semargl-sesame</artifactId>
+        <version>${semargl.version}</version>
+      </dependency>
       <!-- END: Sesame -->
 
       <!-- BEGIN:  Apache Commons -->


Mime
View raw message