maven-doxia-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vsive...@apache.org
Subject svn commit: r712147 - in /maven/doxia/doxia/trunk/doxia-core: pom.xml src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
Date Fri, 07 Nov 2008 15:05:47 GMT
Author: vsiveton
Date: Fri Nov  7 07:05:39 2008
New Revision: 712147

URL: http://svn.apache.org/viewvc?rev=712147&view=rev
Log:
DOXIA-263: Improve validation of input documents

o add validation before processing xml

Added:
    maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
  (with props)
Modified:
    maven/doxia/doxia/trunk/doxia-core/pom.xml
    maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java

Modified: maven/doxia/doxia/trunk/doxia-core/pom.xml
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/pom.xml?rev=712147&r1=712146&r2=712147&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/pom.xml (original)
+++ maven/doxia/doxia/trunk/doxia-core/pom.xml Fri Nov  7 07:05:39 2008
@@ -49,14 +49,13 @@
       <groupId>org.codehaus.plexus</groupId>
       <artifactId>plexus-container-default</artifactId>
     </dependency>
-
-    <!-- test -->
     <dependency>
       <groupId>xerces</groupId>
       <artifactId>xercesImpl</artifactId>
       <version>2.8.1</version>
-      <scope>test</scope>
     </dependency>
+
+    <!-- test -->
     <dependency>
       <groupId>org.apache.maven.scm</groupId>
       <artifactId>maven-scm-api</artifactId>

Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java?rev=712147&r1=712146&r2=712147&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
(original)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
Fri Nov  7 07:05:39 2008
@@ -19,21 +19,33 @@
  * under the License.
  */
 
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import javax.xml.XMLConstants;
+
 import org.apache.maven.doxia.macro.MacroExecutionException;
 import org.apache.maven.doxia.markup.XmlMarkup;
 import org.apache.maven.doxia.sink.Sink;
 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
+import org.codehaus.plexus.util.IOUtil;
 import org.codehaus.plexus.util.StringUtils;
 import org.codehaus.plexus.util.xml.pull.MXParser;
 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.XMLReaderFactory;
 
 /**
  * An abstract class that defines some convenience methods for <code>XML</code>
parsers.
@@ -46,14 +58,20 @@
     extends AbstractParser
     implements XmlMarkup
 {
-    /** Entity pattern for HTML entity, i.e. &#38;nbsp; */
+    /** Entity pattern for HTML entity, i.e. &#38;nbsp; see http://www.w3.org/TR/REC-xml/#NT-EntityDecl
*/
     private static final Pattern PATTERN_ENTITY_1 =
         Pattern.compile( "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>"
);
 
-    /** Entity pattern for Unicode entity, i.e. &#38;#38; */
+    /** Entity pattern for Unicode entity, i.e. &#38;#38; see http://www.w3.org/TR/REC-xml/#NT-EntityDecl
*/
     private static final Pattern PATTERN_ENTITY_2 =
         Pattern.compile( "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&#x?[0-9a-fA-F]{1,4};)(\\s)*\"(\\s)*>"
);
 
+    /** Doctype pattern as defined in http://www.w3.org/TR/REC-xml/#NT-doctypedecl */
+    private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*<!DOCTYPE([^>]*)>.*"
);
+
+    /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */
+    private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*"
);
+
     private boolean ignorable;
 
     private boolean collapsible;
@@ -62,10 +80,34 @@
 
     private Map entities;
 
+    private boolean validate = true;
+
+    /** Lazily created XMLReader used to validate XML content. */
+    private XMLReader xmlReader;
+
     /** {@inheritDoc} */
     public void parse( Reader source, Sink sink )
         throws ParseException
     {
+        // 1 first parsing if validation is required
+        if ( isValidate() )
+        {
+            String content;
+            try
+            {
+                content = IOUtil.toString( new BufferedReader( source ) );
+            }
+            catch ( IOException e )
+            {
+                throw new ParseException( "Error reading the model: " + e.getMessage(), e
);
+            }
+
+            validate( content );
+
+            source = new StringReader( content );
+        }
+
+        // 2 second parsing to process
         try
         {
             XmlPullParser parser = new MXParser();
@@ -426,4 +468,107 @@
 
         return entities;
     }
+
+    /**
+     * Tells whether validation of the XML content is enabled.
+     *
+     * @return <code>true</code> if XML content will be validated, <code>false</code> otherwise.
+     */
+    public boolean isValidate()
+    {
+        return validate;
+    }
+
+    /**
+     * Enables or disables validation of the XML content.
+     *
+     * @param validate <code>true</code> to validate the XML content, <code>false</code> to skip validation.
+     * @see #parse(Reader, Sink)
+     */
+    public void setValidate( boolean validate )
+    {
+        this.validate = validate;
+    }
+
+    // ----------------------------------------------------------------------
+    // Private methods
+    // ----------------------------------------------------------------------
+
+    /**
+     * Validate an XML content with SAX.
+     * <p>
+     * The content is only handed to the validating reader when it contains a doctype declaration
+     * (as matched by <code>PATTERN_DOCTYPE</code>) or, failing that, when the tag matched by
+     * <code>PATTERN_TAG</code> mentions the XML Schema instance namespace. Otherwise there is
+     * nothing to validate against and the method returns without parsing.
+     * </p>
+     *
+     * @param content a not null xml content
+     * @throws ParseException if the content cannot be read by the SAX reader or if the reader
+     * reports a validation problem.
+     */
+    private void validate( String content )
+        throws ParseException
+    {
+        // 1 if there's a doctype
+        boolean hasDoctype = PATTERN_DOCTYPE.matcher( content ).find();
+
+        // 2 if no doctype, check for an xmlns instance
+        boolean hasXsd = false;
+        if ( !hasDoctype )
+        {
+            Matcher matcher = PATTERN_TAG.matcher( content );
+            if ( matcher.find() )
+            {
+                // group 2 is everything between the tag name and the closing '>'
+                String value = matcher.group( 2 );
+
+                hasXsd = value.indexOf( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) != -1;
+            }
+        }
+
+        // 3 validate content if doctype or xsd
+        if ( !hasDoctype && !hasXsd )
+        {
+            return;
+        }
+
+        try
+        {
+            getLog().info( "Validating the content..." );
+
+            // The content is already decoded characters: give the parser a character stream.
+            // Converting it back to bytes with the platform default charset could disagree with
+            // the encoding announced in the XML declaration and corrupt or reject valid input.
+            getXmlReader().parse( new InputSource( new StringReader( content ) ) );
+        }
+        catch ( IOException e )
+        {
+            throw new ParseException( "Error validating the model: " + e.getMessage(), e );
+        }
+        catch ( SAXException e )
+        {
+            // Also covers SAXNotRecognizedException and SAXNotSupportedException (subclasses).
+            throw new ParseException( "Error validating the model: " + e.getMessage(), e );
+        }
+    }
+
+    /**
+     * Returns the SAX reader used for validation, creating and configuring it on first use.
+     * <p>
+     * The reader is a Xerces <code>SAXParser</code> with the SAX validation feature and the Xerces
+     * schema validation feature switched on, reporting through a {@link MessagesErrorHandler}.
+     * </p>
+     *
+     * @return an xmlReader instance.
+     * @throws SAXException if the reader cannot be created or a feature cannot be set.
+     */
+    private XMLReader getXmlReader()
+        throws SAXException
+    {
+        if ( xmlReader == null )
+        {
+            MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() );
+
+            // Configure a local instance and publish it only once fully set up: if a setFeature()
+            // call throws, the field stays null instead of caching a reader without validation
+            // or error handler that later calls would silently reuse.
+            XMLReader reader = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" );
+            reader.setFeature( "http://xml.org/sax/features/validation", true );
+            reader.setFeature( "http://apache.org/xml/features/validation/schema", true );
+            reader.setErrorHandler( errorHandler );
+
+            xmlReader = reader;
+        }
+
+        return xmlReader;
+    }
 }

Added: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java?rev=712147&view=auto
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
(added)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
Fri Nov  7 07:05:39 2008
@@ -0,0 +1,127 @@
+package org.apache.maven.doxia.parser;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.maven.doxia.logging.Log;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Convenience class to beautify SAXParseException messages.
+ *
+ * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
+ * @version $Id$
+ */
+class MessagesErrorHandler
+    extends DefaultHandler
+{
+    /** The vm line separator */
+    private static final String EOL = System.getProperty( "line.separator" );
+
+    private static final int TYPE_UNKNOWN = 0;
+
+    private static final int TYPE_WARNING = 1;
+
+    private static final int TYPE_ERROR = 2;
+
+    private static final int TYPE_FATAL = 3;
+
+    private final Log log;
+
+    public MessagesErrorHandler( Log log )
+    {
+        this.log = log;
+    }
+
+    /** {@inheritDoc} */
+    public void warning( SAXParseException e )
+        throws SAXException
+    {
+        processException( TYPE_WARNING, e );
+    }
+
+    /** {@inheritDoc} */
+    public void error( SAXParseException e )
+        throws SAXException
+    {
+        processException( TYPE_ERROR, e );
+    }
+
+    /** {@inheritDoc} */
+    public void fatalError( SAXParseException e )
+        throws SAXException
+    {
+        processException( TYPE_FATAL, e );
+    }
+
+    // ----------------------------------------------------------------------
+    // Private methods
+    // ----------------------------------------------------------------------
+
+    private void processException( int type, SAXParseException e )
+        throws SAXException
+    {
+        StringBuffer message = new StringBuffer();
+
+        switch ( type )
+        {
+            case TYPE_WARNING:
+                message.append( "Warning:" );
+                break;
+
+            case TYPE_ERROR:
+                message.append( "Error:" );
+                break;
+
+            case TYPE_FATAL:
+                message.append( "Fatal error:" );
+                break;
+
+            case TYPE_UNKNOWN:
+            default:
+                message.append( "Unknown:" );
+                break;
+        }
+
+        message.append( EOL );
+        message.append( "  Public ID: " + e.getPublicId() ).append( EOL );
+        message.append( "  System ID: " + e.getSystemId() ).append( EOL );
+        message.append( "  Line number: " + e.getLineNumber() ).append( EOL );
+        message.append( "  Column number: " + e.getColumnNumber() ).append( EOL );
+        message.append( "  Message: " + e.getMessage() ).append( EOL );
+
+        switch ( type )
+        {
+            case TYPE_WARNING:
+                if ( log.isWarnEnabled() )
+                {
+                    log.warn( message.toString() );
+                }
+                break;
+
+            case TYPE_UNKNOWN:
+            case TYPE_ERROR:
+            case TYPE_FATAL:
+            default:
+                throw new SAXException( message.toString() );
+        }
+    }
+}
\ No newline at end of file

Propchange: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision



Mime
View raw message