james-server-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mwiederk...@apache.org
Subject svn commit: r735872 - in /james/mime4j/trunk/src: main/java/org/apache/james/mime4j/codec/DecoderUtil.java main/java/org/apache/james/mime4j/util/CharsetUtil.java test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
Date Mon, 19 Jan 2009 23:16:11 GMT
Author: mwiederkehr
Date: Mon Jan 19 15:16:06 2009
New Revision: 735872

URL: http://svn.apache.org/viewvc?rev=735872&view=rev
Log:
decodeEncodedWords now removes white space that separates a pair of adjacent encoded-words (fixes
MIME4J-104).

Modified:
    james/mime4j/trunk/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
    james/mime4j/trunk/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java
    james/mime4j/trunk/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java

Modified: james/mime4j/trunk/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java?rev=735872&r1=735871&r2=735872&view=diff
==============================================================================
--- james/mime4j/trunk/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java (original)
+++ james/mime4j/trunk/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java Mon Jan 19 15:16:06 2009
@@ -31,9 +31,6 @@
 
 /**
  * Static methods for decoding strings, byte arrays and encoded words.
- *
- * 
- * @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $
  */
 public class DecoderUtil {
     private static Log log = LogFactory.getLog(DecoderUtil.class);
@@ -151,127 +148,106 @@
      * @return the decoded string.
      */
     public static String decodeEncodedWords(String body) {
-        CharArrayBuffer sb = new CharArrayBuffer(128);
-        
-        int p1 = 0;
-        int p2 = 0;
-        
-        try {
-            
-            /*
-             * Encoded words in headers have the form 
-             * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for 
-             * quoted printable and 'B' and 'b' for Base64
-             */
-            
-            while (p2 < body.length()) {
-                /*
-                 * Find beginning of first encoded word
-                 */
-                p1 = body.indexOf("=?", p2);
-                if (p1 == -1) {
-                    /*
-                     * None found. Emit the rest of the header and exit.
-                     */
-                    sb.append(body.substring(p2));
-                    break;
-                }
-                
-                /*
-                 * p2 points to the previously found end marker or the start
-                 * of the entire header text. Append the text between that
-                 * marker and the one pointed to by p1.
-                 */
-                if (p1 - p2 > 0) {
-                    sb.append(body.substring(p2, p1));
-                }
+        int previousEnd = 0;
+        boolean previousWasEncoded = false;
+
+        StringBuilder sb = new StringBuilder();
+
+        while (true) {
+            int begin = body.indexOf("=?", previousEnd);
+            int end = begin == -1 ? -1 : body.indexOf("?=", begin + 2);
+            if (end == -1) {
+                if (previousEnd == 0)
+                    return body;
 
-                /*
-                 * Find the first and second '?':s after the marker pointed to
-                 * by p1.
-                 */
-                int t1 = body.indexOf('?', p1 + 2);
-                int t2 = t1 != -1 ? body.indexOf('?', t1 + 1) : -1;
-
-                /*
-                 * Find this words end marker.
-                 */
-                p2 = t2 != -1 ? body.indexOf("?=", t2 + 1) : -1;
-                if (p2 == -1) {
-                    if (t2 != -1 && (body.length() - 1 == t2 || body.charAt(t2 + 1) == '=')) {
-                        /*
-                         * Treat "=?charset?enc?" and "=?charset?enc?=" as
-                         * empty strings.
-                         */
-                        p2 = t2;
-                    } else {
-                        /*
-                         * No end marker was found. Append the rest of the 
-                         * header and exit.
-                         */
-                        sb.append(body.substring(p1));
-                        break;
-                    }
+                sb.append(body.substring(previousEnd));
+                return sb.toString();
+            }
+            end += 2;
+
+            String sep = body.substring(previousEnd, begin);
+
+            String decoded = decodeEncodedWord(body, begin, end);
+            if (decoded == null) {
+                sb.append(sep);
+                sb.append(body.substring(begin, end));
+            } else {
+                if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
+                    sb.append(sep);
                 }
+                sb.append(decoded);
+            }
+
+            previousEnd = end;
+            previousWasEncoded = decoded != null;
+        }
+    }
+
+    // return null on error
+    private static String decodeEncodedWord(String body, int begin, int end) {
+        int qm1 = body.indexOf('?', begin + 2);
+        if (qm1 == end - 2)
+            return null;
+
+        int qm2 = body.indexOf('?', qm1 + 1);
+        if (qm2 == end - 2)
+            return null;
+
+        String mimeCharset = body.substring(begin + 2, qm1);
+        String encoding = body.substring(qm1 + 1, qm2);
+        String encodedText = body.substring(qm2 + 1, end - 2);
+
+        String charset = CharsetUtil.toJavaCharset(mimeCharset);
+        if (charset == null) {
+            if (log.isWarnEnabled()) {
+                log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
+                        + body.substring(begin, end) + "' doesn't have a "
+                        + "corresponding Java charset");
+            }
+            return null;
+        } else if (!CharsetUtil.isDecodingSupported(charset)) {
+            if (log.isWarnEnabled()) {
+                log.warn("Current JDK doesn't support decoding of charset '"
+                        + charset + "' (MIME charset '" + mimeCharset
+                        + "' in encoded word '" + body.substring(begin, end)
+                        + "')");
+            }
+            return null;
+        }
 
-                /*
-                 * [p1+2, t1] -> charset
-                 * [t1+1, t2] -> encoding
-                 * [t2+1, p2] -> encoded word
-                 */
-                
-                String decodedWord = null;
-                if (t2 == p2) {
-                    /*
-                     * The text is empty
-                     */
-                    decodedWord = "";
-                } else {
-
-                    String mimeCharset = body.substring(p1 + 2, t1);
-                    String enc = body.substring(t1 + 1, t2);
-                    String encodedWord = body.substring(t2 + 1, p2);
-
-                    /*
-                     * Convert the MIME charset to a corresponding Java one.
-                     */
-                    String charset = CharsetUtil.toJavaCharset(mimeCharset);
-                    if (charset == null) {
-                        decodedWord = body.substring(p1, p2 + 2);
-                        if (log.isWarnEnabled()) {
-                            log.warn("MIME charset '" + mimeCharset 
-                                    + "' in header field doesn't have a "
-                                    +"corresponding Java charset");
-                        }
-                    } else if (!CharsetUtil.isDecodingSupported(charset)) {
-                        decodedWord = body.substring(p1, p2 + 2);
-                        if (log.isWarnEnabled()) {
-                            log.warn("Current JDK doesn't support decoding "
-                                   + "of charset '" + charset 
-                                   + "' (MIME charset '" 
-                                   + mimeCharset + "')");
-                        }
-                    } else {
-                        if (enc.equalsIgnoreCase("Q")) {
-                            decodedWord = DecoderUtil.decodeQ(encodedWord, charset);
-                        } else if (enc.equalsIgnoreCase("B")) {
-                            decodedWord = DecoderUtil.decodeB(encodedWord, charset);
-                        } else {
-                            decodedWord = encodedWord;
-                            if (log.isWarnEnabled()) {
-                                log.warn("Warning: Unknown encoding in "
-                                        + "header field '" + enc + "'");
-                            }
-                        }
-                    }
+        if (encodedText.length() == 0) {
+            if (log.isWarnEnabled()) {
+                log.warn("Missing encoded text in encoded word: '"
+                        + body.substring(begin, end) + "'");
+            }
+            return null;
+        }
+
+        try {
+            if (encoding.equalsIgnoreCase("Q")) {
+                return DecoderUtil.decodeQ(encodedText, charset);
+            } else if (encoding.equalsIgnoreCase("B")) {
+                return DecoderUtil.decodeB(encodedText, charset);
+            } else {
+                if (log.isWarnEnabled()) {
+                    log.warn("Warning: Unknown encoding in encoded word '"
+                            + body.substring(begin, end) + "'");
                 }
-                p2 += 2;
-                sb.append(decodedWord);
+                return null;
+            }
+        } catch (UnsupportedEncodingException e) {
+            // should not happen because of isDecodingSupported check above
+            if (log.isWarnEnabled()) {
+                log.warn("Unsupported encoding in encoded word '"
+                        + body.substring(begin, end) + "'", e);
             }
-        } catch (Throwable t) {
-            log.error("Decoding header field body '" + body + "'", t);
+            return null;
+        } catch (RuntimeException e) {
+            if (log.isWarnEnabled()) {
+                log.warn("Could not decode encoded word '"
+                        + body.substring(begin, end) + "'", e);
+            }
+            return null;
         }
-        
-        return sb.toString();
     }
 }

Modified: james/mime4j/trunk/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java?rev=735872&r1=735871&r2=735872&view=diff
==============================================================================
--- james/mime4j/trunk/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java (original)
+++ james/mime4j/trunk/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java Mon Jan 19 15:16:06 2009
@@ -1131,6 +1131,28 @@
     }
 
     /**
+     * Returns <code>true</code> if the specified string consists entirely of
+     * whitespace characters.
+     * 
+     * @param s
+     *            string to test.
+     * @return <code>true</code> if the specified string consists entirely of
+     *         whitespace characters, <code>false</code> otherwise.
+     */
+    public static boolean isWhitespace(final String s) {
+        if (s == null) {
+            throw new IllegalArgumentException("String may not be null");
+        }
+        final int len = s.length();
+        for (int i = 0; i < len; i++) {
+            if (!isWhitespace(s.charAt(i))) {
+                return false;
+            }
+        }
+        return true;
+    }
+    
+    /**
      * Determines if the VM supports encoding (chars to bytes) the 
      * specified character set. NOTE: the given character set name may 
      * not be known to the VM even if this method returns <code>true</code>.

Modified: james/mime4j/trunk/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java?rev=735872&r1=735871&r2=735872&view=diff
==============================================================================
--- james/mime4j/trunk/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java (original)
+++ james/mime4j/trunk/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java Mon Jan 19 15:16:06 2009
@@ -77,14 +77,35 @@
                 DecoderUtil.decodeEncodedWords("=?US-ASCII?B?QSBzaG9ydCB0ZXh0?="));
         assertEquals("A short text again!", 
                 DecoderUtil.decodeEncodedWords("=?US-ASCII?b?QSBzaG9ydCB0ZXh0IGFnYWluIQ==?="));
-        assertEquals("", DecoderUtil.decodeEncodedWords("=?iso8859-1?Q?="));
-        assertEquals("", DecoderUtil.decodeEncodedWords("=?iso8859-1?b?="));
-        assertEquals("", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?"));
+
+        // invalid encoded words should be returned unchanged
+        assertEquals("=?iso8859-1?Q?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?Q?="));
+        assertEquals("=?iso8859-1?b?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?b?="));
+        assertEquals("=?ISO-8859-1?Q?", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?"));
+        assertEquals("=?ISO-8859-1?R?abc?=", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?R?abc?="));
+
+        // encoded-text requires at least one character according to rfc 2047
+        assertEquals("=?ISO-8859-1?Q??=", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q??="));
+        assertEquals("=?ISO-8859-1?B??=", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?B??="));
         
-        /*
-         * Bug detected on June 7, 2005. Decoding the following string caused
-         * OutOfMemoryError.
-         */
+        // white space between encoded words should be removed (MIME4J-104)
+        assertEquals("a", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?="));
+        assertEquals("a b", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b"));
+        assertEquals("ab", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="));
+        assertEquals("ab", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?="));
+        assertEquals("ab", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=\r\n  =?ISO-8859-1?Q?b?="));
+        assertEquals("a b", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a_b?="));
+        assertEquals("a b", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?="));
+
+        // non white space between encoded words should be retained
+        assertEquals("a b c", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?="));
+
+        // text before and after encoded words should be retained
+        assertEquals(" a b c ", DecoderUtil.decodeEncodedWords(" =?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?= "));
+        assertEquals("! a b c !", DecoderUtil.decodeEncodedWords("! =?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?= !"));
+        
+        // Bug detected on June 7, 2005. Decoding the following string caused
+        // OutOfMemoryError.
         assertEquals("=3?!!\\=?\"!g6P\"!Xp:\"!", DecoderUtil.decodeEncodedWords("=3?!!\\=?\"!g6P\"!Xp:\"!"));
     }    
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org


Mime
View raw message