james-server-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From btell...@apache.org
Subject [1/6] james-project git commit: JAMES-2018 Manage list levels well
Date Thu, 01 Jun 2017 09:04:59 GMT
Repository: james-project
Updated Branches:
  refs/heads/master 1925eebdb -> f19648a51


JAMES-2018 Manage list levels well


Project: http://git-wip-us.apache.org/repos/asf/james-project/repo
Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/49fbba73
Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/49fbba73
Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/49fbba73

Branch: refs/heads/master
Commit: 49fbba73a3f5a599c6a29bf31b9fcd9b627ccf8b
Parents: 7203945
Author: benwa <btellier@linagora.com>
Authored: Tue May 30 17:23:12 2017 +0700
Committer: benwa <btellier@linagora.com>
Committed: Thu Jun 1 16:03:20 2017 +0700

----------------------------------------------------------------------
 .../jmap/utils/JsoupHtmlTextExtractor.java      | 59 ++++++++++++++++----
 .../jmap/utils/JsoupHtmlTextExtractorTest.java  | 22 ++++++++
 2 files changed, 71 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/james-project/blob/49fbba73/server/protocols/jmap/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
----------------------------------------------------------------------
diff --git a/server/protocols/jmap/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java b/server/protocols/jmap/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
index c153ae5..d5b359e 100644
--- a/server/protocols/jmap/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
+++ b/server/protocols/jmap/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
@@ -22,6 +22,7 @@ package org.apache.james.jmap.utils;
 import java.util.Optional;
 import java.util.stream.Stream;
 
+import org.apache.commons.lang.StringUtils;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
@@ -40,6 +41,7 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
     public static final String P_TAG = "p";
     public static final String IMG_TAG = "img";
     public static final String ALT_TAG = "alt";
+    public static final int INITIAL_LIST_NESTED_LEVEL = 0;
 
     @Override
     public String toPlainText(String html) {
@@ -48,7 +50,7 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
 
             Element body = Optional.ofNullable(document.body()).orElse(document);
 
-            return flatten(body)
+            return flatten(body, INITIAL_LIST_NESTED_LEVEL)
                 .map(this::convertNodeToText)
                 .reduce("", (s1, s2) -> s1 + s2);
         } catch (Exception e) {
@@ -57,7 +59,8 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
         }
     }
 
-    private String convertNodeToText(Node node) {
+    private String convertNodeToText(HTMLNode htmlNode) {
+        Node node = htmlNode.underlyingNode;
         if (node instanceof TextNode) {
             TextNode textNode = (TextNode) node;
             return textNode.getWholeText();
@@ -67,14 +70,14 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
             if (element.tagName().equals(BR_TAG)) {
                 return "\n";
             }
-            if (element.tagName().equals(UL_TAG)) {
-                return "\n\n";
+            if (isList(element)) {
+                return convertListElement(htmlNode.listNestedLevel);
             }
             if (element.tagName().equals(OL_TAG)) {
                 return "\n\n";
             }
             if (element.tagName().equals(LI_TAG)) {
-                return "\n - ";
+                return "\n" + StringUtils.repeat(" ", htmlNode.listNestedLevel) + "- ";
             }
             if (element.tagName().equals(P_TAG)) {
                 return "\n\n";
@@ -86,21 +89,47 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
         return "";
     }
 
-    Stream<Node> flatten(Node base) {
+    private String convertListElement(int nestedLevel) {
+        if (nestedLevel == 0) {
+            return "\n\n";
+        } else {
+            return "";
+        }
+    }
+
+    Stream<HTMLNode> flatten(Node base, int listNestedLevel) {
         Position position = getPosition(base);
-        Stream<Node> flatChildren = base.childNodes()
+        int nextElementLevel = getNewNestedLevel(listNestedLevel, base);
+
+        Stream<HTMLNode> baseStream = Stream.of(new HTMLNode(base, listNestedLevel));
+        Stream<HTMLNode> flatChildren = base.childNodes()
             .stream()
-            .flatMap(this::flatten);
+            .flatMap(node -> flatten(node, nextElementLevel));
+        
         switch (position) {
             case PREFIX:
-                return Stream.concat(Stream.of(base), flatChildren);
+                return Stream.concat(baseStream, flatChildren);
             case SUFFIX:
-                return Stream.concat(flatChildren, Stream.of(base));
+                return Stream.concat(flatChildren, baseStream);
             default:
                 throw new RuntimeException("Unexpected POSITION for node element: " + position);
         }
     }
 
+    private int getNewNestedLevel(int listNestedLevel, Node node) {
+        if (node instanceof Element) {
+            Element element = (Element) node;
+            if (isList(element)) {
+                return listNestedLevel + 1;
+            }
+        }
+        return listNestedLevel;
+    }
+
+    private boolean isList(Element element) {
+        return element.tagName().equals(UL_TAG) || element.tagName().equals(OL_TAG);
+    }
+
     private enum Position {
         PREFIX,
         SUFFIX
@@ -116,4 +145,14 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
         return Position.SUFFIX;
     }
 
+    private static class HTMLNode {
+        private final Node underlyingNode;
+        private final int listNestedLevel;
+
+        public HTMLNode(Node underlyingNode, int listNestedLevel) {
+            this.underlyingNode = underlyingNode;
+            this.listNestedLevel = listNestedLevel;
+        }
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/james-project/blob/49fbba73/server/protocols/jmap/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
----------------------------------------------------------------------
diff --git a/server/protocols/jmap/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java b/server/protocols/jmap/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
index 75ba62a..30e858a 100644
--- a/server/protocols/jmap/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
+++ b/server/protocols/jmap/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
@@ -155,6 +155,28 @@ public class JsoupHtmlTextExtractorTest {
     }
 
     @Test
+    public void nestedListsShouldBeWellHandled() {
+        String html = " <ul>" +
+            "  <li>Coffee</li>" +
+            "  <li>Tea" +
+            "    <ul>" +
+            "      <li>Black tea</li>" +
+            "      <li>Green tea</li>" +
+            "    </ul>" +
+            "  </li>" +
+            "  <li>Milk</li>" +
+            "</ul>";
+        String expectedPlainText = "  \n" +
+            " - Coffee  \n" +
+            " - Tea          \n" +
+            "  - Black tea      \n" +
+            "  - Green tea        \n" +
+            " - Milk\n" +
+            "\n";
+        assertThat(textExtractor.toPlainText(html)).isEqualTo(expectedPlainText);
+    }
+
+    @Test
     public void nonClosedHtmlShouldBeTranslated() {
         String html = "This is an <b>HTML text !";
         String expectedPlainText = "This is an HTML text !";


---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org


Mime
View raw message