james-server-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From btell...@apache.org
Subject [6/9] james-project git commit: JAMES-2013 Extract Tika service from James
Date Thu, 08 Jun 2017 02:46:43 GMT
JAMES-2013 Extract Tika service from James


Project: http://git-wip-us.apache.org/repos/asf/james-project/repo
Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/1425048e
Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/1425048e
Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/1425048e

Branch: refs/heads/master
Commit: 1425048e3be2f8f7c8d9a074d21e1c87fdf7c45e
Parents: ae1eac7
Author: Antoine Duprat <aduprat@linagora.com>
Authored: Tue May 2 16:23:10 2017 +0200
Committer: benwa <btellier@linagora.com>
Committed: Thu Jun 8 09:46:25 2017 +0700

----------------------------------------------------------------------
 mailbox/elasticsearch/pom.xml                   |  12 +
 .../json/IndexableMessageTest.java              |  29 ++-
 .../json/MessageToElasticSearchJsonTest.java    |  19 +-
 mailbox/pom.xml                                 |  22 ++
 .../src/test/resources/eml/nonTextual.json      |   1 -
 mailbox/tika/pom.xml                            |  58 ++++-
 .../james/mailbox/tika/TikaConfiguration.java   |  89 ++++++++
 .../james/mailbox/tika/TikaException.java       |  26 +++
 .../james/mailbox/tika/TikaHttpClient.java      |  26 +++
 .../james/mailbox/tika/TikaHttpClientImpl.java  |  64 ++++++
 .../james/mailbox/tika/TikaTextExtractor.java   | 171 ++++++++++++++
 .../tika/extractor/TikaTextExtractor.java       |  99 --------
 .../james/mailbox/tika/TikaContainer.java       |  73 ++++++
 .../mailbox/tika/TikaTextExtractorTest.java     | 226 +++++++++++++++++++
 .../tika/extractor/TikaTextExtractorTest.java   | 125 ----------
 mpt/impl/smtp/cassandra/pom.xml                 |  12 +
 mpt/pom.xml                                     |   6 +
 protocols/smtp/dependency-reduced-pom.xml       |  18 +-
 server/container/guice/cassandra-guice/pom.xml  |   6 +
 .../apache/james/CassandraJamesServerMain.java  |   2 +
 .../mailbox/ElasticSearchMailboxModule.java     |   5 -
 .../modules/mailbox/TikaMailboxModule.java      |  89 ++++++++
 .../org/apache/james/CassandraJmapTestRule.java |   3 +
 .../org/apache/james/CassandraWithTikaTest.java |  44 ++++
 .../java/org/apache/james/GuiceTikaRule.java    |  53 +++++
 .../modules/CassandraJmapServerModule.java      |   3 +
 .../apache/james/modules/TestTikaModule.java    |  56 +++++
 .../util/streams/SwarmGenericContainer.java     |  18 +-
 server/pom.xml                                  |   6 +
 .../cassandra-jmap-integration-testing/pom.xml  |  12 +
 server/protocols/jmap/pom.xml                   |  17 ++
 31 files changed, 1139 insertions(+), 251 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/elasticsearch/pom.xml
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/pom.xml b/mailbox/elasticsearch/pom.xml
index 2144741..fd5b0fa 100644
--- a/mailbox/elasticsearch/pom.xml
+++ b/mailbox/elasticsearch/pom.xml
@@ -189,6 +189,18 @@
                     <scope>test</scope>
                 </dependency>
                 <dependency>
+                    <groupId>org.apache.james</groupId>
+                    <artifactId>apache-james-mailbox-tika</artifactId>
+                    <type>test-jar</type>
+                    <scope>test</scope>
+                </dependency>
+                <dependency>
+                    <groupId>org.apache.james</groupId>
+                    <artifactId>james-server-util-java8</artifactId>
+                    <type>test-jar</type>
+                    <scope>test</scope>
+                </dependency>
+                <dependency>
                     <groupId>com.fasterxml.jackson.core</groupId>
                     <artifactId>jackson-databind</artifactId>
                 </dependency>

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java
index 726eb69..84b0ee0 100644
--- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java
+++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java
@@ -43,7 +43,12 @@ import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
 import org.apache.james.mailbox.store.mail.model.MailboxMessage;
 import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder;
 import org.apache.james.mailbox.store.mail.model.impl.SimpleProperty;
-import org.apache.james.mailbox.tika.extractor.TikaTextExtractor;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.apache.james.mailbox.tika.TikaContainer;
+import org.apache.james.mailbox.tika.TikaHttpClientImpl;
+import org.apache.james.mailbox.tika.TikaTextExtractor;
+import org.junit.Before;
+import org.junit.ClassRule;
 import org.junit.Test;
 
 import com.google.common.collect.ImmutableList;
@@ -53,6 +58,20 @@ public class IndexableMessageTest {
 
     public static final MessageUid MESSAGE_UID = MessageUid.of(154);
 
+    @ClassRule
+    public static TikaContainer tika = new TikaContainer();
+
+    private TikaTextExtractor textExtractor;
+
+    @Before
+    public void setUp() throws Exception {
+        textExtractor = new TikaTextExtractor(new TikaHttpClientImpl(TikaConfiguration.builder()
+                .host(tika.getIp())
+                .port(tika.getPort())
+                .timeoutInMillis(tika.getTimeoutInMillis())
+                .build()));
+    }
+
     @Test
     public void textShouldBeEmptyWhenNoMatchingHeaders() throws Exception {
         MailboxMessage mailboxMessage = mock(MailboxMessage.class);
@@ -475,7 +494,7 @@ public class IndexableMessageTest {
         IndexableMessage indexableMessage = IndexableMessage.builder()
                 .message(mailboxMessage)
                 .users(ImmutableList.of(new MockMailboxSession("username").getUser()))
-                .extractor(new TikaTextExtractor())
+                .extractor(textExtractor)
                 .zoneId(ZoneId.of("Europe/Paris"))
                 .indexAttachments(IndexAttachments.YES)
                 .build();
@@ -507,7 +526,7 @@ public class IndexableMessageTest {
         IndexableMessage indexableMessage = IndexableMessage.builder()
                 .message(mailboxMessage)
                 .users(ImmutableList.of(new MockMailboxSession("username").getUser()))
-                .extractor(new TikaTextExtractor())
+                .extractor(textExtractor)
                 .zoneId(ZoneId.of("Europe/Paris"))
                 .indexAttachments(IndexAttachments.YES)
                 .build();
@@ -539,7 +558,7 @@ public class IndexableMessageTest {
         IndexableMessage indexableMessage = IndexableMessage.builder()
                 .message(mailboxMessage)
                 .users(ImmutableList.of(new MockMailboxSession("username").getUser()))
-                .extractor(new TikaTextExtractor())
+                .extractor(textExtractor)
                 .zoneId(ZoneId.of("Europe/Paris"))
                 .indexAttachments(IndexAttachments.YES)
                 .build();
@@ -568,7 +587,7 @@ public class IndexableMessageTest {
         IndexableMessage indexableMessage = IndexableMessage.builder()
                 .message(mailboxMessage)
                 .users(ImmutableList.of(new MockMailboxSession("username").getUser()))
-                .extractor(new TikaTextExtractor())
+                .extractor(textExtractor)
                 .zoneId(ZoneId.of("Europe/Paris"))
                 .indexAttachments(IndexAttachments.YES)
                 .build();

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
index ba27b58..96ae5d8 100644
--- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
+++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
@@ -37,6 +37,7 @@ import org.apache.james.mailbox.FlagsBuilder;
 import org.apache.james.mailbox.MailboxSession.User;
 import org.apache.james.mailbox.MessageUid;
 import org.apache.james.mailbox.elasticsearch.IndexAttachments;
+import org.apache.james.mailbox.extractor.TextExtractor;
 import org.apache.james.mailbox.mock.MockMailboxSession;
 import org.apache.james.mailbox.model.MessageId;
 import org.apache.james.mailbox.model.TestId;
@@ -45,8 +46,12 @@ import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
 import org.apache.james.mailbox.store.mail.model.MailboxMessage;
 import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder;
 import org.apache.james.mailbox.store.mail.model.impl.SimpleMailboxMessage;
-import org.apache.james.mailbox.tika.extractor.TikaTextExtractor;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.apache.james.mailbox.tika.TikaContainer;
+import org.apache.james.mailbox.tika.TikaHttpClientImpl;
+import org.apache.james.mailbox.tika.TikaTextExtractor;
 import org.junit.Before;
+import org.junit.ClassRule;
 import org.junit.Test;
 
 import com.google.common.base.Charsets;
@@ -63,11 +68,21 @@ public class MessageToElasticSearchJsonTest {
     public static final MessageUid UID = MessageUid.of(25);
     public static final Charset CHARSET = Charsets.UTF_8;
 
+    private TextExtractor textExtractor;
+
     private Date date;
     private PropertyBuilder propertyBuilder;
 
+    @ClassRule
+    public static TikaContainer tika = new TikaContainer();
+
     @Before
     public void setUp() throws Exception {
+        textExtractor = new TikaTextExtractor(new TikaHttpClientImpl(TikaConfiguration.builder()
+                .host(tika.getIp())
+                .port(tika.getPort())
+                .timeoutInMillis(tika.getTimeoutInMillis())
+                .build()));
         // 2015/06/07 00:00:00 0200 (Paris time zone)
         date = new Date(1433628000000L);
         propertyBuilder = new PropertyBuilder();
@@ -328,7 +343,7 @@ public class MessageToElasticSearchJsonTest {
     @Test
     public void spamEmailShouldBeWellConvertedToJsonWithApacheTika() throws IOException {
         MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(
-            new TikaTextExtractor(),
+            textExtractor,
             ZoneId.of("Europe/Paris"),
             IndexAttachments.YES);
         MailboxMessage spamMail = new SimpleMailboxMessage(MESSAGE_ID, date,

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/pom.xml
----------------------------------------------------------------------
diff --git a/mailbox/pom.xml b/mailbox/pom.xml
index fc7f1c1..a62c85f 100644
--- a/mailbox/pom.xml
+++ b/mailbox/pom.xml
@@ -116,6 +116,11 @@
     <dependencyManagement>
         <dependencies>
             <dependency>
+                <groupId>com.github.steveash.guavate</groupId>
+                <artifactId>guavate</artifactId>
+                <version>1.0.0</version>
+            </dependency>
+            <dependency>
                 <groupId>com.google.guava</groupId>
                 <artifactId>guava</artifactId>
                 <version>${guava.version}</version>
@@ -199,6 +204,12 @@
                 <artifactId>apache-james-mailbox-tika</artifactId>
                 <version>${project.version}</version>
             </dependency>
+            <dependency>
+                <groupId>org.apache.james</groupId>
+                <artifactId>apache-james-mailbox-tika</artifactId>
+                <version>${project.version}</version>
+                <type>test-jar</type>
+            </dependency>
 
            <dependency>
                 <groupId>org.apache.james</groupId>
@@ -210,6 +221,17 @@
                 <artifactId>james-server-util</artifactId>
                 <version>${project.version}</version>
             </dependency>
+            <dependency>
+                <groupId>org.apache.james</groupId>
+                <artifactId>james-server-util-java8</artifactId>
+                <version>${project.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.james</groupId>
+                <artifactId>james-server-util-java8</artifactId>
+                <version>${project.version}</version>
+                <type>test-jar</type>
+            </dependency>
             <!--
                 END Modules
             -->

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/store/src/test/resources/eml/nonTextual.json
----------------------------------------------------------------------
diff --git a/mailbox/store/src/test/resources/eml/nonTextual.json b/mailbox/store/src/test/resources/eml/nonTextual.json
index e44e2d8..af831cb 100644
--- a/mailbox/store/src/test/resources/eml/nonTextual.json
+++ b/mailbox/store/src/test/resources/eml/nonTextual.json
@@ -133,7 +133,6 @@
         "meta:table-count":["0"],
         "Creation-Date":["2015-06-18T12:41:25.197399866"],
         "xmpTPg:NPages":["1"],
-        "resourceName":["toto.odt"],
         "Character Count":["47"],
         "editing-cycles":["2"],
         "Page-Count":["1"],

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/tika/pom.xml
----------------------------------------------------------------------
diff --git a/mailbox/tika/pom.xml b/mailbox/tika/pom.xml
index 836b70f..ae861d8 100644
--- a/mailbox/tika/pom.xml
+++ b/mailbox/tika/pom.xml
@@ -148,29 +148,52 @@
             <activation>
                 <jdk>[1.8,)</jdk>
             </activation>
-	        <dependencies>
+            <dependencies>
                 <dependency>
                     <groupId>org.apache.james</groupId>
                     <artifactId>apache-james-mailbox-store</artifactId>
                 </dependency>
                 <dependency>
+                    <groupId>org.apache.james</groupId>
+                    <artifactId>james-server-util-java8</artifactId>
+                    <type>test-jar</type>
+                    <scope>test</scope>
+                </dependency>
+                <dependency>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-databind</artifactId>
+                </dependency>
+                <dependency>
+                    <groupId>com.fasterxml.jackson.datatype</groupId>
+                    <artifactId>jackson-datatype-guava</artifactId>
+                </dependency>
+                <dependency>
+                    <groupId>com.github.steveash.guavate</groupId>
+                    <artifactId>guavate</artifactId>
+                </dependency>
+                <dependency>
                     <groupId>com.google.guava</groupId>
                     <artifactId>guava</artifactId>
                 </dependency>
                 <dependency>
-                    <groupId>org.apache.tika</groupId>
-                    <artifactId>tika-core</artifactId>
-                    <version>1.12</version>
+                    <groupId>com.jayway.awaitility</groupId>
+                    <artifactId>awaitility</artifactId>
+                    <version>1.6.3</version>
+                    <scope>test</scope>
                 </dependency>
                 <dependency>
-                    <groupId>org.apache.tika</groupId>
-                    <artifactId>tika-parsers</artifactId>
-                    <version>1.12</version>
+                    <groupId>commons-configuration</groupId>
+                    <artifactId>commons-configuration</artifactId>
                 </dependency>
                 <dependency>
-                    <groupId>junit</groupId>
-                    <artifactId>junit</artifactId>
-                    <scope>test</scope>
+                    <groupId>org.apache.commons</groupId>
+                    <artifactId>commons-lang3</artifactId>
+                    <version>3.3.2</version>
+                </dependency>
+                <dependency>
+                    <groupId>org.apache.httpcomponents</groupId>
+                    <artifactId>fluent-hc</artifactId>
+                    <version>4.5.3</version>
                 </dependency>
                 <dependency>
                     <groupId>org.assertj</groupId>
@@ -178,6 +201,21 @@
                     <version>${assertj-3.version}</version>
                     <scope>test</scope>
                 </dependency>
+                <dependency>
+                    <groupId>org.mockito</groupId>
+                    <artifactId>mockito-core</artifactId>
+                    <scope>test</scope>
+                </dependency>
+                <dependency>
+                    <groupId>org.testcontainers</groupId>
+                    <artifactId>testcontainers</artifactId>
+                    <version>1.1.7</version>
+                </dependency>
+                <dependency>
+                    <groupId>junit</groupId>
+                    <artifactId>junit</artifactId>
+                    <scope>test</scope>
+                </dependency>
             </dependencies>
             <build>
                 <plugins>

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
new file mode 100644
index 0000000..1915fb6
--- /dev/null
+++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java
@@ -0,0 +1,89 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.tika;
+
+import java.util.Optional;
+
+import com.google.common.base.Preconditions;
+
+public class TikaConfiguration {
+
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    public static class Builder {
+
+        private Optional<String> host;
+        private Optional<Integer> port;
+        private Optional<Integer> timeoutInMillis;
+
+        private Builder() {
+            host = Optional.empty();
+            port = Optional.empty();
+            timeoutInMillis = Optional.empty();
+        }
+
+        public Builder host(String host) {
+            this.host = Optional.ofNullable(host);
+            return this;
+        }
+
+        public Builder port(int port) {
+            this.port = Optional.of(port);
+            return this;
+        }
+
+        public Builder timeoutInMillis(int timeoutInMillis) {
+            this.timeoutInMillis = Optional.of(timeoutInMillis);
+            return this;
+        }
+
+        public TikaConfiguration build() {
+            Preconditions.checkState(host.isPresent(), "'host' is mandatory");
+            Preconditions.checkState(port.isPresent(), "'port' is mandatory");
+            Preconditions.checkState(timeoutInMillis.isPresent(), "'timeoutInMillis' is mandatory");
+
+            return new TikaConfiguration(host.get(), port.get(), timeoutInMillis.get());
+        }
+    }
+
+    private final String host;
+    private final int port;
+    private final int timeoutInMillis;
+
+    private TikaConfiguration(String host, int port, int timeoutInMillis) {
+        this.host = host;
+        this.port = port;
+        this.timeoutInMillis = timeoutInMillis;
+    }
+
+    public String getHost() {
+        return host;
+    }
+
+    public int getPort() {
+        return port;
+    }
+
+    public int getTimeoutInMillis() {
+        return timeoutInMillis;
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaException.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaException.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaException.java
new file mode 100644
index 0000000..ecdc742
--- /dev/null
+++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaException.java
@@ -0,0 +1,26 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+package org.apache.james.mailbox.tika;
+
+public class TikaException extends RuntimeException {
+
+    public TikaException(Exception exception) {
+        super(exception);
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClient.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClient.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClient.java
new file mode 100644
index 0000000..c00ab77
--- /dev/null
+++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClient.java
@@ -0,0 +1,26 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+package org.apache.james.mailbox.tika;
+
+import java.io.InputStream;
+
+public interface TikaHttpClient {
+
+    InputStream rmetaAsJson(InputStream inputStream, String contentType) throws TikaException;
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClientImpl.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClientImpl.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClientImpl.java
new file mode 100644
index 0000000..45a66c6
--- /dev/null
+++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClientImpl.java
@@ -0,0 +1,64 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+package org.apache.james.mailbox.tika;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.apache.http.client.fluent.Request;
+import org.apache.http.client.utils.URIBuilder;
+import org.apache.http.entity.ContentType;
+
+public class TikaHttpClientImpl implements TikaHttpClient {
+
+    private static final String RMETA_AS_TEXT_ENDPOINT = "/rmeta/text";
+
+    private final TikaConfiguration tikaConfiguration;
+    private final URI rmeta;
+
+    public TikaHttpClientImpl(TikaConfiguration tikaConfiguration) throws URISyntaxException {
+        this.tikaConfiguration = tikaConfiguration;
+        this.rmeta = buildURI(tikaConfiguration).resolve(RMETA_AS_TEXT_ENDPOINT);
+    }
+
+    private URI buildURI(TikaConfiguration tikaConfiguration) throws URISyntaxException {
+        return new URIBuilder()
+                .setHost(tikaConfiguration.getHost())
+                .setPort(tikaConfiguration.getPort())
+                .setScheme("http")
+                .build();
+    }
+
+    @Override
+    public InputStream rmetaAsJson(InputStream inputStream, String contentType) throws TikaException {
+        try {
+            return Request.Put(rmeta)
+                    .socketTimeout(tikaConfiguration.getTimeoutInMillis())
+                    .bodyStream(inputStream, ContentType.create(contentType))
+                    .execute()
+                    .returnContent()
+                    .asStream();
+        } catch (IOException e) {
+            throw new TikaException(e);
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java
new file mode 100644
index 0000000..04eee79
--- /dev/null
+++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java
@@ -0,0 +1,171 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.tika;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.function.Predicate;
+
+import javax.inject.Inject;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.james.mailbox.extractor.ParsedContent;
+import org.apache.james.mailbox.extractor.TextExtractor;
+
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.TreeNode;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonDeserializer;
+import com.fasterxml.jackson.databind.JsonMappingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.module.SimpleModule;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.github.steveash.guavate.Guavate;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.MoreObjects;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+/**
+ * {@link TextExtractor} delegating the extraction to a Tika service reached through a
+ * {@link TikaHttpClient}, and decoding the JSON document it answers with.
+ *
+ * The answer is expected to be a JSON array holding exactly one object. The entry stored
+ * under {@code X-TIKA:content} becomes the textual content; every entry whose key does not
+ * start with {@code X-TIKA} becomes metadata.
+ */
+public class TikaTextExtractor implements TextExtractor {
+
+    private final TikaHttpClient tikaHttpClient;
+    private final ObjectMapper objectMapper;
+
+    @Inject
+    public TikaTextExtractor(TikaHttpClient tikaHttpClient) {
+        this.tikaHttpClient = tikaHttpClient;
+        this.objectMapper = initializeObjectMapper();
+    }
+
+    /** Creates an ObjectMapper able to decode the Tika answer into a ContentAndMetadata. */
+    private ObjectMapper initializeObjectMapper() {
+        ObjectMapper objectMapper = new ObjectMapper();
+        SimpleModule mapModule = new SimpleModule();
+        mapModule.addDeserializer(ContentAndMetadata.class, new ContentAndMetadataDeserializer());
+        objectMapper.registerModule(mapModule);
+        return objectMapper;
+    }
+
+    /**
+     * Sends the document to Tika and converts the answer into a {@link ParsedContent}.
+     *
+     * @param inputStream document to analyse
+     * @param contentType MIME type forwarded to the HTTP client
+     * @param fileName not used by this implementation
+     * @return the extracted text (null when the answer carries no content) and the metadata
+     */
+    @Override
+    public ParsedContent extractContent(InputStream inputStream, String contentType, String fileName) throws Exception {
+        ContentAndMetadata contentAndMetadata = convert(tikaHttpClient.rmetaAsJson(inputStream, contentType));
+        return new ParsedContent(contentAndMetadata.getContent(), contentAndMetadata.getMetadata());
+    }
+
+    private ContentAndMetadata convert(InputStream json) throws IOException, JsonParseException, JsonMappingException {
+        return objectMapper.readValue(json, ContentAndMetadata.class);
+    }
+
+    /**
+     * Decodes the JSON answer: an array of exactly one object whose values are either a
+     * single value or an array of values.
+     */
+    @VisibleForTesting
+    static class ContentAndMetadataDeserializer extends JsonDeserializer<ContentAndMetadata> {
+
+        /**
+         * @throws IllegalStateException when the document is not an array of exactly one
+         *         element, or when that element is not a JSON object
+         */
+        @Override
+        public ContentAndMetadata deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException, JsonProcessingException {
+            TreeNode treeNode = jsonParser.getCodec().readTree(jsonParser);
+            Preconditions.checkState(treeNode.isArray() && treeNode.size() == 1, "The response should have only one element");
+            Preconditions.checkState(treeNode.get(0).isObject(), "The element should be a Json object");
+            ObjectNode node = (ObjectNode) treeNode.get(0);
+            return ContentAndMetadata.from(ImmutableList.copyOf(node.fields())
+                .stream()
+                .collect(Guavate.toImmutableMap(Entry::getKey, entry -> asListOfString(entry.getValue()))));
+        }
+
+        /**
+         * Returns the textual form of each element of an array node, or a singleton list
+         * holding the textual form of any other node. An empty array gives an empty list.
+         */
+        @VisibleForTesting List<String> asListOfString(JsonNode jsonNode) {
+            if (jsonNode.isArray()) {
+                return ImmutableList.copyOf(jsonNode.elements()).stream()
+                    .map(JsonNode::asText)
+                    .collect(Guavate.toImmutableList());
+            }
+            return ImmutableList.of(jsonNode.asText());
+        }
+
+    }
+
+    /** Immutable pair made of the extracted text and the remaining metadata entries. */
+    private static class ContentAndMetadata {
+
+        private static final String TIKA_HEADER = "X-TIKA";
+        private static final String CONTENT_METADATA_HEADER_NAME = TIKA_HEADER + ":content";
+
+        /** Splits the decoded entries into the textual content and the metadata. */
+        public static ContentAndMetadata from(Map<String, List<String>> contentAndMetadataMap) {
+            return new ContentAndMetadata(content(contentAndMetadataMap),
+                    contentAndMetadataMap.entrySet().stream()
+                        .filter(allHeadersButTika())
+                        .collect(Guavate.toImmutableMap(Entry::getKey, Entry::getValue)));
+        }
+
+        private static Predicate<? super Entry<String, List<String>>> allHeadersButTika() {
+            return entry -> !entry.getKey().startsWith(TIKA_HEADER);
+        }
+
+        /**
+         * Returns the first value stored under the content key with its leading whitespace
+         * removed, or null when the key is absent or has no value.
+         */
+        private static String content(Map<String, List<String>> contentAndMetadataMap) {
+            List<String> content = contentAndMetadataMap.get(CONTENT_METADATA_HEADER_NAME);
+            // An empty JSON array under the content key is decoded as an empty list: handle it
+            // like a missing key rather than failing on get(0).
+            if (content == null || content.isEmpty()) {
+                return null;
+            }
+            // A null set of characters makes stripStart remove leading whitespace.
+            String onlySpaces = null;
+            return StringUtils.stripStart(content.get(0), onlySpaces);
+        }
+
+        private final String content;
+        private final Map<String, List<String>> metadata;
+
+        private ContentAndMetadata(String content, Map<String, List<String>> metadata) {
+            this.content = content;
+            this.metadata = metadata;
+        }
+
+        public String getContent() {
+            return content;
+        }
+
+        public Map<String, List<String>> getMetadata() {
+            return metadata;
+        }
+
+        @Override
+        public final boolean equals(Object o) {
+            if (o instanceof ContentAndMetadata) {
+                ContentAndMetadata other = (ContentAndMetadata) o;
+                return Objects.equals(content, other.content)
+                    && Objects.equals(metadata, other.metadata);
+            }
+            return false;
+        }
+
+        @Override
+        public final int hashCode() {
+            return Objects.hash(content, metadata);
+        }
+
+        @Override
+        public String toString() {
+            return MoreObjects.toStringHelper(this)
+                .add("content", content)
+                .add("metadata", metadata)
+                .toString();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractor.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractor.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractor.java
deleted file mode 100644
index d4a376e..0000000
--- a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractor.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/****************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one   *
- * or more contributor license agreements.  See the NOTICE file *
- * distributed with this work for additional information        *
- * regarding copyright ownership.  The ASF licenses this file   *
- * to you under the Apache License, Version 2.0 (the            *
- * "License"); you may not use this file except in compliance   *
- * with the License.  You may obtain a copy of the License at   *
- *                                                              *
- *   http://www.apache.org/licenses/LICENSE-2.0                 *
- *                                                              *
- * Unless required by applicable law or agreed to in writing,   *
- * software distributed under the License is distributed on an  *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
- * KIND, either express or implied.  See the License for the    *
- * specific language governing permissions and limitations      *
- * under the License.                                           *
- ****************************************************************/
-
-package org.apache.james.mailbox.tika.extractor;
-
-import java.io.InputStream;
-import java.io.StringWriter;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.james.mailbox.extractor.ParsedContent;
-import org.apache.james.mailbox.extractor.TextExtractor;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-
-import com.google.common.collect.ImmutableList;
-
-public class TikaTextExtractor implements TextExtractor {
-
-    private static class MetadataEntry {
-
-        private final String name;
-        private final ImmutableList<String> entries;
-
-        public MetadataEntry(String name, List<String> entries) {
-            this.name = name;
-            this.entries = ImmutableList.copyOf(entries);
-        }
-
-        public String getName() {
-            return name;
-        }
-
-        public List<String> getEntries() {
-            return entries;
-        }
-    }
-
-    private final Parser parser;
-    
-    public TikaTextExtractor() {
-        parser = new AutoDetectParser();
-    }
-
-    public ParsedContent extractContent(InputStream inputStream, String contentType, String fileName) throws Exception {
-        Metadata metadata = createInitializedMetadata(contentType, fileName);
-
-        StringWriter stringWriter = new StringWriter();
-        BodyContentHandler bodyContentHandler = new BodyContentHandler(stringWriter);
-        parser.parse(inputStream, bodyContentHandler, metadata, new ParseContext());
-
-        return new ParsedContent(stringWriter.toString(), convertMetadataToMultimap(metadata));
-    }
-
-    private Metadata createInitializedMetadata(String contentType, String fileName) {
-        Metadata metadata = new Metadata();
-        if (contentType != null) {
-            metadata.set(Metadata.CONTENT_TYPE, contentType);
-        }
-        if (fileName != null) {
-            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
-        }
-        return metadata;
-    }
-
-    private Map<String, List<String>> convertMetadataToMultimap(Metadata metadata) {
-        return Arrays.stream(metadata.names())
-            .map(name -> new MetadataEntry(name, Arrays.asList(metadata.getValues(name))))
-            .reduce(new HashMap<>(), (metadataMultiMap, metadataEntry) -> {
-                    metadataMultiMap.put(metadataEntry.getName(), metadataEntry.getEntries());
-                    return metadataMultiMap;
-                }, (metadataMultimap1, metadataMultimap2) -> {
-                    metadataMultimap1.putAll(metadataMultimap2);
-                    return metadataMultimap1;
-                });
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaContainer.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaContainer.java b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaContainer.java
new file mode 100644
index 0000000..3a2cf43
--- /dev/null
+++ b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaContainer.java
@@ -0,0 +1,73 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+package org.apache.james.mailbox.tika;
+
+import java.time.Duration;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.james.util.streams.SwarmGenericContainer;
+import org.junit.rules.ExternalResource;
+import org.testcontainers.containers.wait.Wait;
+
+import com.google.common.primitives.Ints;
+
+/**
+ * JUnit rule wrapping a container running a Tika server: the container is started before
+ * the tests and stopped after them. It can also be driven by hand through
+ * {@link #start()} and {@link #stop()}.
+ */
+public class TikaContainer extends ExternalResource {
+
+    private static final String TIKA_IMAGE = "logicalspark/docker-tikaserver:latest";
+    private static final String READINESS_PATH = "/tika";
+    private static final int DEFAULT_TIKA_PORT = 9998;
+    private static final Duration STARTUP_TIMEOUT = Duration.ofSeconds(30);
+    private static final int DEFAULT_TIMEOUT_IN_MS = Ints.checkedCast(TimeUnit.MINUTES.toMillis(3));
+
+    private final SwarmGenericContainer tika;
+
+    /** Declares the container; nothing is started until {@link #start()} is called. */
+    public TikaContainer() {
+        this.tika = new SwarmGenericContainer(TIKA_IMAGE)
+                .withExposedPorts(DEFAULT_TIKA_PORT)
+                .waitingFor(Wait.forHttp(READINESS_PATH))
+                .withStartupTimeout(STARTUP_TIMEOUT);
+    }
+
+    @Override
+    protected void before() throws Throwable {
+        start();
+    }
+
+    @Override
+    protected void after() {
+        stop();
+    }
+
+    /** Starts the underlying container. */
+    public void start() throws Exception {
+        this.tika.start();
+    }
+
+    /** Stops the underlying container. */
+    public void stop() {
+        this.tika.stop();
+    }
+
+    /** @return the IP reported by the underlying container */
+    public String getIp() {
+        return this.tika.getIp();
+    }
+
+    /** @return the port exposed by the container */
+    public int getPort() {
+        return DEFAULT_TIKA_PORT;
+    }
+
+    /** @return the timeout, in milliseconds, tests should use when calling Tika (three minutes) */
+    public int getTimeoutInMillis() {
+        return DEFAULT_TIMEOUT_IN_MS;
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java
new file mode 100644
index 0000000..24524a1
--- /dev/null
+++ b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java
@@ -0,0 +1,226 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.tika;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.james.mailbox.extractor.TextExtractor;
+import org.apache.james.mailbox.tika.TikaTextExtractor.ContentAndMetadataDeserializer;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.JsonNodeType;
+import com.google.common.base.Charsets;
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Tests of {@link TikaTextExtractor}.
+ *
+ * Document tests run against the Tika container started once for the class; deserializer
+ * tests rely on a stubbed {@link TikaHttpClient} or on mocked JSON nodes.
+ */
+public class TikaTextExtractorTest {
+
+    private TextExtractor textExtractor;
+
+    @Rule
+    public ExpectedException expectedException = ExpectedException.none();
+
+    @ClassRule
+    public static TikaContainer tika = new TikaContainer();
+
+    @Before
+    public void setUp() throws Exception {
+        TikaConfiguration configuration = TikaConfiguration.builder()
+                .host(tika.getIp())
+                .port(tika.getPort())
+                .timeoutInMillis(tika.getTimeoutInMillis())
+                .build();
+        textExtractor = new TikaTextExtractor(new TikaHttpClientImpl(configuration));
+    }
+
+    /** Loads a classpath document, checks it exists and returns the text Tika extracts from it. */
+    private String extractedTextOf(String resourcePath, String contentType, String fileName) throws Exception {
+        InputStream document = ClassLoader.getSystemResourceAsStream(resourcePath);
+        assertThat(document).isNotNull();
+        return textExtractor.extractContent(document, contentType, fileName).getTextualContent();
+    }
+
+    /** Builds an extractor whose HTTP client always answers with the given JSON document. */
+    private TikaTextExtractor extractorAnswering(String cannedJson) {
+        return new TikaTextExtractor(new TikaHttpClient() {
+
+            @Override
+            public InputStream rmetaAsJson(InputStream inputStream, String contentType) throws TikaException {
+                return new ByteArrayInputStream(cannedJson.getBytes(Charsets.UTF_8));
+            }
+        });
+    }
+
+    @Test
+    public void textualContentShouldReturnNullWhenInputStreamIsEmpty() throws Exception {
+        String textualContent = textExtractor.extractContent(IOUtils.toInputStream(""), "text/plain", "Text.txt")
+            .getTextualContent();
+
+        assertThat(textualContent).isNull();
+    }
+
+    @Test
+    public void textTest() throws Exception {
+        assertThat(extractedTextOf("documents/Text.txt", "text/plain", "Text.txt"))
+            .isEqualTo("This is some awesome text text.\n\n\n");
+    }
+
+    @Test
+    public void textMicrosoftWorldTest() throws Exception {
+        assertThat(extractedTextOf("documents/writter.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "writter.docx"))
+            .isEqualTo("This is an awesome document on libroffice writter !\n");
+    }
+
+    @Test
+    public void textOdtTest() throws Exception {
+        assertThat(extractedTextOf("documents/writter.odt", "application/vnd.oasis.opendocument.text", "writter.odt"))
+            .isEqualTo("This is an awesome document on libroffice writter !\n");
+    }
+
+    @Test
+    public void documentWithBadDeclaredMetadataShouldBeWellHandled() throws Exception {
+        assertThat(extractedTextOf("documents/fake.txt", "application/vnd.oasis.opendocument.text", "writter.odt"))
+            .isEqualTo("This is an awesome document on libroffice writter !\n");
+    }
+
+    @Test
+    public void slidePowerPointTest() throws Exception {
+        assertThat(extractedTextOf("documents/slides.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", "slides.pptx"))
+            .isEqualTo("James is awesome\nIt manages attachments so well !\n\n\n");
+    }
+
+    @Test
+    public void slideOdpTest() throws Exception {
+        assertThat(extractedTextOf("documents/slides.odp", "application/vnd.oasis.opendocument.presentation", "slides.odp"))
+            .isEqualTo("James is awesome\n\nIt manages attachments so well !\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n");
+    }
+
+    @Test
+    public void pdfTest() throws Exception {
+        assertThat(extractedTextOf("documents/PDF.pdf", "application/pdf", "PDF.pdf"))
+            .isEqualTo("This is an awesome document on libroffice writter !\n\n\n");
+    }
+
+    @Test
+    public void odsTest() throws Exception {
+        assertThat(extractedTextOf("documents/calc.ods", "application/vnd.oasis.opendocument.spreadsheet", "calc.ods"))
+            .isEqualTo("This is an aesome LibreOffice document !\n" +
+                "\n" +
+                "\n" +
+                "???\n" +
+                "Page \n" +
+                "??? (???)\n" +
+                "00/00/0000, 00:00:00\n" +
+                "Page  / \n");
+    }
+
+    @Test
+    public void excelTest() throws Exception {
+        assertThat(extractedTextOf("documents/calc.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "calc.xlsx"))
+            .isEqualTo("Feuille1\n" +
+                "\tThis is an aesome LibreOffice document !\n" +
+                "\n" +
+                "&A\t\n" +
+                "\n" +
+                "Page &P\t\n" +
+                "\n" +
+                "\n");
+    }
+
+    @Test
+    public void deserializerShouldThrowWhenMoreThanOneNode() throws Exception {
+        expectedException.expect(IllegalStateException.class);
+        expectedException.expectMessage("The response should have only one element");
+
+        TikaTextExtractor stubbedExtractor = extractorAnswering("[{\"key1\":\"value1\"},{\"key2\":\"value2\"}]");
+
+        // The stubbed client ignores its input, so no document is needed.
+        InputStream noDocument = null;
+        stubbedExtractor.extractContent(noDocument, "text/plain", "fake.txt");
+    }
+
+    @Test
+    public void deserializerShouldThrowWhenNodeIsNotAnObject() throws Exception {
+        expectedException.expect(IllegalStateException.class);
+        expectedException.expectMessage("The element should be a Json object");
+
+        TikaTextExtractor stubbedExtractor = extractorAnswering("[\"value1\"]");
+
+        // The stubbed client ignores its input, so no document is needed.
+        InputStream noDocument = null;
+        stubbedExtractor.extractContent(noDocument, "text/plain", "fake.txt");
+    }
+
+    @Test
+    public void asListOfStringShouldReturnASingletonWhenOneElement() {
+        String expectedContent = "text";
+        JsonNode stringNode = mock(JsonNode.class);
+        when(stringNode.getNodeType()).thenReturn(JsonNodeType.STRING);
+        when(stringNode.asText()).thenReturn(expectedContent);
+
+        ContentAndMetadataDeserializer deserializer = new TikaTextExtractor.ContentAndMetadataDeserializer();
+
+        assertThat(deserializer.asListOfString(stringNode)).containsOnly(expectedContent);
+    }
+
+    @Test
+    public void asListOfStringShouldReturnAListWhenMultipleElements() {
+        // Children are fully stubbed before being handed to the parent stubbing below.
+        JsonNode firstNode = mock(JsonNode.class);
+        when(firstNode.asText()).thenReturn("first");
+        JsonNode secondNode = mock(JsonNode.class);
+        when(secondNode.asText()).thenReturn("second");
+        JsonNode thirdNode = mock(JsonNode.class);
+        when(thirdNode.asText()).thenReturn("third");
+
+        JsonNode arrayNode = mock(JsonNode.class);
+        when(arrayNode.getNodeType()).thenReturn(JsonNodeType.ARRAY);
+        when(arrayNode.elements())
+            .thenReturn(ImmutableList.of(firstNode, secondNode, thirdNode).iterator());
+
+        ContentAndMetadataDeserializer deserializer = new TikaTextExtractor.ContentAndMetadataDeserializer();
+        List<String> listOfString = deserializer.asListOfString(arrayNode);
+
+        assertThat(listOfString).containsOnly("first", "second", "third");
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractorTest.java
----------------------------------------------------------------------
diff --git a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractorTest.java b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractorTest.java
deleted file mode 100644
index 7a03c93..0000000
--- a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractorTest.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/****************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one   *
- * or more contributor license agreements.  See the NOTICE file *
- * distributed with this work for additional information        *
- * regarding copyright ownership.  The ASF licenses this file   *
- * to you under the Apache License, Version 2.0 (the            *
- * "License"); you may not use this file except in compliance   *
- * with the License.  You may obtain a copy of the License at   *
- *                                                              *
- *   http://www.apache.org/licenses/LICENSE-2.0                 *
- *                                                              *
- * Unless required by applicable law or agreed to in writing,   *
- * software distributed under the License is distributed on an  *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
- * KIND, either express or implied.  See the License for the    *
- * specific language governing permissions and limitations      *
- * under the License.                                           *
- ****************************************************************/
-
-package org.apache.james.mailbox.tika.extractor;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-import java.io.InputStream;
-
-import org.apache.james.mailbox.extractor.TextExtractor;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TikaTextExtractorTest {
-    
-    private TextExtractor textExtractor;
-    
-    @Before
-    public void setUp() {
-        textExtractor = new TikaTextExtractor();
-    }
-    
-    @Test
-    public void textTest() throws Exception {
-        InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/Text.txt");
-        assertThat(inputStream).isNotNull();
-        assertThat(textExtractor.extractContent(inputStream, "text/plain", "Text.txt").getTextualContent())
-            .isEqualTo("This is some awesome text text.\n\n\n");
-    }
-
-    @Test
-    public void textMicrosoftWorldTest() throws Exception {
-        InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/writter.docx");
-        assertThat(inputStream).isNotNull();
-        assertThat(textExtractor.extractContent(inputStream, "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "writter.docx").getTextualContent())
-            .isEqualTo("This is an awesome document on libroffice writter !\n");
-    }
-
-    @Test
-    public void textOdtTest() throws Exception {
-        InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/writter.odt");
-        assertThat(inputStream).isNotNull();
-        assertThat(textExtractor.extractContent(inputStream, "application/vnd.oasis.opendocument.text", "writter.odt").getTextualContent())
-            .isEqualTo("This is an awesome document on libroffice writter !\n");
-    }
-
-    @Test
-    public void documentWithBadDeclaredMetadataShouldBeWellHandled() throws Exception {
-        InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/fake.txt");
-        assertThat(inputStream).isNotNull();
-        assertThat(textExtractor.extractContent(inputStream, "application/vnd.oasis.opendocument.text", "writter.odt").getTextualContent())
-            .isEqualTo("This is an awesome document on libroffice writter !\n");
-    }
-    
-    @Test
-    public void slidePowerPointTest() throws Exception {
-        InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/slides.pptx");
-        assertThat(inputStream).isNotNull();
-        assertThat(textExtractor.extractContent(inputStream, "application/vnd.openxmlformats-officedocument.presentationml.presentation", "slides.pptx").getTextualContent())
-            .isEqualTo("James is awesome\nIt manages attachments so well !\n\n\n");
-    }
-
-    @Test
-    public void slideOdpTest() throws Exception {
-        InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/slides.odp");
-        assertThat(inputStream).isNotNull();
-        assertThat(textExtractor.extractContent(inputStream, "application/vnd.oasis.opendocument.presentation", "slides.odp").getTextualContent())
-            .isEqualTo("James is awesome\n\nIt manages attachments so well !\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n");
-    }
-    
-    @Test
-    public void pdfTest() throws Exception {
-        InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/PDF.pdf");
-        assertThat(inputStream).isNotNull();
-        assertThat(textExtractor.extractContent(inputStream, "application/pdf", "PDF.pdf").getTextualContent())
-            .isEqualTo("\nThis is an awesome document on libroffice writter !\n\n\n");
-    }
-    
-    @Test
-    public void odsTest() throws Exception {
-        InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/calc.ods");
-        assertThat(inputStream).isNotNull();
-        assertThat(textExtractor.extractContent(inputStream, "application/vnd.oasis.opendocument.spreadsheet", "calc.ods").getTextualContent())
-            .isEqualTo("\tThis is an aesome LibreOffice document !\n" +
-                "\n" +
-                "\n" +
-                "???\n" +
-                "Page \n" +
-                "??? (???)\n" +
-                "00/00/0000, 00:00:00\n" +
-                "Page  / \n");
-    }
-    
-    @Test
-    public void excelTest() throws Exception {
-        InputStream inputStream = ClassLoader.getSystemResourceAsStream("documents/calc.xlsx");
-        assertThat(inputStream).isNotNull();
-        assertThat(textExtractor.extractContent(inputStream, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "calc.xlsx").getTextualContent())
-            .isEqualTo("Feuille1\n" +
-                "\tThis is an aesome LibreOffice document !\n" +
-                "\n" +
-                "&A\t\n" +
-                "\n" +
-                "Page &P\t\n" +
-                "\n" +
-                "\n");
-    }
-    
-}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mpt/impl/smtp/cassandra/pom.xml
----------------------------------------------------------------------
diff --git a/mpt/impl/smtp/cassandra/pom.xml b/mpt/impl/smtp/cassandra/pom.xml
index 1ac6623..e25ebea 100644
--- a/mpt/impl/smtp/cassandra/pom.xml
+++ b/mpt/impl/smtp/cassandra/pom.xml
@@ -163,6 +163,12 @@
                 </dependency>
                 <dependency>
                     <groupId>org.apache.james</groupId>
+                    <artifactId>apache-james-mailbox-tika</artifactId>
+                    <type>test-jar</type>
+                    <scope>test</scope>
+                </dependency>
+                <dependency>
+                    <groupId>org.apache.james</groupId>
                     <artifactId>apache-james-mpt-core</artifactId>
                 </dependency>
                 <dependency>
@@ -196,6 +202,12 @@
                     <type>test-jar</type>
                 </dependency>
                 <dependency>
+                    <groupId>org.apache.james</groupId>
+                    <artifactId>james-server-util-java8</artifactId>
+                    <type>test-jar</type>
+                    <scope>test</scope>
+                </dependency>
+                <dependency>
                     <groupId>com.google.guava</groupId>
                     <artifactId>guava</artifactId>
                     <version>18.0</version>

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/mpt/pom.xml
----------------------------------------------------------------------
diff --git a/mpt/pom.xml b/mpt/pom.xml
index 2ea006a..1f8f6d8 100644
--- a/mpt/pom.xml
+++ b/mpt/pom.xml
@@ -234,6 +234,12 @@
             </dependency>
             <dependency>
                 <groupId>org.apache.james</groupId>
+                <artifactId>apache-james-mailbox-tika</artifactId>
+                <type>test-jar</type>
+                <version>${project.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.james</groupId>
                 <artifactId>james-server-cassandra-guice</artifactId>
                 <version>${project.version}</version>
             </dependency>

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/protocols/smtp/dependency-reduced-pom.xml
----------------------------------------------------------------------
diff --git a/protocols/smtp/dependency-reduced-pom.xml b/protocols/smtp/dependency-reduced-pom.xml
index c926307..51135a9 100644
--- a/protocols/smtp/dependency-reduced-pom.xml
+++ b/protocols/smtp/dependency-reduced-pom.xml
@@ -79,6 +79,12 @@
       <scope>compile</scope>
     </dependency>
     <dependency>
+      <groupId>org.apache.james</groupId>
+      <artifactId>metrics-api</artifactId>
+      <version>3.0.0-beta6-SNAPSHOT</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
       <version>18.0</version>
@@ -97,6 +103,12 @@
       <scope>compile</scope>
     </dependency>
     <dependency>
+      <groupId>javax.inject</groupId>
+      <artifactId>javax.inject</artifactId>
+      <version>1</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <version>4.10</version>
@@ -109,9 +121,9 @@
       </exclusions>
     </dependency>
     <dependency>
-      <groupId>javax.mail</groupId>
-      <artifactId>mail</artifactId>
-      <version>1.4.4</version>
+      <groupId>com.sun.mail</groupId>
+      <artifactId>javax.mail</artifactId>
+      <version>1.5.4</version>
       <scope>test</scope>
       <exclusions>
         <exclusion>

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/server/container/guice/cassandra-guice/pom.xml
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/pom.xml b/server/container/guice/cassandra-guice/pom.xml
index 647e7e3..448768d 100644
--- a/server/container/guice/cassandra-guice/pom.xml
+++ b/server/container/guice/cassandra-guice/pom.xml
@@ -228,6 +228,12 @@
                     <scope>test</scope>
                 </dependency>
                 <dependency>
+                    <groupId>${project.groupId}</groupId>
+                    <artifactId>apache-james-mailbox-tika</artifactId>
+                    <type>test-jar</type>
+                    <scope>test</scope>
+                </dependency>
+                <dependency>
                     <groupId>org.apache.james</groupId>
                     <artifactId>apache-mailet-icalendar</artifactId>
                     <version>${project.version}</version>

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/server/container/guice/cassandra-guice/src/main/java/org/apache/james/CassandraJamesServerMain.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/CassandraJamesServerMain.java b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/CassandraJamesServerMain.java
index 7cd4bb9..e804de1 100644
--- a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/CassandraJamesServerMain.java
+++ b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/CassandraJamesServerMain.java
@@ -28,6 +28,7 @@ import org.apache.james.modules.data.CassandraUsersRepositoryModule;
 import org.apache.james.modules.mailbox.CassandraMailboxModule;
 import org.apache.james.modules.mailbox.CassandraSessionModule;
 import org.apache.james.modules.mailbox.ElasticSearchMailboxModule;
+import org.apache.james.modules.mailbox.TikaMailboxModule;
 import org.apache.james.modules.protocols.IMAPServerModule;
 import org.apache.james.modules.protocols.JMAPServerModule;
 import org.apache.james.modules.protocols.LMTPServerModule;
@@ -68,6 +69,7 @@ public class CassandraJamesServerMain {
         new CassandraMailboxModule(),
         new CassandraSessionModule(),
         new ElasticSearchMailboxModule(),
+        new TikaMailboxModule(),
         new ActiveMQQueueModule(),
         new ESMetricReporterModule(),
         new MailboxModule());

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/ElasticSearchMailboxModule.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/ElasticSearchMailboxModule.java b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/ElasticSearchMailboxModule.java
index 4439411..f567d20 100644
--- a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/ElasticSearchMailboxModule.java
+++ b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/ElasticSearchMailboxModule.java
@@ -37,10 +37,8 @@ import org.apache.james.mailbox.elasticsearch.IndexAttachments;
 import org.apache.james.mailbox.elasticsearch.MailboxElasticsearchConstants;
 import org.apache.james.mailbox.elasticsearch.MailboxMappingFactory;
 import org.apache.james.mailbox.elasticsearch.events.ElasticSearchListeningMessageSearchIndex;
-import org.apache.james.mailbox.extractor.TextExtractor;
 import org.apache.james.mailbox.store.search.ListeningMessageSearchIndex;
 import org.apache.james.mailbox.store.search.MessageSearchIndex;
-import org.apache.james.mailbox.tika.extractor.TikaTextExtractor;
 import org.apache.james.utils.PropertiesProvider;
 import org.apache.james.utils.RetryExecutorUtil;
 import org.elasticsearch.client.Client;
@@ -75,9 +73,6 @@ public class ElasticSearchMailboxModule extends AbstractModule {
         bind(ElasticSearchListeningMessageSearchIndex.class).in(Scopes.SINGLETON);
         bind(MessageSearchIndex.class).to(ElasticSearchListeningMessageSearchIndex.class);
         bind(ListeningMessageSearchIndex.class).to(ElasticSearchListeningMessageSearchIndex.class);
-
-        bind(TikaTextExtractor.class).in(Scopes.SINGLETON);
-        bind(TextExtractor.class).to(TikaTextExtractor.class);
     }
 
     @Provides

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
new file mode 100644
index 0000000..c9cc6b8
--- /dev/null
+++ b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaMailboxModule.java
@@ -0,0 +1,115 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.modules.mailbox;
+
+import java.io.FileNotFoundException;
+import java.net.URISyntaxException;
+import java.util.concurrent.TimeUnit;
+
+import javax.inject.Singleton;
+
+import org.apache.commons.configuration.ConfigurationException;
+import org.apache.commons.configuration.PropertiesConfiguration;
+import org.apache.james.mailbox.extractor.TextExtractor;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.apache.james.mailbox.tika.TikaHttpClient;
+import org.apache.james.mailbox.tika.TikaHttpClientImpl;
+import org.apache.james.mailbox.tika.TikaTextExtractor;
+import org.apache.james.utils.PropertiesProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.primitives.Ints;
+import com.google.inject.AbstractModule;
+import com.google.inject.Provides;
+import com.google.inject.Scopes;
+
+/**
+ * Guice module that wires Tika-based text extraction.
+ *
+ * Binds {@link TextExtractor} to a singleton {@link TikaTextExtractor} and provides the
+ * {@link TikaHttpClient} and {@link TikaConfiguration} instances.
+ */
+public class TikaMailboxModule extends AbstractModule {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(TikaMailboxModule.class);
+
+    // Name passed to PropertiesProvider#getConfiguration, followed by the keys read from it.
+    private static final String TIKA_CONFIGURATION_NAME = "tika";
+    private static final String TIKA_HOST = "tika.host";
+    private static final String TIKA_PORT = "tika.port";
+    private static final String TIKA_TIMEOUT_IN_MS = "tika.timeoutInMillis";
+
+    // Fallback values used when a key, or the whole configuration file, is missing.
+    private static final String DEFAULT_HOST = "127.0.0.1";
+    private static final int DEFAULT_PORT = 9998;
+    private static final int DEFAULT_TIMEOUT_IN_MS = Ints.checkedCast(TimeUnit.SECONDS.toMillis(30));
+
+    /** Binds TikaTextExtractor as a singleton and exposes it as the TextExtractor implementation. */
+    @Override
+    protected void configure() {
+        bind(TikaTextExtractor.class).in(Scopes.SINGLETON);
+        bind(TextExtractor.class).to(TikaTextExtractor.class);
+    }
+
+    /**
+     * Builds the singleton HTTP client from the provided configuration.
+     *
+     * @param tikaConfiguration configuration handed to the TikaHttpClientImpl constructor
+     * @return a new TikaHttpClientImpl
+     * @throws URISyntaxException declared by this provider; presumably raised by TikaHttpClientImpl (confirm)
+     */
+    @Provides
+    @Singleton
+    protected TikaHttpClient provideTikaHttpClient(TikaConfiguration tikaConfiguration) throws URISyntaxException {
+        return new TikaHttpClientImpl(tikaConfiguration);
+    }
+
+    /**
+     * Reads the "tika" configuration and builds the singleton TikaConfiguration.
+     * Missing keys fall back to the defaults; if the configuration file itself cannot be
+     * found, a warning is logged and the defaults are used for every setting.
+     *
+     * @param propertiesProvider source of the "tika" properties configuration
+     * @return the configuration built from the file values or from the defaults
+     * @throws ConfigurationException if raised while obtaining the configuration (not handled here)
+     */
+    @Provides
+    @Singleton
+    private TikaConfiguration getTikaConfiguration(PropertiesProvider propertiesProvider) throws ConfigurationException {
+        try {
+            PropertiesConfiguration configuration = propertiesProvider.getConfiguration(TIKA_CONFIGURATION_NAME);
+            return TikaConfiguration.builder()
+                    .host(configuration.getString(TIKA_HOST, DEFAULT_HOST))
+                    .port(configuration.getInt(TIKA_PORT, DEFAULT_PORT))
+                    .timeoutInMillis(configuration.getInt(TIKA_TIMEOUT_IN_MS, DEFAULT_TIMEOUT_IN_MS))
+                    .build();
+        } catch (FileNotFoundException e) {
+            // No configuration file: fall back to the default contact point and timeout.
+            LOGGER.warn("Could not find {} configuration file. Using {}:{} as contact point", TIKA_CONFIGURATION_NAME, DEFAULT_HOST, DEFAULT_PORT);
+            return TikaConfiguration.builder()
+                    .host(DEFAULT_HOST)
+                    .port(DEFAULT_PORT)
+                    .timeoutInMillis(DEFAULT_TIMEOUT_IN_MS)
+                    .build();
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/server/container/guice/cassandra-guice/src/test/java/org/apache/james/CassandraJmapTestRule.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/CassandraJmapTestRule.java b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/CassandraJmapTestRule.java
index 8289b3c..d57d171 100644
--- a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/CassandraJmapTestRule.java
+++ b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/CassandraJmapTestRule.java
@@ -18,6 +18,8 @@
  ****************************************************************/
 
 package org.apache.james;
+import org.apache.james.mailbox.extractor.TextExtractor;
+import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
 import org.apache.james.modules.TestESMetricReporterModule;
 import org.apache.james.modules.TestJMAPServerModule;
 import org.junit.rules.TestRule;
@@ -48,6 +50,7 @@ public class CassandraJmapTestRule implements TestRule {
     public GuiceJamesServer jmapServer(Module... additionals) {
         return new GuiceJamesServer()
             .combineWith(CassandraJamesServerMain.cassandraServerModule, CassandraJamesServerMain.protocols)
+            .overrideWith(binder -> binder.bind(TextExtractor.class).to(DefaultTextExtractor.class))
             .overrideWith(new TestJMAPServerModule(LIMIT_TO_3_MESSAGES))
             .overrideWith(new TestESMetricReporterModule())
             .overrideWith(guiceModuleTestRule.getModule())

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/server/container/guice/cassandra-guice/src/test/java/org/apache/james/CassandraWithTikaTest.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/CassandraWithTikaTest.java b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/CassandraWithTikaTest.java
new file mode 100644
index 0000000..5894933
--- /dev/null
+++ b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/CassandraWithTikaTest.java
@@ -0,0 +1,53 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james;
+
+import org.junit.Rule;
+
+/**
+ * AbstractJmapJamesServerTest variant whose server is built by a CassandraJmapTestRule
+ * aggregating the embedded Elasticsearch, embedded Cassandra and Tika rules.
+ */
+public class CassandraWithTikaTest extends AbstractJmapJamesServerTest {
+
+    private final GuiceTikaRule guiceTikaRule = new GuiceTikaRule();
+
+    @Rule
+    public CassandraJmapTestRule cassandraJmap = new CassandraJmapTestRule(
+        AggregateGuiceModuleTestRule.of(
+            new EmbeddedElasticSearchRule(),
+            new EmbeddedCassandraRule(),
+            guiceTikaRule));
+
+    @Override
+    protected GuiceJamesServer createJamesServer() {
+        // NOTE(review): the lambda below is itself the Module passed to jmapServer; its body calls
+        // guiceTikaRule.getModule() and discards the result, so nothing is installed through this
+        // argument. The Tika module presumably still arrives via the aggregate rule given to
+        // CassandraJmapTestRule -- confirm, then pass the module directly or binder.install(...) it.
+        return cassandraJmap.jmapServer(binder -> guiceTikaRule.getModule());
+    }
+
+    @Override
+    protected void clean() {
+        // Intentionally empty: this test performs no cleanup of its own.
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/server/container/guice/cassandra-guice/src/test/java/org/apache/james/GuiceTikaRule.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/GuiceTikaRule.java b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/GuiceTikaRule.java
new file mode 100644
index 0000000..b14086f
--- /dev/null
+++ b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/GuiceTikaRule.java
@@ -0,0 +1,63 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james;
+import org.apache.james.mailbox.tika.TikaContainer;
+import org.apache.james.modules.TestTikaModule;
+import org.junit.runner.Description;
+import org.junit.runners.model.Statement;
+
+import com.google.inject.Module;
+
+
+/**
+ * Test rule that starts a TikaContainer before the wrapped statement runs and exposes a
+ * TestTikaModule built around that container.
+ */
+public class GuiceTikaRule implements GuiceModuleTestRule {
+
+    // Created and started in apply()'s statement; null until that statement has been evaluated.
+    private TikaContainer tika;
+
+    @Override
+    public Statement apply(Statement base, Description description) {
+        return new Statement() {
+            @Override
+            public void evaluate() throws Throwable {
+                // NOTE(review): the container is started but never stopped here, not even
+                // when base.evaluate() throws -- consider a try/finally once a stop method
+                // on TikaContainer is confirmed.
+                tika = new TikaContainer();
+                tika.start();
+                base.evaluate();
+            }
+        };
+    }
+
+    @Override
+    public void await() {
+        // Intentionally empty: this rule does not wait on anything.
+    }
+
+    /** Returns a new TestTikaModule wrapping the current container (null before the rule has run). */
+    @Override
+    public Module getModule() {
+        return new TestTikaModule(tika);
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/CassandraJmapServerModule.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/CassandraJmapServerModule.java b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/CassandraJmapServerModule.java
index 140f0a7..5a61948 100644
--- a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/CassandraJmapServerModule.java
+++ b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/CassandraJmapServerModule.java
@@ -27,6 +27,8 @@ import javax.inject.Singleton;
 import org.apache.james.backends.cassandra.CassandraCluster;
 import org.apache.james.backends.cassandra.EmbeddedCassandra;
 import org.apache.james.backends.es.EmbeddedElasticSearch;
+import org.apache.james.mailbox.extractor.TextExtractor;
+import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
 import org.junit.rules.TemporaryFolder;
 
 import com.datastax.driver.core.Session;
@@ -56,6 +58,7 @@ public class CassandraJmapServerModule extends AbstractModule {
         install(new TestElasticSearchModule(embeddedElasticSearch));
         install(new TestFilesystemModule(fileSupplier));
         install(new TestJMAPServerModule(LIMIT_TO_3_MESSAGES));
+        install(binder -> binder.bind(TextExtractor.class).to(DefaultTextExtractor.class));
         bind(EmbeddedCassandra.class).toInstance(cassandra);
     }
     

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/TestTikaModule.java
----------------------------------------------------------------------
diff --git a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/TestTikaModule.java b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/TestTikaModule.java
new file mode 100644
index 0000000..3985e2a
--- /dev/null
+++ b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/TestTikaModule.java
@@ -0,0 +1,65 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.modules;
+
+import java.net.URISyntaxException;
+
+import javax.inject.Singleton;
+
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.apache.james.mailbox.tika.TikaContainer;
+import org.apache.james.mailbox.tika.TikaHttpClient;
+import org.apache.james.mailbox.tika.TikaHttpClientImpl;
+
+import com.google.inject.AbstractModule;
+import com.google.inject.Provides;
+
+/**
+ * Test Guice module providing a TikaHttpClient that targets the given TikaContainer.
+ */
+public class TestTikaModule extends AbstractModule{
+
+    private final TikaContainer tika;
+
+    public TestTikaModule(TikaContainer tika) {
+        this.tika = tika;
+    }
+
+    @Override
+    protected void configure() {
+        // No explicit bindings; the client comes from the @Provides method below.
+
+    }
+
+    /**
+     * Builds the singleton client from the container's IP, port and timeout.
+     *
+     * @throws URISyntaxException declared by this provider; presumably from TikaHttpClientImpl (confirm)
+     */
+    @Provides
+    @Singleton
+    protected TikaHttpClient provideTikaHttpClient() throws URISyntaxException {
+        return new TikaHttpClientImpl(TikaConfiguration.builder()
+                .host(tika.getIp())
+                .port(tika.getPort())
+                .timeoutInMillis(tika.getTimeoutInMillis())
+                .build());
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/1425048e/server/container/util-java8/src/test/java/org/apache/james/util/streams/SwarmGenericContainer.java
----------------------------------------------------------------------
diff --git a/server/container/util-java8/src/test/java/org/apache/james/util/streams/SwarmGenericContainer.java b/server/container/util-java8/src/test/java/org/apache/james/util/streams/SwarmGenericContainer.java
index 4e29371..9174772 100644
--- a/server/container/util-java8/src/test/java/org/apache/james/util/streams/SwarmGenericContainer.java
+++ b/server/container/util-java8/src/test/java/org/apache/james/util/streams/SwarmGenericContainer.java
@@ -19,6 +19,7 @@
 
 package org.apache.james.util.streams;
 
+import java.time.Duration;
 import java.util.List;
 
 import org.junit.Assume;
@@ -28,6 +29,7 @@ import org.junit.runners.model.Statement;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.containers.wait.WaitStrategy;
 import org.testcontainers.images.builder.ImageFromDockerfile;
 import org.testcontainers.shaded.com.github.dockerjava.api.command.InspectContainerResponse;
 
@@ -56,7 +58,6 @@ public class SwarmGenericContainer implements TestRule {
             logAndCheckSkipTest(e);
         }
     }
-
     private void logAndCheckSkipTest(IllegalStateException e) {
         LOGGER.error("Cannot initial a docker container because: " + e);
         if (e.getMessage().startsWith(NO_DOCKER_ENVIRONMENT)) {
@@ -81,6 +82,24 @@ public class SwarmGenericContainer implements TestRule {
         return this;
     }
 
+    /** Forwards the ports to the wrapped container's withExposedPorts and returns this rule for chaining. */
+    public SwarmGenericContainer withExposedPorts(Integer... ports) {
+        container.withExposedPorts(ports);
+        return this;
+    }
+
+    /** Forwards the wait strategy to the wrapped container's waitingFor and returns this rule for chaining. */
+    public SwarmGenericContainer waitingFor(WaitStrategy waitStrategy) {
+        container.waitingFor(waitStrategy);
+        return this;
+    }
+
+    /** Forwards the timeout to the wrapped container's withStartupTimeout and returns this rule for chaining. */
+    public SwarmGenericContainer withStartupTimeout(Duration startupTimeout) {
+        container.withStartupTimeout(startupTimeout);
+        return this;
+    }
+
     public void start() {
         container.start();
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org


Mime
View raw message