manifoldcf-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Ameya Aware <ameya.aw...@gmail.com>
Subject Re: Query about content of the file
Date Tue, 22 Jul 2014 16:03:35 GMT
Hi Karl,

I was getting many TikkaException errors at first, so i ignored them by
setting that field in solrconfig.xml. After that crawling happened smoothly.

But now i ran into java heap space issue. Please see below log.


ERROR - 2014-07-22 11:38:59.370; org.apache.solr.common.SolrException;
null:java.lang.RuntimeException: java.lang.OutOfMemoryError: Java heap space
at
org.apache.solr.servlet.SolrDispatchFilter.sendError(SolrDispatchFilter.java:790)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:439)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
at
org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
at
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
at
org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
at
org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
at
org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
at
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
at
org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
at
org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
at org.eclipse.jetty.server.Server.handle(Server.java:368)
at
org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
at
org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
at
org.eclipse.jetty.server.AbstractHttpConnection.headerComplete(AbstractHttpConnection.java:942)
at
org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.headerComplete(AbstractHttpConnection.java:1004)
at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:636)
at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:235)
at
org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
at
org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
at
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
at
org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.OutOfMemoryError: Java heap space
at org.apache.solr.common.util.JavaBinCodec.writeStr(JavaBinCodec.java:567)
at
org.apache.solr.common.util.JavaBinCodec.writePrimitive(JavaBinCodec.java:646)
at
org.apache.solr.common.util.JavaBinCodec.writeKnownType(JavaBinCodec.java:240)
at org.apache.solr.common.util.JavaBinCodec.writeVal(JavaBinCodec.java:153)
at
org.apache.solr.common.util.JavaBinCodec.writeSolrInputDocument(JavaBinCodec.java:409)
at org.apache.solr.update.TransactionLog.write(TransactionLog.java:353)
at org.apache.solr.update.UpdateLog.add(UpdateLog.java:397)
at org.apache.solr.update.UpdateLog.add(UpdateLog.java:382)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:255)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:160)
at
org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:704)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:858)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:557)
at
org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
at
org.apache.solr.handler.extraction.ExtractingDocumentLoader.doAdd(ExtractingDocumentLoader.java:121)
at
org.apache.solr.handler.extraction.ExtractingDocumentLoader.addDoc(ExtractingDocumentLoader.java:126)
at
org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
at
org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
at
org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
at
org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:241)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:1952)
at
org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:774)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
at
org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
at
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
at
org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
at
org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)

WARN  - 2014-07-22 11:38:59.479; org.eclipse.jetty.servlet.ServletHandler;
Error for /solr/collection1/update/extract
java.lang.OutOfMemoryError: Java heap space
at org.apache.solr.common.util.JavaBinCodec.writeStr(JavaBinCodec.java:567)
at
org.apache.solr.common.util.JavaBinCodec.writePrimitive(JavaBinCodec.java:646)
at
org.apache.solr.common.util.JavaBinCodec.writeKnownType(JavaBinCodec.java:240)
at org.apache.solr.common.util.JavaBinCodec.writeVal(JavaBinCodec.java:153)
at
org.apache.solr.common.util.JavaBinCodec.writeSolrInputDocument(JavaBinCodec.java:409)
at org.apache.solr.update.TransactionLog.write(TransactionLog.java:353)
at org.apache.solr.update.UpdateLog.add(UpdateLog.java:397)
at org.apache.solr.update.UpdateLog.add(UpdateLog.java:382)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:255)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:160)
at
org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:704)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:858)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:557)
at
org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
at
org.apache.solr.handler.extraction.ExtractingDocumentLoader.doAdd(ExtractingDocumentLoader.java:121)
at
org.apache.solr.handler.extraction.ExtractingDocumentLoader.addDoc(ExtractingDocumentLoader.java:126)
at
org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
at
org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
at
org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
at
org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:241)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:1952)
at
org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:774)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
at
org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
at
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
at
org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
at
org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)


Can you advice me how can i fix this.


Thanks,
Ameya


On Mon, Jul 21, 2014 at 7:11 PM, Karl Wright <daddywri@gmail.com> wrote:

> Hi Ameya,
>
> We've not under the most wild circumstances ever considered the need to
> prevent the actual content of a file from being indexed.
>
> If you are indexing into Solr, and the thing that is failing is content
> extraction (and it is aborting your job), then please be aware there is a
> way in Solr to ignore this error.  Please search this list and you will see
> it posted numerous times.
>
> Karl
>
>
>
> On Mon, Jul 21, 2014 at 10:51 AM, Ameya Aware <ameya.aware@gmail.com>
> wrote:
>
>> Hi
>>
>> How can i not send content of the file to Solr?
>>
>> I do not want the content of the file being sent to Solr and getting
>> indexed because indexing the content is causing lots of errors.
>>
>>
>> Thanks,
>> Ameya
>>
>
>

Mime
View raw message