[ https://issues.apache.org/jira/browse/TIKA-2899?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16877776#comment-16877776
]
Pandurang commented on TIKA-2899:
---------------------------------
See the error below for the attached .rtf file [^ABC_PL_WI.rtf]:
<?xml version="1.0" encoding="UTF-8"?>
<response>
<lst name="responseHeader">
<int name="status">500</int>
<int name="QTime">7</int>
</lst>
<lst name="error">
<lst name="metadata">
<str name="error-class">org.apache.solr.common.SolrException</str>
<str name="root-error-class">java.lang.IllegalStateException</str>
</lst>
<str name="msg">org.apache.tika.exception.TikaException: Unexpected RuntimeException
from org.apache.tika.parser.rtf.RTFParser@43d2b389</str>
<str name="trace">org.apache.solr.common.SolrException: org.apache.tika.exception.TikaException:
Unexpected RuntimeException from org.apache.tika.parser.rtf.RTFParser@43d2b389
at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:234)
at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:199)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:2559)
at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:516)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:394)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:340)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1602)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:540)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:146)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:257)
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1588)
at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1345)
at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:480)
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1557)
at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1247)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:220)
at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.server.Server.handle(Server.java:502)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:364)
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:260)
at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:305)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:103)
at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:118)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126)
at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:765)
at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:683)
at java.base/java.lang.Thread.run(Thread.java:835)
Caused by: org.apache.tika.exception.TikaException: Unexpected RuntimeException from org.apache.tika.parser.rtf.RTFParser@43d2b389
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:282)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
... 41 more
Caused by: java.lang.IllegalStateException: Internal: Internal error: element state is zero.
at org.apache.xml.serialize.BaseMarkupSerializer.leaveElementState(Unknown Source)
at org.apache.xml.serialize.TextSerializer.endElementIO(Unknown Source)
at org.apache.xml.serialize.TextSerializer.endElement(Unknown Source)
at org.apache.xml.serialize.TextSerializer.endElement(Unknown Source)
at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
at org.apache.tika.sax.SecureContentHandler.endElement(SecureContentHandler.java:256)
at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:136)
at org.apache.tika.sax.SafeContentHandler.endElement(SafeContentHandler.java:274)
at org.apache.tika.sax.XHTMLContentHandler.endElement(XHTMLContentHandler.java:271)
at org.apache.tika.sax.XHTMLContentHandler.endElement(XHTMLContentHandler.java:306)
at org.apache.tika.parser.rtf.TextExtractor.endList(TextExtractor.java:989)
at org.apache.tika.parser.rtf.TextExtractor.lazyStartParagraph(TextExtractor.java:583)
at org.apache.tika.parser.rtf.TextExtractor.processControlWord(TextExtractor.java:1106)
at org.apache.tika.parser.rtf.TextExtractor.parseControlWord(TextExtractor.java:567)
at org.apache.tika.parser.rtf.TextExtractor.parseControlToken(TextExtractor.java:491)
at org.apache.tika.parser.rtf.TextExtractor.extract(TextExtractor.java:453)
at org.apache.tika.parser.rtf.TextExtractor.extract(TextExtractor.java:442)
at org.apache.tika.parser.rtf.RTFParser.parse(RTFParser.java:98)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
... 44 more
</str>
<int name="code">500</int>
</lst>
</response>
> org.apache.tika.exception.TikaException: Unexpected RuntimeException from org.apache.tika.parser.rtf.RTFParser@375a26af
> -----------------------------------------------------------------------------------------------------------------------
>
> Key: TIKA-2899
> URL: https://issues.apache.org/jira/browse/TIKA-2899
> Project: Tika
> Issue Type: Bug
> Components: parser
> Affects Versions: 1.19
> Reporter: Pandurang
> Priority: Critical
> Attachments: ABC_PL_WI.rtf
>
>
> I am using Solr 8.0 with the SolrNet library to extract text from binary data.
> In some cases we are getting the error below.
> It's working fine for 99% of documents, but it fails for about 1% of them.
> Caused by: org.apache.tika.exception.TikaException: Unexpected RuntimeException from org.apache.tika.parser.rtf.RTFParser@375a26af
> at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:282)
> at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
> at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
> at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:228)
> ... 41 more
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
|