[ https://issues.apache.org/jira/browse/TIKA-691?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13084853#comment-13084853 ] Nick Burch commented on TIKA-691: --------------------------------- Can you try switching the version of POI you use in Tika to a recent nightly build, and see if that fixes it? Sergey has been doing some amazing work very recently on HWPF, which may well have solved this issue. > java.lang.ArrayIndexOutOfBoundsException by MS Word CDF V2 Document > ------------------------------------------------------------------- > > Key: TIKA-691 > URL: https://issues.apache.org/jira/browse/TIKA-691 > Project: Tika > Issue Type: Bug > Components: parser > Affects Versions: 1.0 > Environment: Ubuntu 11.10 > java version "1.6.0_22" > OpenJDK Runtime Environment (IcedTea6 1.10.2) (6b22-1.10.2-0ubuntu1~11.04.1) > Reporter: Eddie Verkhoturov > Fix For: 1.0 > > Attachments: konk.doc > > > When I open a document: > #> file konk.doc > konk.doc: CDF V2 Document, Little Endian, Os: Windows, Version 1.0, Code page: -535, Author: Master, Template: Normal.dotm, Last Saved By: 1, Revision Number: 2, Total Editing Time: 23d+22:11:08, Last Printed: Tue Mar 15 08:28:00 2011, Create Time/Date: Tue Feb 16 06:06:00 2010, Last Saved Time/Date: Tue Feb 16 06:06:00 2010 > then I get an error: > Apache Tika was unable to parse the document > at /home/eddie/proj/konk.doc. 
> The full exception stack trace is included below: > org.apache.tika.exception.TikaException: Unexpected RuntimeException from org.apache.tika.parser.microsoft.OfficeParser@1db5d2b2 > at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:244) > at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242) > at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:129) > at org.apache.tika.gui.TikaGUI.handleStream(TikaGUI.java:320) > at org.apache.tika.gui.TikaGUI.openFile(TikaGUI.java:279) > at org.apache.tika.gui.TikaGUI.actionPerformed(TikaGUI.java:238) > at javax.swing.AbstractButton.fireActionPerformed(AbstractButton.java:2012) > at javax.swing.AbstractButton$Handler.actionPerformed(AbstractButton.java:2335) > at javax.swing.DefaultButtonModel.fireActionPerformed(DefaultButtonModel.java:404) > at javax.swing.DefaultButtonModel.setPressed(DefaultButtonModel.java:259) > at javax.swing.AbstractButton.doClick(AbstractButton.java:374) > at javax.swing.plaf.basic.BasicMenuItemUI.doClick(BasicMenuItemUI.java:829) > at javax.swing.plaf.basic.BasicMenuItemUI$Handler.mouseReleased(BasicMenuItemUI.java:873) > at java.awt.Component.processMouseEvent(Component.java:6268) > at javax.swing.JComponent.processMouseEvent(JComponent.java:3267) > at java.awt.Component.processEvent(Component.java:6033) > at java.awt.Container.processEvent(Container.java:2045) > at java.awt.Component.dispatchEventImpl(Component.java:4629) > at java.awt.Container.dispatchEventImpl(Container.java:2103) > at java.awt.Component.dispatchEvent(Component.java:4455) > at java.awt.LightweightDispatcher.retargetMouseEvent(Container.java:4633) > at java.awt.LightweightDispatcher.processMouseEvent(Container.java:4297) > at java.awt.LightweightDispatcher.dispatchEvent(Container.java:4227) > at java.awt.Container.dispatchEventImpl(Container.java:2089) > at java.awt.Window.dispatchEventImpl(Window.java:2517) > at java.awt.Component.dispatchEvent(Component.java:4455) > at 
java.awt.EventQueue.dispatchEventImpl(EventQueue.java:649) > at java.awt.EventQueue.access$000(EventQueue.java:96) > at java.awt.EventQueue$1.run(EventQueue.java:608) > at java.awt.EventQueue$1.run(EventQueue.java:606) > at java.security.AccessController.doPrivileged(Native Method) > at java.security.AccessControlContext$1.doIntersectionPrivilege(AccessControlContext.java:105) > at java.security.AccessControlContext$1.doIntersectionPrivilege(AccessControlContext.java:116) > at java.awt.EventQueue$2.run(EventQueue.java:622) > at java.awt.EventQueue$2.run(EventQueue.java:620) > at java.security.AccessController.doPrivileged(Native Method) > at java.security.AccessControlContext$1.doIntersectionPrivilege(AccessControlContext.java:105) > at java.awt.EventQueue.dispatchEvent(EventQueue.java:619) > at java.awt.EventDispatchThread.pumpOneEventForFilters(EventDispatchThread.java:275) > at java.awt.EventDispatchThread.pumpEventsForFilter(EventDispatchThread.java:200) > at java.awt.EventDispatchThread.pumpEventsForHierarchy(EventDispatchThread.java:190) > at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:185) > at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:177) > at java.awt.EventDispatchThread.run(EventDispatchThread.java:138) > Caused by: java.lang.ArrayIndexOutOfBoundsException: 263 > at org.apache.poi.util.LittleEndian.getShort(LittleEndian.java:45) > at org.apache.poi.hwpf.sprm.SprmOperation.getOperand(SprmOperation.java:98) > at org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor.unCompressPAPOperation(ParagraphSprmUncompressor.java:174) > at org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor.uncompressPAP(ParagraphSprmUncompressor.java:63) > at org.apache.poi.hwpf.model.PAPX.getParagraphProperties(PAPX.java:136) > at org.apache.poi.hwpf.usermodel.Range.getParagraph(Range.java:833) > at org.apache.tika.parser.microsoft.WordExtractor.parse(WordExtractor.java:81) > at 
org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:198) > at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242) > ... 43 more -- This message is automatically generated by JIRA. For more information on JIRA, see: http://www.atlassian.com/software/jira