tika-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Michael McCandless <luc...@mikemccandless.com>
Subject Re: svn commit: r1163970 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/extractor/ tika-core/src/main/java/org/apache/tika/io/ tika-core/src/main/java/org/apache/tika/parser/ tika-core/src/main/java/org/apache/tika/parser/external/ tika-pa
Date Thu, 01 Sep 2011 10:23:09 GMT
Can we just remove (not deprecate) TemporaryFiles...?  (We are not at
1.0 release yet).

Mike McCandless

http://blog.mikemccandless.com

On Thu, Sep 1, 2011 at 5:38 AM,  <jukka@apache.org> wrote:
> Author: jukka
> Date: Thu Sep  1 09:38:04 2011
> New Revision: 1163970
>
> URL: http://svn.apache.org/viewvc?rev=1163970&view=rev
> Log:
> TIKA-701: Fix problems with TemporaryFiles
>
> Add a more generic TemporaryResources class that can handle any kind of Closeable resource.
>
> Use the new TemporaryResources class in TikaInputStream to better track all the resources being used.
>
> Update all client classes to use the TemporaryResources class instead of TemporaryFiles.
>
> Fix some problems in how TikaInputStreams were being used.
>
> Added:
>    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java
> Modified:
>    tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java
>    tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
>    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java
>    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
>    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
>    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
>    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java
>    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
>    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
>    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
>    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
>    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
>    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
>
> Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java (original)
> +++ tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java Thu Sep  1 09:38:04 2011
> @@ -25,7 +25,7 @@ import org.apache.tika.config.TikaConfig
>  import org.apache.tika.detect.DefaultDetector;
>  import org.apache.tika.detect.Detector;
>  import org.apache.tika.exception.TikaException;
> -import org.apache.tika.io.TemporaryFiles;
> +import org.apache.tika.io.TemporaryResources;
>  import org.apache.tika.io.TikaInputStream;
>  import org.apache.tika.metadata.Metadata;
>  import org.apache.tika.mime.MediaType;
> @@ -106,7 +106,7 @@ public class ParserContainerExtractor im
>                 InputStream stream, ContentHandler ignored,
>                 Metadata metadata, ParseContext context)
>                 throws IOException, SAXException, TikaException {
> -            TemporaryFiles tmp = new TemporaryFiles();
> +            TemporaryResources tmp = new TemporaryResources();
>             try {
>                 TikaInputStream tis = TikaInputStream.get(stream, tmp);
>
> @@ -121,8 +121,13 @@ public class ParserContainerExtractor im
>                     // Use a temporary file to process the stream twice
>                     File file = tis.getFile();
>
> -                    // Let the handler process the embedded resource
> -                    handler.handle(filename, type, TikaInputStream.get(file));
> +                    // Let the handler process the embedded resource
> +                    InputStream input = TikaInputStream.get(file);
> +                    try {
> +                        handler.handle(filename, type, input);
> +                    } finally {
> +                        input.close();
> +                    }
>
>                     // Recurse
>                     extractor.extract(tis, extractor, handler);
>
> Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java (original)
> +++ tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java Thu Sep  1 09:38:04 2011
> @@ -25,7 +25,7 @@ import java.io.InputStream;
>
>  import org.apache.tika.exception.TikaException;
>  import org.apache.tika.io.CloseShieldInputStream;
> -import org.apache.tika.io.TemporaryFiles;
> +import org.apache.tika.io.TemporaryResources;
>  import org.apache.tika.io.TikaInputStream;
>  import org.apache.tika.metadata.Metadata;
>  import org.apache.tika.parser.DelegatingParser;
> @@ -90,7 +90,7 @@ public class ParsingEmbeddedDocumentExtr
>         }
>
>         // Use the delegate parser to parse this entry
> -        TemporaryFiles tmp = new TemporaryFiles();
> +        TemporaryResources tmp = new TemporaryResources();
>         try {
>             DELEGATING_PARSER.parse(
>                     TikaInputStream.get(new CloseShieldInputStream(stream), tmp),
> @@ -99,7 +99,7 @@ public class ParsingEmbeddedDocumentExtr
>         } catch (TikaException e) {
>             // Could not parse the entry, just skip the content
>         } finally {
> -            tmp.dispose();
> +            tmp.close();
>         }
>
>         if(outputHtml) {
>
> Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java (original)
> +++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java Thu Sep  1 09:38:04 2011
> @@ -16,24 +16,19 @@
>  */
>  package org.apache.tika.io;
>
> -import java.io.File;
>  import java.io.IOException;
> -import java.util.ArrayList;
> -import java.util.List;
>
> -public class TemporaryFiles {
> -
> -    private final List<File> files = new ArrayList<File>();
> -
> -    public File createTemporaryFile() throws IOException {
> -        File file = File.createTempFile("apache-tika-", ".tmp");
> -        files.add(file);
> -        return file;
> -    }
> +/**
> + * @deprecated Use the {@link TemporaryResources} class instead
> + */
> +public class TemporaryFiles extends TemporaryResources {
>
> +    @Override
>     public void dispose() {
> -        for (File file : files) {
> -            file.delete();
> +        try {
> +            close();
> +        } catch (IOException e) {
> +            throw new RuntimeException(e);
>         }
>     }
>
>
> Added: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java?rev=1163970&view=auto
> ==============================================================================
> --- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java (added)
> +++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java Thu Sep  1 09:38:04 2011
> @@ -0,0 +1,156 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +package org.apache.tika.io;
> +
> +import java.io.Closeable;
> +import java.io.File;
> +import java.io.IOException;
> +import java.util.LinkedList;
> +import java.util.List;
> +
> +import org.apache.tika.exception.TikaException;
> +
> +/**
> + * Utility class for tracking and ultimately closing or otherwise disposing
> + * a collection of temporary resources.
> + * <p>
> + * Note that this class is not thread-safe.
> + *
> + * @since Apache Tika 1.0
> + */
> +public class TemporaryResources implements Closeable {
> +
> +    /**
> +     * Tracked resources in LIFO order.
> +     */
> +    private final LinkedList<Closeable> resources = new LinkedList<Closeable>();
> +
> +    /**
> +     * Directory for temporary files, <code>null</code> for the system default.
> +     */
> +    private File tmp = null;
> +
> +    /**
> +     * Sets the directory to be used for the temporary files created by
> +     * the {@link #createTemporaryFile()} method.
> +     *
> +     * @param tmp temporary file directory,
> +     *            or <code>null</code> for the system default
> +     */
> +    public void setTemporaryFileDirectory(File tmp) {
> +        this.tmp = tmp;
> +    }
> +
> +    /**
> +     * Creates and returns a temporary file that will automatically be
> +     * deleted when the {@link #close()} method is called.
> +     *
> +     * @return
> +     * @throws IOException
> +     */
> +    public File createTemporaryFile() throws IOException {
> +        final File file = File.createTempFile("apache-tika-", ".tmp", tmp);
> +        addResource(new Closeable() {
> +            public void close() throws IOException {
> +                if (!file.delete()) {
> +                    throw new IOException(
> +                            "Could not delete temporary file "
> +                            + file.getPath());
> +                }
> +            }
> +        });
> +        return file;
> +    }
> +
> +    /**
> +     * Adds a new resource to the set of tracked resources that will all be
> +     * closed when the {@link #close()} method is called.
> +     *
> +     * @param resource resource to be tracked
> +     */
> +    public void addResource(Closeable resource) {
> +        resources.addFirst(resource);
> +    }
> +
> +    /**
> +     * Returns the latest of the tracked resources that implements or
> +     * extends the given interface or class.
> +     *
> +     * @param klass interface or class
> +     * @return matching resource, or <code>null</code> if not found
> +     */
> +    @SuppressWarnings("unchecked")
> +    public <T extends Closeable> T getResource(Class<T> klass) {
> +        for (Closeable resource : resources) {
> +            if (klass.isAssignableFrom(resource.getClass())) {
> +                return (T) resource;
> +            }
> +        }
> +        return null;
> +    }
> +
> +    /**
> +     * Closes all tracked resources. The resources are closed in reverse order
> +     * from how they were added.
> +     * <p>
> +     * Any thrown exceptions from managed resources are collected and
> +     * then re-thrown only once all the resources have been closed.
> +     *
> +     * @throws IOException if one or more of the tracked resources
> +     *                     could not be closed
> +     */
> +    public void close() throws IOException {
> +        // Release all resources and keep track of any exceptions
> +        List<IOException> exceptions = new LinkedList<IOException>();
> +        for (Closeable resource : resources) {
> +            try {
> +                resource.close();
> +            } catch (IOException e) {
> +                exceptions.add(e);
> +            }
> +        }
> +        resources.clear();
> +
> +        // Throw any exceptions that were captured from above
> +        if (!exceptions.isEmpty()) {
> +            if (exceptions.size() == 1) {
> +                throw exceptions.get(0);
> +            } else {
> +                throw new IOExceptionWithCause(
> +                        "Multiple IOExceptions" + exceptions,
> +                        exceptions.get(0));
> +            }
> +        }
> +    }
> +
> +    /**
> +     * Calls the {@link #close()} method and wraps the potential
> +     * {@link IOException} into a {@link TikaException} for convenience
> +     * when used within Tika.
> +     *
> +     * @throws TikaException if one or more of the tracked resources
> +     *                       could not be closed
> +     */
> +    public void dispose() throws TikaException {
> +        try {
> +            close();
> +        } catch (IOException e) {
> +            throw new TikaException("Failed to close temporary resources", e);
> +        }
> +    }
> +
> +}
>
> Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java (original)
> +++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java Thu Sep  1 09:38:04 2011
> @@ -30,6 +30,7 @@ import java.net.URI;
>  import java.net.URISyntaxException;
>  import java.net.URL;
>  import java.net.URLConnection;
> +import java.nio.channels.FileChannel;
>  import java.sql.Blob;
>  import java.sql.SQLException;
>
> @@ -84,34 +85,54 @@ public class TikaInputStream extends Tag
>      * when you <em>don't</em> explicitly close the returned stream. The
>      * recommended access pattern is:
>      * <pre>
> -     * TemporaryFiles tmp = new TemporaryFiles();
> +     * TemporaryResources tmp = new TemporaryResources();
>      * try {
>      *     TikaInputStream stream = TikaInputStream.get(..., tmp);
>      *     // process stream but don't close it
>      * } finally {
> -     *     tmp.dispose();
> +     *     tmp.close();
>      * }
>      * </pre>
> +     * <p>
> +     * The given stream instance will <em>not</em> be closed when the
> +     * {@link TemporaryResources#close()} method is called. The caller
> +     * is expected to explicitly close the original stream when it's no
> +     * longer used.
>      *
>      * @param stream normal input stream
>      * @return a TikaInputStream instance
>      */
> -    public static TikaInputStream get(InputStream stream, TemporaryFiles tmp) {
> +    public static TikaInputStream get(
> +            InputStream stream, TemporaryResources tmp) {
>         if (stream instanceof TikaInputStream) {
>             return (TikaInputStream) stream;
>         } else {
> +            // Make sure that the stream is buffered and that it
> +            // (properly) supports the mark feature
> +            if (!(stream instanceof BufferedInputStream)
> +                    && !(stream instanceof ByteArrayInputStream)) {
> +                stream = new BufferedInputStream(stream);
> +            }
>             return new TikaInputStream(stream, tmp, -1);
>         }
>     }
>
>     /**
> +     * @deprecated Use the {@link #get(InputStream, TemporaryResources)} instead
> +     */
> +    public static TikaInputStream get(InputStream stream, TemporaryFiles tmp) {
> +        return get(stream, (TemporaryResources) tmp);
> +    }
> +
> +    /**
>      * Casts or wraps the given stream to a TikaInputStream instance.
>      * This method can be used to access the functionality of this class
>      * even when given just a normal input stream instance.
>      * <p>
> -     * Use this method instead of the {@link #get(InputStream, TemporaryFiles)}
> -     * alternative when you <em>do</em> explicitly close the returned stream.
> -     * The recommended access pattern is:
> +     * Use this method instead of the
> +     * {@link #get(InputStream, TemporaryResources)} alternative when you
> +     * <em>do</em> explicitly close the returned stream. The recommended
> +     * access pattern is:
>      * <pre>
>      * TikaInputStream stream = TikaInputStream.get(...);
>      * try {
> @@ -120,12 +141,16 @@ public class TikaInputStream extends Tag
>      *     stream.close();
>      * }
>      * </pre>
> +     * <p>
> +     * The given stream instance will be closed along with any other resources
> +     * associated with the returned TikaInputStream instance when the
> +     * {@link #close()} method is called.
>      *
>      * @param stream normal input stream
>      * @return a TikaInputStream instance
>      */
>     public static TikaInputStream get(InputStream stream) {
> -        return get(stream, new TemporaryFiles());
> +        return get(stream, new TemporaryResources());
>     }
>
>     /**
> @@ -156,7 +181,8 @@ public class TikaInputStream extends Tag
>     public static TikaInputStream get(byte[] data, Metadata metadata) {
>         metadata.set(Metadata.CONTENT_LENGTH, Integer.toString(data.length));
>         return new TikaInputStream(
> -                new ByteArrayInputStream(data), new TemporaryFiles(), data.length);
> +                new ByteArrayInputStream(data),
> +                new TemporaryResources(), data.length);
>     }
>
>     /**
> @@ -247,7 +273,7 @@ public class TikaInputStream extends Tag
>         } else {
>             return new TikaInputStream(
>                     new BufferedInputStream(blob.getBinaryStream()),
> -                    null, length);
> +                    new TemporaryResources(), length);
>         }
>     }
>
> @@ -355,25 +381,7 @@ public class TikaInputStream extends Tag
>
>         return new TikaInputStream(
>                 new BufferedInputStream(connection.getInputStream()),
> -                new TemporaryFiles(), length);
> -    }
> -
> -    /**
> -     * Makes sure that a stream is buffered and correctly supports the
> -     * mark feature by wrapping the given stream to a
> -     * {@link BufferedInputStream} if needed.
> -     *
> -     * @param stream original stream
> -     * @return buffered stream that supports the mark feature
> -     */
> -    private static InputStream withBufferingAndMarkSupport(InputStream stream) {
> -        if (stream instanceof ByteArrayInputStream) {
> -            return stream;
> -        } else if (stream instanceof BufferedInputStream) {
> -            return stream;
> -        } else {
> -            return new BufferedInputStream(stream);
> -        }
> +                new TemporaryResources(), length);
>     }
>
>     /**
> @@ -386,9 +394,9 @@ public class TikaInputStream extends Tag
>     private File file;
>
>     /**
> -     * Temporary file provider.
> +     * Tracker of temporary resources.
>      */
> -    private final TemporaryFiles tmp;
> +    private final TemporaryResources tmp;
>
>     /**
>      * Total length of the stream, or -1 if unknown.
> @@ -422,20 +430,25 @@ public class TikaInputStream extends Tag
>     private TikaInputStream(File file) throws FileNotFoundException {
>         super(new BufferedInputStream(new FileInputStream(file)));
>         this.file = file;
> -        this.tmp = new TemporaryFiles();
> +        this.tmp = new TemporaryResources();
>         this.length = file.length();
>     }
>
>     /**
>      * Creates a TikaInputStream instance. This private constructor is used
>      * by the static factory methods based on the available information.
> +     * <p>
> +     * The given stream needs to be included in the given temporary resource
> +     * collection if the caller wants it also to get closed when the
> +     * {@link #close()} method is invoked.
>      *
>      * @param stream <em>buffered</em> stream (must support the mark feature)
> +     * @param tmp tracker for temporary resources associated with this stream
>      * @param length total length of the stream, or -1 if unknown
>      */
>     private TikaInputStream(
> -            InputStream stream, TemporaryFiles tmp, long length) {
> -        super(withBufferingAndMarkSupport(stream));
> +            InputStream stream, TemporaryResources tmp, long length) {
> +        super(stream);
>         this.file = null;
>         this.tmp = tmp;
>         this.length = length;
> @@ -489,6 +502,9 @@ public class TikaInputStream extends Tag
>      */
>     public void setOpenContainer(Object container) {
>         openContainer = container;
> +        if (container instanceof Closeable) {
> +            tmp.addResource((Closeable) container);
> +        }
>     }
>
>     public boolean hasFile() {
> @@ -497,11 +513,10 @@ public class TikaInputStream extends Tag
>
>     public File getFile() throws IOException {
>         if (file == null) {
> -            if (in == null) {
> -                throw new IOException("Stream has already been read");
> -            } else if (position > 0) {
> +            if (position > 0) {
>                 throw new IOException("Stream is already being read");
>             } else {
> +                // Spool the entire stream into a temporary file
>                 file = tmp.createTemporaryFile();
>                 OutputStream out = new FileOutputStream(file);
>                 try {
> @@ -509,15 +524,37 @@ public class TikaInputStream extends Tag
>                 } finally {
>                     out.close();
>                 }
> -                in.close();
> -                // Re-point the stream at the file now we have it
> -                in = new BufferedInputStream(new FileInputStream(file));
> +
> +                // Create a new input stream and make sure it'll get closed
> +                FileInputStream newStream = new FileInputStream(file);
> +                tmp.addResource(newStream);
> +
> +                // Replace the spooled stream with the new stream in a way
> +                // that still ends up closing the old stream if or when the
> +                // close() method is called. The closing of the new stream
> +                // is already being handled as noted above.
> +                final InputStream oldStream = in;
> +                in = new BufferedInputStream(newStream) {
> +                    @Override
> +                    public void close() throws IOException {
> +                        oldStream.close();
> +                    }
> +                };
> +
>                 length = file.length();
>             }
>         }
>         return file;
>     }
>
> +    public FileChannel getFileChannel() throws IOException {
> +        FileInputStream fis = new FileInputStream(getFile());
> +        tmp.addResource(fis);
> +        FileChannel channel = fis.getChannel();
> +        tmp.addResource(channel);
> +        return channel;
> +    }
> +
>     public boolean hasLength() {
>         return length != -1;
>     }
> @@ -549,46 +586,10 @@ public class TikaInputStream extends Tag
>     }
>
>     @Override
> -    public int available() throws IOException {
> -        if (in == null) {
> -            return 0;
> -        } else {
> -            return super.available();
> -        }
> -    }
> -
> -    @Override
>     public long skip(long ln) throws IOException {
> -        if (in == null) {
> -            return 0;
> -        } else {
> -            long n = super.skip(ln);
> -            position += n;
> -            return n;
> -        }
> -    }
> -
> -    @Override
> -    public int read() throws IOException {
> -        if (in == null) {
> -            return -1;
> -        } else {
> -            return super.read();
> -        }
> -    }
> -
> -    @Override
> -    public int read(byte[] bts, int off, int len) throws IOException {
> -        if (in == null) {
> -            return -1;
> -        } else {
> -            return super.read(bts, off, len);
> -        }
> -    }
> -
> -    @Override
> -    public int read(byte[] bts) throws IOException {
> -        return read(bts, 0, bts.length);
> +        long n = super.skip(ln);
> +        position += n;
> +        return n;
>     }
>
>     @Override
> @@ -611,33 +612,22 @@ public class TikaInputStream extends Tag
>
>     @Override
>     public void close() throws IOException {
> -        if (in != null) {
> -            in.close();
> -            in = null;
> -        }
> -        if (openContainer != null) {
> -           if (openContainer instanceof Closeable) {
> -              ((Closeable)openContainer).close();
> -           }
> -           openContainer = null;
> -        }
>         file = null;
> -        tmp.dispose();
> -    }
> +        mark = -1;
>
> -    @Override
> -    protected void beforeRead(int n) throws IOException {
> -        if (in == null) {
> -            throw new IOException("End of the stream reached");
> -        }
> +        // The close method was explicitly called, so we indeed
> +        // are expected to close the input stream. Handle that
> +        // by adding that stream as a resource to be tracked before
> +        // closing all of them. This way also possible exceptions from
> +        // the close() calls get managed properly.
> +        tmp.addResource(in);
> +        tmp.close();
>     }
>
>     @Override
> -    protected void afterRead(int n) throws IOException {
> +    protected void afterRead(int n) {
>         if (n != -1) {
>             position += n;
> -        } else if (mark == -1) {
> -            close();
>         }
>     }
>
>
> Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
> +++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java Thu Sep  1 09:38:04 2011
> @@ -23,7 +23,7 @@ import org.apache.tika.config.TikaConfig
>  import org.apache.tika.detect.DefaultDetector;
>  import org.apache.tika.detect.Detector;
>  import org.apache.tika.exception.TikaException;
> -import org.apache.tika.io.TemporaryFiles;
> +import org.apache.tika.io.TemporaryResources;
>  import org.apache.tika.io.TikaInputStream;
>  import org.apache.tika.metadata.Metadata;
>  import org.apache.tika.mime.MediaType;
> @@ -114,7 +114,7 @@ public class AutoDetectParser extends Co
>             InputStream stream, ContentHandler handler,
>             Metadata metadata, ParseContext context)
>             throws IOException, SAXException, TikaException {
> -        TemporaryFiles tmp = new TemporaryFiles();
> +        TemporaryResources tmp = new TemporaryResources();
>         try {
>             TikaInputStream tis = TikaInputStream.get(stream, tmp);
>
>
> Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
> +++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Thu Sep  1 09:38:04 2011
> @@ -27,7 +27,7 @@ import java.util.Map;
>  import java.util.Set;
>
>  import org.apache.tika.exception.TikaException;
> -import org.apache.tika.io.TemporaryFiles;
> +import org.apache.tika.io.TemporaryResources;
>  import org.apache.tika.io.TikaInputStream;
>  import org.apache.tika.metadata.Metadata;
>  import org.apache.tika.mime.MediaType;
> @@ -234,7 +234,7 @@ public class CompositeParser extends Abs
>             Metadata metadata, ParseContext context)
>             throws IOException, SAXException, TikaException {
>         Parser parser = getParser(metadata);
> -        TemporaryFiles tmp = new TemporaryFiles();
> +        TemporaryResources tmp = new TemporaryResources();
>         try {
>             TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
>             TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
>
> Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java (original)
> +++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java Thu Sep  1 09:38:04 2011
> @@ -30,7 +30,7 @@ import java.util.Set;
>  import org.apache.tika.exception.TikaException;
>  import org.apache.tika.io.CloseShieldInputStream;
>  import org.apache.tika.io.IOUtils;
> -import org.apache.tika.io.TemporaryFiles;
> +import org.apache.tika.io.TemporaryResources;
>  import org.apache.tika.io.TikaInputStream;
>  import org.apache.tika.metadata.Metadata;
>  import org.apache.tika.mime.MediaType;
> @@ -64,7 +64,7 @@ public class NetworkParser extends Abstr
>             InputStream stream, ContentHandler handler,
>             Metadata metadata, ParseContext context)
>             throws IOException, SAXException, TikaException {
> -        TemporaryFiles tmp = new TemporaryFiles();
> +        TemporaryResources tmp = new TemporaryResources();
>         try {
>             TikaInputStream tis = TikaInputStream.get(stream, tmp);
>             parse(tis, handler, metadata, context);
>
> Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java (original)
> +++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java Thu Sep  1 09:38:04 2011
> @@ -34,7 +34,7 @@ import java.util.regex.Pattern;
>  import org.apache.tika.exception.TikaException;
>  import org.apache.tika.io.IOUtils;
>  import org.apache.tika.io.NullOutputStream;
> -import org.apache.tika.io.TemporaryFiles;
> +import org.apache.tika.io.TemporaryResources;
>  import org.apache.tika.io.TikaInputStream;
>  import org.apache.tika.metadata.Metadata;
>  import org.apache.tika.mime.MediaType;
> @@ -80,9 +80,8 @@ public class ExternalParser extends Abst
>      * @see Runtime#exec(String[])
>      */
>     private String[] command = new String[] { "cat" };
> -
> -    private TemporaryFiles tmp = new TemporaryFiles();
> -
> +
> +    private TemporaryResources tmp = new TemporaryResources();
>
>     public Set<MediaType> getSupportedTypes(ParseContext context) {
>         return getSupportedTypes();
>
> Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java (original)
> +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java Thu Sep  1 09:38:04 2011
> @@ -16,14 +16,13 @@
>  */
>  package org.apache.tika.parser.jpeg;
>
> -import java.io.FilterInputStream;
>  import java.io.IOException;
>  import java.io.InputStream;
>  import java.util.Collections;
>  import java.util.Set;
>
>  import org.apache.tika.exception.TikaException;
> -import org.apache.tika.io.TemporaryFiles;
> +import org.apache.tika.io.TemporaryResources;
>  import org.apache.tika.io.TikaInputStream;
>  import org.apache.tika.metadata.Metadata;
>  import org.apache.tika.mime.MediaType;
> @@ -48,7 +47,7 @@ public class JpegParser extends Abstract
>             InputStream stream, ContentHandler handler,
>             Metadata metadata, ParseContext context)
>             throws IOException, SAXException, TikaException {
> -        TemporaryFiles tmp = new TemporaryFiles();
> +        TemporaryResources tmp = new TemporaryResources();
>         try {
>             TikaInputStream tis = TikaInputStream.get(stream, tmp);
>             new ImageMetadataExtractor(metadata).parseJpeg(tis.getFile());
>
> Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original)
> +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Thu Sep  1 09:38:04 2011
> @@ -165,13 +165,13 @@ public class OfficeParser extends Abstra
>         NPOIFSFileSystem filesystem;
>         if(stream instanceof TikaInputStream) {
>             TikaInputStream tstream = (TikaInputStream)stream;
> -                  if(tstream.getOpenContainer() != null) {
> -                     filesystem = (NPOIFSFileSystem)tstream.getOpenContainer();
> -                  } else if(tstream.hasFile()) {
> -                     filesystem = new NPOIFSFileSystem(tstream.getFile());
> -                  } else {
> -                   filesystem = new NPOIFSFileSystem(tstream);
> -                  }
> +            if(tstream.getOpenContainer() != null) {
> +                filesystem = (NPOIFSFileSystem)tstream.getOpenContainer();
> +            } else if(tstream.hasFile()) {
> +                filesystem = new NPOIFSFileSystem(tstream.getFileChannel());
> +            } else {
> +                filesystem = new NPOIFSFileSystem(tstream);
> +            }
>         } else {
>             filesystem = new NPOIFSFileSystem(stream);
>         }
>
> Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java (original)
> +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java Thu Sep  1 09:38:04 2011
> @@ -21,6 +21,7 @@ import static org.apache.tika.mime.Media
>  import java.io.File;
>  import java.io.IOException;
>  import java.io.InputStream;
> +import java.nio.channels.FileChannel;
>  import java.util.Collections;
>  import java.util.HashSet;
>  import java.util.Set;
> @@ -28,7 +29,7 @@ import java.util.Set;
>  import org.apache.poi.poifs.filesystem.Entry;
>  import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
>  import org.apache.tika.detect.Detector;
> -import org.apache.tika.io.TemporaryFiles;
> +import org.apache.tika.io.TemporaryResources;
>  import org.apache.tika.io.TikaInputStream;
>  import org.apache.tika.metadata.Metadata;
>  import org.apache.tika.mime.MediaType;
> @@ -92,44 +93,41 @@ public class POIFSContainerDetector impl
>
>         // We can only detect the exact type when given a TikaInputStream
>         if (TikaInputStream.isTikaInputStream(input)) {
> -            TemporaryFiles tmp = new TemporaryFiles();
> -            try {
> -                // Look for known top level entry names to detect the document type
> -                Set<String> names =
> -                    getTopLevelNames(TikaInputStream.get(input, tmp));
> -                if (names.contains("Workbook")) {
> -                    return XLS;
> -                } else if (names.contains("EncryptedPackage")) {
> -                    return OLE;
> -                } else if (names.contains("WordDocument")) {
> -                    return DOC;
> -                } else if (names.contains("Quill")) {
> -                    return PUB;
> -                } else if (names.contains("PowerPoint Document")) {
> -                    return PPT;
> -                } else if (names.contains("VisioDocument")) {
> -                    return VSD;
> -                } else if (names.contains("CONTENTS")) {
> -                    return WPS;
> -                } else if (names.contains("\u0001Ole10Native")) {
> -                    return OLE;
> -                } else if (names.contains("PerfectOffice_MAIN")) {
> -                    if (names.contains("SlideShow")) {
> -                        return MediaType.application("x-corelpresentations"); // .shw
> -                    } else if (names.contains("PerfectOffice_OBJECTS")) {
> -                        return MediaType.application("x-quattro-pro"); // .wb?
> -                    }
> -                } else if (names.contains("NativeContent_MAIN")) {
> -                    return MediaType.application("x-quattro-pro"); // .qpw
> -                } else {
> -                    for (String name : names) {
> -                        if (name.startsWith("__substg1.0_")) {
> -                            return MSG;
> -                        }
> +            // No TemporaryResources needed: the input is guaranteed to be a TikaInputStream here
> +            TikaInputStream tis = TikaInputStream.get(input);
> +
> +            // Look for known top level entry names to detect the document type
> +            Set<String> names = getTopLevelNames(tis);
> +            if (names.contains("Workbook")) {
> +                return XLS;
> +            } else if (names.contains("EncryptedPackage")) {
> +                return OLE;
> +            } else if (names.contains("WordDocument")) {
> +                return DOC;
> +            } else if (names.contains("Quill")) {
> +                return PUB;
> +            } else if (names.contains("PowerPoint Document")) {
> +                return PPT;
> +            } else if (names.contains("VisioDocument")) {
> +                return VSD;
> +            } else if (names.contains("CONTENTS")) {
> +                return WPS;
> +            } else if (names.contains("\u0001Ole10Native")) {
> +                return OLE;
> +            } else if (names.contains("PerfectOffice_MAIN")) {
> +                if (names.contains("SlideShow")) {
> +                    return MediaType.application("x-corelpresentations"); // .shw
> +                } else if (names.contains("PerfectOffice_OBJECTS")) {
> +                    return MediaType.application("x-quattro-pro"); // .wb?
> +                }
> +            } else if (names.contains("NativeContent_MAIN")) {
> +                return MediaType.application("x-quattro-pro"); // .qpw
> +            } else {
> +                for (String name : names) {
> +                    if (name.startsWith("__substg1.0_")) {
> +                        return MSG;
>                     }
>                 }
> -            } finally {
> -                tmp.dispose();
>             }
>         }
>
> @@ -141,10 +139,10 @@ public class POIFSContainerDetector impl
>             throws IOException {
>         // Force the document stream to a (possibly temporary) file
>         // so we don't modify the current position of the stream
> -        File file = stream.getFile();
> +        FileChannel channel = stream.getFileChannel();
>
>         try {
> -            NPOIFSFileSystem fs = new NPOIFSFileSystem(file);
> +            NPOIFSFileSystem fs = new NPOIFSFileSystem(channel);
>
>             // Optimize a possible later parsing process by keeping
>             // a reference to the already opened POI file system
>
> Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
> +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Thu Sep  1 09:38:04 2011
> @@ -30,7 +30,7 @@ import org.apache.poi.openxml4j.opc.Pack
>  import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
>  import org.apache.tika.detect.Detector;
>  import org.apache.tika.io.IOUtils;
> -import org.apache.tika.io.TemporaryFiles;
> +import org.apache.tika.io.TemporaryResources;
>  import org.apache.tika.io.TikaInputStream;
>  import org.apache.tika.metadata.Metadata;
>  import org.apache.tika.mime.MediaType;
> @@ -70,7 +70,7 @@ public class ZipContainerDetector implem
>             return MediaType.APPLICATION_ZIP;
>         }
>
> -        TemporaryFiles tmp = new TemporaryFiles();
> +        TemporaryResources tmp = new TemporaryResources();
>         ZipFile zip = null;
>         try {
>             File file = TikaInputStream.get(input, tmp).getFile();
> @@ -99,7 +99,7 @@ public class ZipContainerDetector implem
>                 } catch (IOException e) {
>                 }
>             }
> -            tmp.dispose();
> +            tmp.close();
>         }
>     }
>
>
> Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
> URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
> ==============================================================================
> --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java (original)
> +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java Thu Sep  1 09:38:04 2011
> @@ -16,9 +16,32 @@
>  */
>  package org.apache.tika.parser.rtf;
>
> +import java.io.BufferedOutputStream;
> +import java.io.ByteArrayOutputStream;
> +import java.io.File;
> +import java.io.FileOutputStream;
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.io.OutputStream;
> +import java.io.UnsupportedEncodingException;
> +import java.util.Collections;
> +import java.util.HashMap;
> +import java.util.LinkedList;
> +import java.util.Map;
> +import java.util.Set;
> +import java.util.regex.Matcher;
> +import java.util.regex.Pattern;
> +
> +import javax.swing.text.AttributeSet;
> +import javax.swing.text.BadLocationException;
> +import javax.swing.text.DefaultStyledDocument;
> +import javax.swing.text.Document;
> +import javax.swing.text.StyleContext;
> +import javax.swing.text.rtf.RTFEditorKit;
> +
>  import org.apache.tika.exception.TikaException;
>  import org.apache.tika.io.TaggedInputStream;
> -import org.apache.tika.io.TemporaryFiles;
> +import org.apache.tika.io.TemporaryResources;
>  import org.apache.tika.io.TikaInputStream;
>  import org.apache.tika.metadata.Metadata;
>  import org.apache.tika.mime.MediaType;
> @@ -28,13 +51,6 @@ import org.apache.tika.sax.XHTMLContentH
>  import org.xml.sax.ContentHandler;
>  import org.xml.sax.SAXException;
>
> -import javax.swing.text.*;
> -import javax.swing.text.rtf.RTFEditorKit;
> -import java.io.*;
> -import java.util.*;
> -import java.util.regex.Matcher;
> -import java.util.regex.Pattern;
> -
>  /**
>  * RTF parser
>  */
> @@ -106,7 +122,7 @@ public class RTFParser extends AbstractP
>             Metadata metadata, ParseContext context)
>             throws IOException, SAXException, TikaException {
>         TaggedInputStream tagged = new TaggedInputStream(stream);
> -        TemporaryFiles tmp = new TemporaryFiles();
> +        TemporaryResources tmp = new TemporaryResources();
>         try {
>             File tempFile = tmp.createTemporaryFile();
>             createUnicodeRtfTempFile(tempFile, stream);
>
>
>

Mime
View raw message