001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.BufferedInputStream;
021import java.io.ByteArrayInputStream;
022import java.io.Closeable;
023import java.io.EOFException;
024import java.io.File;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.SequenceInputStream;
028import java.nio.Buffer;
029import java.nio.ByteBuffer;
030import java.nio.channels.FileChannel;
031import java.nio.channels.SeekableByteChannel;
032import java.nio.file.Files;
033import java.nio.file.StandardOpenOption;
034import java.util.Arrays;
035import java.util.Collections;
036import java.util.Comparator;
037import java.util.Enumeration;
038import java.util.EnumSet;
039import java.util.HashMap;
040import java.util.LinkedList;
041import java.util.List;
042import java.util.Map;
043import java.util.zip.Inflater;
044import java.util.zip.ZipException;
045
046import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
047import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
048import org.apache.commons.compress.utils.CountingInputStream;
049import org.apache.commons.compress.utils.IOUtils;
050import org.apache.commons.compress.utils.InputStreamStatistics;
051
052import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
053import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
054import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
055import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
056import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
057
058/**
059 * Replacement for <code>java.util.zip.ZipFile</code>.
060 *
061 * <p>This class adds support for file name encodings other than UTF-8
062 * (which is required to work on ZIP files created by native zip tools)
063 * and is able to skip a preamble like the one found in self
064 * extracting archives.  Furthermore it returns instances of
065 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
066 * instead of <code>java.util.zip.ZipEntry</code>.</p>
067 *
068 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
069 * have to reimplement all methods anyway.  Like
070 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the
071 * covers and supports compressed and uncompressed entries.  As of
072 * Apache Commons Compress 1.3 it also transparently supports Zip64
073 * extensions and thus individual entries and archives larger than 4
074 * GB or with more than 65536 entries.</p>
075 *
076 * <p>The method signatures mimic the ones of
077 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
078 *
079 * <ul>
080 *   <li>There is no getName method.</li>
081 *   <li>entries has been renamed to getEntries.</li>
082 *   <li>getEntries and getEntry return
083 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
084 *   instances.</li>
085 *   <li>close is allowed to throw IOException.</li>
086 * </ul>
087 *
088 */
089public class ZipFile implements Closeable {
090    private static final int HASH_SIZE = 509;
091    static final int NIBLET_MASK = 0x0f;
092    static final int BYTE_SHIFT = 8;
093    private static final int POS_0 = 0;
094    private static final int POS_1 = 1;
095    private static final int POS_2 = 2;
096    private static final int POS_3 = 3;
097    private static final byte[] ONE_ZERO_BYTE = new byte[1];
098
099    /**
100     * List of entries in the order they appear inside the central
101     * directory.
102     */
103    private final List<ZipArchiveEntry> entries =
104        new LinkedList<>();
105
106    /**
107     * Maps String to list of ZipArchiveEntrys, name -> actual entries.
108     */
109    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
110        new HashMap<>(HASH_SIZE);
111
112    /**
113     * The encoding to use for filenames and the file comment.
114     *
115     * <p>For a list of possible values see <a
116     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
117     * Defaults to UTF-8.</p>
118     */
119    private final String encoding;
120
121    /**
122     * The zip encoding to use for filenames and the file comment.
123     */
124    private final ZipEncoding zipEncoding;
125
126    /**
127     * File name of actual source.
128     */
129    private final String archiveName;
130
131    /**
132     * The actual data source.
133     */
134    private final SeekableByteChannel archive;
135
136    /**
137     * Whether to look for and use Unicode extra fields.
138     */
139    private final boolean useUnicodeExtraFields;
140
141    /**
142     * Whether the file is closed.
143     */
144    private volatile boolean closed = true;
145
146    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
147    private final byte[] dwordBuf = new byte[DWORD];
148    private final byte[] wordBuf = new byte[WORD];
149    private final byte[] cfhBuf = new byte[CFH_LEN];
150    private final byte[] shortBuf = new byte[SHORT];
151    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
152    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
153    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
154
/**
 * Opens the given file for reading, decoding file names as "UTF8"
 * and scanning for unicode extra fields.
 *
 * @param f the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(final File f) throws IOException {
    this(f, ZipEncodingHelper.UTF8, true);
}
165
/**
 * Opens the file with the given name for reading, decoding file
 * names as "UTF8" and scanning for unicode extra fields.
 *
 * @param name name of the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(final String name) throws IOException {
    this(new File(name), ZipEncodingHelper.UTF8, true);
}
176
/**
 * Opens the file with the given name for reading.  File names are
 * decoded with the given encoding and unicode extra fields are
 * scanned.
 *
 * @param name name of the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(final String name, final String encoding) throws IOException {
    this(new File(name), encoding, true);
}
190
/**
 * Opens the given file for reading.  File names are decoded with
 * the given encoding and unicode extra fields are scanned.
 *
 * @param f the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(final File f, final String encoding) throws IOException {
    this(f, encoding, true);
}
204
/**
 * Opens the given file for reading, decoding file names with the
 * given encoding.
 *
 * <p>A read-only channel is opened on the file; that channel is
 * closed again if reading the archive fails.</p>
 *
 * @param f the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
    throws IOException {
    this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
         f.getAbsolutePath(),
         encoding,
         useUnicodeExtraFields,
         true);
}
222
/**
 * Opens the given channel for reading, decoding file names as
 * "UTF8" and scanning for unicode extra fields.
 *
 * <p>Use {@link
 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
 * to read from an in-memory archive.</p>
 *
 * <p>The channel is not closed by this constructor if reading
 * the archive fails.</p>
 *
 * @param channel the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 * @since 1.13
 */
public ZipFile(final SeekableByteChannel channel)
        throws IOException {
    this(channel, "unknown archive", ZipEncodingHelper.UTF8, true, false);
}
239
/**
 * Opens the given channel for reading, decoding file names with
 * the given encoding and scanning for unicode extra fields.
 *
 * <p>Use {@link
 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
 * to read from an in-memory archive.</p>
 *
 * <p>The channel is not closed by this constructor if reading
 * the archive fails.</p>
 *
 * @param channel the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 * @since 1.13
 */
public ZipFile(final SeekableByteChannel channel, final String encoding)
    throws IOException {
    this(channel, "unknown archive", encoding, true, false);
}
259
/**
 * Opens the given channel for reading, decoding file names with
 * the given encoding.
 *
 * <p>Use {@link
 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
 * to read from an in-memory archive.</p>
 *
 * <p>The channel is not closed by this constructor if reading
 * the archive fails.</p>
 *
 * @param channel the archive.
 * @param archiveName name of the archive, used for error messages only.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 *
 * @throws IOException if an error occurs while reading the file.
 * @since 1.13
 */
public ZipFile(final SeekableByteChannel channel, final String archiveName,
               final String encoding, final boolean useUnicodeExtraFields)
    throws IOException {
    this(channel,
         archiveName,
         encoding,
         useUnicodeExtraFields,
         false);
}
283
/**
 * Shared constructor: stores the settings, then reads the central
 * directory and resolves the local file header data.
 *
 * @param channel the archive.
 * @param archiveName name of the archive.
 * @param encoding the encoding to use for file names, null for
 * the platform's default encoding
 * @param useUnicodeExtraFields whether to use Unicode extra fields.
 * @param closeOnError whether the channel is closed when reading
 * the archive fails.
 *
 * @throws IOException if an error occurs while reading the file.
 */
private ZipFile(final SeekableByteChannel channel, final String archiveName,
                final String encoding, final boolean useUnicodeExtraFields,
                final boolean closeOnError)
    throws IOException {
    this.archive = channel;
    this.archiveName = archiveName;
    this.encoding = encoding;
    this.useUnicodeExtraFields = useUnicodeExtraFields;
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

    boolean failed = true;
    try {
        resolveLocalFileHeaderData(populateFromCentralDirectory());
        failed = false;
    } finally {
        // the instance only counts as open once everything has been read
        closed = failed;
        if (failed && closeOnError) {
            IOUtils.closeQuietly(archive);
        }
    }
}
306
307    /**
308     * The encoding to use for filenames and the file comment.
309     *
310     * @return null if using the platform's default character encoding.
311     */
312    public String getEncoding() {
313        return encoding;
314    }
315
316    /**
317     * Closes the archive.
318     * @throws IOException if an error occurs closing the archive.
319     */
320    @Override
321    public void close() throws IOException {
322        // this flag is only written here and read in finalize() which
323        // can never be run in parallel.
324        // no synchronization needed.
325        closed = true;
326
327        archive.close();
328    }
329
330    /**
331     * close a zipfile quietly; throw no io fault, do nothing
332     * on a null parameter
333     * @param zipfile file to close, can be null
334     */
335    public static void closeQuietly(final ZipFile zipfile) {
336        IOUtils.closeQuietly(zipfile);
337    }
338
339    /**
340     * Returns all entries.
341     *
342     * <p>Entries will be returned in the same order they appear
343     * within the archive's central directory.</p>
344     *
345     * @return all entries as {@link ZipArchiveEntry} instances
346     */
347    public Enumeration<ZipArchiveEntry> getEntries() {
348        return Collections.enumeration(entries);
349    }
350
351    /**
352     * Returns all entries in physical order.
353     *
354     * <p>Entries will be returned in the same order their contents
355     * appear within the archive.</p>
356     *
357     * @return all entries as {@link ZipArchiveEntry} instances
358     *
359     * @since 1.1
360     */
361    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
362        final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
363        Arrays.sort(allEntries, offsetComparator);
364        return Collections.enumeration(Arrays.asList(allEntries));
365    }
366
367    /**
368     * Returns a named entry - or {@code null} if no entry by
369     * that name exists.
370     *
371     * <p>If multiple entries with the same name exist the first entry
372     * in the archive's central directory by that name is
373     * returned.</p>
374     *
375     * @param name name of the entry.
376     * @return the ZipArchiveEntry corresponding to the given name - or
377     * {@code null} if not present.
378     */
379    public ZipArchiveEntry getEntry(final String name) {
380        final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
381        return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
382    }
383
384    /**
385     * Returns all named entries in the same order they appear within
386     * the archive's central directory.
387     *
388     * @param name name of the entry.
389     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
390     * given name
391     * @since 1.6
392     */
393    public Iterable<ZipArchiveEntry> getEntries(final String name) {
394        final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
395        return entriesOfThatName != null ? entriesOfThatName
396            : Collections.<ZipArchiveEntry>emptyList();
397    }
398
399    /**
400     * Returns all named entries in the same order their contents
401     * appear within the archive.
402     *
403     * @param name name of the entry.
404     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
405     * given name
406     * @since 1.6
407     */
408    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
409        ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
410        if (nameMap.containsKey(name)) {
411            entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
412            Arrays.sort(entriesOfThatName, offsetComparator);
413        }
414        return Arrays.asList(entriesOfThatName);
415    }
416
417    /**
418     * Whether this class is able to read the given entry.
419     *
420     * <p>May return false if it is set up to use encryption or a
421     * compression method that hasn't been implemented yet.</p>
422     * @since 1.1
423     * @param ze the entry
424     * @return whether this class is able to read the given entry.
425     */
426    public boolean canReadEntryData(final ZipArchiveEntry ze) {
427        return ZipUtil.canHandleEntryData(ze);
428    }
429
430    /**
431     * Expose the raw stream of the archive entry (compressed form).
432     *
433     * <p>This method does not relate to how/if we understand the payload in the
434     * stream, since we really only intend to move it on to somewhere else.</p>
435     *
436     * @param ze The entry to get the stream for
437     * @return The raw input stream containing (possibly) compressed data.
438     * @since 1.11
439     */
440    public InputStream getRawInputStream(final ZipArchiveEntry ze) {
441        if (!(ze instanceof Entry)) {
442            return null;
443        }
444        final long start = ze.getDataOffset();
445        return createBoundedInputStream(start, ze.getCompressedSize());
446    }
447
448
449    /**
450     * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream.
451     * Compression and all other attributes will be as in this file.
452     * <p>This method transfers entries based on the central directory of the zip file.</p>
453     *
454     * @param target The zipArchiveOutputStream to write the entries to
455     * @param predicate A predicate that selects which entries to write
456     * @throws IOException on error
457     */
458    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
459            throws IOException {
460        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
461        while (src.hasMoreElements()) {
462            final ZipArchiveEntry entry = src.nextElement();
463            if (predicate.test( entry)) {
464                target.addRawArchiveEntry(entry, getRawInputStream(entry));
465            }
466        }
467    }
468
469    /**
470     * Returns an InputStream for reading the contents of the given entry.
471     *
472     * @param ze the entry to get the stream for.
473     * @return a stream to read the entry from. The returned stream
474     * implements {@link InputStreamStatistics}.
475     * @throws IOException if unable to create an input stream from the zipentry
476     */
477    public InputStream getInputStream(final ZipArchiveEntry ze)
478        throws IOException {
479        if (!(ze instanceof Entry)) {
480            return null;
481        }
482        // cast validity is checked just above
483        ZipUtil.checkRequestedFeatures(ze);
484        final long start = ze.getDataOffset();
485
486        // doesn't get closed if the method is not supported - which
487        // should never happen because of the checkRequestedFeatures
488        // call above
489        final InputStream is =
490            new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR
491        switch (ZipMethod.getMethodByCode(ze.getMethod())) {
492            case STORED:
493                return new StoredStatisticsStream(is);
494            case UNSHRINKING:
495                return new UnshrinkingInputStream(is);
496            case IMPLODING:
497                return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
498                        ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
499            case DEFLATED:
500                final Inflater inflater = new Inflater(true);
501                // Inflater with nowrap=true has this odd contract for a zero padding
502                // byte following the data stream; this used to be zlib's requirement
503                // and has been fixed a long time ago, but the contract persists so
504                // we comply.
505                // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
506                return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)),
507                    inflater) {
508                    @Override
509                    public void close() throws IOException {
510                        try {
511                            super.close();
512                        } finally {
513                            inflater.end();
514                        }
515                    }
516                };
517            case BZIP2:
518                return new BZip2CompressorInputStream(is);
519            case ENHANCED_DEFLATED:
520                return new Deflate64CompressorInputStream(is);
521            case AES_ENCRYPTED:
522            case EXPANDING_LEVEL_1:
523            case EXPANDING_LEVEL_2:
524            case EXPANDING_LEVEL_3:
525            case EXPANDING_LEVEL_4:
526            case JPEG:
527            case LZMA:
528            case PKWARE_IMPLODING:
529            case PPMD:
530            case TOKENIZATION:
531            case UNKNOWN:
532            case WAVPACK:
533            case XZ:
534            default:
535                throw new ZipException("Found unsupported compression method "
536                                       + ze.getMethod());
537        }
538    }
539
540    /**
541     * <p>
542     * Convenience method to return the entry's content as a String if isUnixSymlink()
543     * returns true for it, otherwise returns null.
544     * </p>
545     *
546     * <p>This method assumes the symbolic link's file name uses the
547     * same encoding that as been specified for this ZipFile.</p>
548     *
549     * @param entry ZipArchiveEntry object that represents the symbolic link
550     * @return entry's content as a String
551     * @throws IOException problem with content's input stream
552     * @since 1.5
553     */
554    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
555        if (entry != null && entry.isUnixSymlink()) {
556            try (InputStream in = getInputStream(entry)) {
557                return zipEncoding.decode(IOUtils.toByteArray(in));
558            }
559        }
560        return null;
561    }
562
563    /**
564     * Ensures that the close method of this zipfile is called when
565     * there are no more references to it.
566     * @see #close()
567     */
568    @Override
569    protected void finalize() throws Throwable {
570        try {
571            if (!closed) {
572                System.err.println("Cleaning up unclosed ZipFile for archive "
573                                   + archiveName);
574                close();
575            }
576        } finally {
577            super.finalize();
578        }
579    }
580
581    /**
582     * Length of a "central directory" entry structure without file
583     * name, extra fields or comment.
584     */
585    private static final int CFH_LEN =
586        /* version made by                 */ SHORT
587        /* version needed to extract       */ + SHORT
588        /* general purpose bit flag        */ + SHORT
589        /* compression method              */ + SHORT
590        /* last mod file time              */ + SHORT
591        /* last mod file date              */ + SHORT
592        /* crc-32                          */ + WORD
593        /* compressed size                 */ + WORD
594        /* uncompressed size               */ + WORD
595        /* filename length                 */ + SHORT
596        /* extra field length              */ + SHORT
597        /* file comment length             */ + SHORT
598        /* disk number start               */ + SHORT
599        /* internal file attributes        */ + SHORT
600        /* external file attributes        */ + WORD
601        /* relative offset of local header */ + WORD;
602
603    private static final long CFH_SIG =
604        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
605
606    /**
607     * Reads the central directory of the given archive and populates
608     * the internal tables with ZipArchiveEntry instances.
609     *
610     * <p>The ZipArchiveEntrys will know all data that can be obtained from
611     * the central directory alone, but not the data that requires the
612     * local file header or additional data to be read.</p>
613     *
614     * @return a map of zipentries that didn't have the language
615     * encoding flag set when read.
616     */
617    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
618        throws IOException {
619        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
620            new HashMap<>();
621
622        positionAtCentralDirectory();
623
624        ((Buffer)wordBbuf).rewind();
625        IOUtils.readFully(archive, wordBbuf);
626        long sig = ZipLong.getValue(wordBuf);
627
628        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
629            throw new IOException("central directory is empty, can't expand"
630                                  + " corrupt archive.");
631        }
632
633        while (sig == CFH_SIG) {
634            readCentralDirectoryEntry(noUTF8Flag);
635            ((Buffer)wordBbuf).rewind();
636            IOUtils.readFully(archive, wordBbuf);
637            sig = ZipLong.getValue(wordBuf);
638        }
639        return noUTF8Flag;
640    }
641
642    /**
643     * Reads an individual entry of the central directory, creats an
644     * ZipArchiveEntry from it and adds it to the global maps.
645     *
646     * @param noUTF8Flag map used to collect entries that don't have
647     * their UTF-8 flag set and whose name will be set by data read
648     * from the local file header later.  The current entry may be
649     * added to this map.
650     */
651    private void
652        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
653        throws IOException {
654        ((Buffer)cfhBbuf).rewind();
655        IOUtils.readFully(archive, cfhBbuf);
656        int off = 0;
657        final Entry ze = new Entry();
658
659        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
660        off += SHORT;
661        ze.setVersionMadeBy(versionMadeBy);
662        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
663
664        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
665        off += SHORT; // version required
666
667        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
668        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
669        final ZipEncoding entryEncoding =
670            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
671        if (hasUTF8Flag) {
672            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
673        }
674        ze.setGeneralPurposeBit(gpFlag);
675        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
676
677        off += SHORT;
678
679        //noinspection MagicConstant
680        ze.setMethod(ZipShort.getValue(cfhBuf, off));
681        off += SHORT;
682
683        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
684        ze.setTime(time);
685        off += WORD;
686
687        ze.setCrc(ZipLong.getValue(cfhBuf, off));
688        off += WORD;
689
690        ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
691        off += WORD;
692
693        ze.setSize(ZipLong.getValue(cfhBuf, off));
694        off += WORD;
695
696        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
697        off += SHORT;
698
699        final int extraLen = ZipShort.getValue(cfhBuf, off);
700        off += SHORT;
701
702        final int commentLen = ZipShort.getValue(cfhBuf, off);
703        off += SHORT;
704
705        final int diskStart = ZipShort.getValue(cfhBuf, off);
706        off += SHORT;
707
708        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
709        off += SHORT;
710
711        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
712        off += WORD;
713
714        final byte[] fileName = new byte[fileNameLen];
715        IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
716        ze.setName(entryEncoding.decode(fileName), fileName);
717
718        // LFH offset,
719        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
720        // data offset will be filled later
721        entries.add(ze);
722
723        final byte[] cdExtraData = new byte[extraLen];
724        IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
725        ze.setCentralDirectoryExtra(cdExtraData);
726
727        setSizesAndOffsetFromZip64Extra(ze, diskStart);
728
729        final byte[] comment = new byte[commentLen];
730        IOUtils.readFully(archive, ByteBuffer.wrap(comment));
731        ze.setComment(entryEncoding.decode(comment));
732
733        if (!hasUTF8Flag && useUnicodeExtraFields) {
734            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
735        }
736    }
737
738    /**
739     * If the entry holds a Zip64 extended information extra field,
740     * read sizes from there if the entry's sizes are set to
741     * 0xFFFFFFFFF, do the same for the offset of the local file
742     * header.
743     *
744     * <p>Ensures the Zip64 extra either knows both compressed and
745     * uncompressed size or neither of both as the internal logic in
746     * ExtraFieldUtils forces the field to create local header data
747     * even if they are never used - and here a field with only one
748     * size would be invalid.</p>
749     */
750    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
751                                                 final int diskStart)
752        throws IOException {
753        final Zip64ExtendedInformationExtraField z64 =
754            (Zip64ExtendedInformationExtraField)
755            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
756        if (z64 != null) {
757            final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
758            final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
759            final boolean hasRelativeHeaderOffset =
760                ze.getLocalHeaderOffset() == ZIP64_MAGIC;
761            z64.reparseCentralDirectoryData(hasUncompressedSize,
762                                            hasCompressedSize,
763                                            hasRelativeHeaderOffset,
764                                            diskStart == ZIP64_MAGIC_SHORT);
765
766            if (hasUncompressedSize) {
767                ze.setSize(z64.getSize().getLongValue());
768            } else if (hasCompressedSize) {
769                z64.setSize(new ZipEightByteInteger(ze.getSize()));
770            }
771
772            if (hasCompressedSize) {
773                ze.setCompressedSize(z64.getCompressedSize().getLongValue());
774            } else if (hasUncompressedSize) {
775                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
776            }
777
778            if (hasRelativeHeaderOffset) {
779                ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
780            }
781        }
782    }
783
784    /**
785     * Length of the "End of central directory record" - which is
786     * supposed to be the last structure of the archive - without file
787     * comment.
788     */
    static final int MIN_EOCD_SIZE =
        // Sum of the widths of all fixed-size fields of the record, listed
        // in the order they are added; the comment bytes are not included.
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD
        /* offset of start of central      */
        /* directory with respect to       */
        /* the starting disk number        */ + WORD
        /* zipfile comment length          */ + SHORT;
803
804    /**
805     * Maximum length of the "End of central directory record" with a
806     * file comment.
807     */
    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
        // NOTE(review): ZIP64_MAGIC_SHORT is reused here as the longest
        // comment the SHORT-sized length field can announce - confirm
        // against the constant's definition.
        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
810
811    /**
812     * Offset of the field that holds the location of the first
813     * central directory entry inside the "End of central directory
814     * record" relative to the start of the "End of central directory
815     * record".
816     */
    private static final int CFD_LOCATOR_OFFSET =
        // Widths of every field that precedes the "offset of start of
        // central directory" field; same leading terms as MIN_EOCD_SIZE.
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD;
827
828    /**
829     * Length of the "Zip64 end of central directory locator" - which
830     * should be right in front of the "end of central directory
831     * record" if one is present at all.
832     */
    private static final int ZIP64_EOCDL_LENGTH =
        // Sum of the widths of all fields of the locator.
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD
        /* relative offset of the zip64         */
        /* end of central directory record      */ + DWORD
        /* total number of disks                */ + WORD;
841
842    /**
843     * Offset of the field that holds the location of the "Zip64 end
844     * of central directory record" inside the "Zip64 end of central
845     * directory locator" relative to the start of the "Zip64 end of
846     * central directory locator".
847     */
    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
        // Widths of the two fields that precede the DWORD-sized relative
        // offset (compare the terms of ZIP64_EOCDL_LENGTH).
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD;
853
854    /**
855     * Offset of the field that holds the location of the first
856     * central directory entry inside the "Zip64 end of central
857     * directory record" relative to the start of the "Zip64 end of
858     * central directory record".
859     */
    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
        // Widths of every field that precedes the central directory
        // offset inside the record, listed in the order they are added.
        /* zip64 end of central dir        */
        /* signature                       */ WORD
        /* size of zip64 end of central    */
        /* directory record                */ + DWORD
        /* version made by                 */ + SHORT
        /* version needed to extract       */ + SHORT
        /* number of this disk             */ + WORD
        /* number of the disk with the     */
        /* start of the central directory  */ + WORD
        /* total number of entries in the  */
        /* central directory on this disk  */ + DWORD
        /* total number of entries in the  */
        /* central directory               */ + DWORD
        /* size of the central directory   */ + DWORD;
875
876    /**
877     * Searches for either the &quot;Zip64 end of central directory
878     * locator&quot; or the &quot;End of central dir record&quot;, parses
879     * it and positions the stream at the first central directory
880     * record.
881     */
882    private void positionAtCentralDirectory()
883        throws IOException {
884        positionAtEndOfCentralDirectoryRecord();
885        boolean found = false;
886        final boolean searchedForZip64EOCD =
887            archive.position() > ZIP64_EOCDL_LENGTH;
888        if (searchedForZip64EOCD) {
889            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
890            ((Buffer)wordBbuf).rewind();
891            IOUtils.readFully(archive, wordBbuf);
892            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
893                                  wordBuf);
894        }
895        if (!found) {
896            // not a ZIP64 archive
897            if (searchedForZip64EOCD) {
898                skipBytes(ZIP64_EOCDL_LENGTH - WORD);
899            }
900            positionAtCentralDirectory32();
901        } else {
902            positionAtCentralDirectory64();
903        }
904    }
905
906    /**
907     * Parses the &quot;Zip64 end of central directory locator&quot;,
908     * finds the &quot;Zip64 end of central directory record&quot; using the
909     * parsed information, parses that and positions the stream at the
910     * first central directory record.
911     *
912     * Expects stream to be positioned right behind the &quot;Zip64
913     * end of central directory locator&quot;'s signature.
914     */
915    private void positionAtCentralDirectory64()
916        throws IOException {
917        skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
918                  - WORD /* signature has already been read */);
919        ((Buffer)dwordBbuf).rewind();
920        IOUtils.readFully(archive, dwordBbuf);
921        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
922        ((Buffer)wordBbuf).rewind();
923        IOUtils.readFully(archive, wordBbuf);
924        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
925            throw new ZipException("archive's ZIP64 end of central "
926                                   + "directory locator is corrupt.");
927        }
928        skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
929                  - WORD /* signature has already been read */);
930        ((Buffer)dwordBbuf).rewind();
931        IOUtils.readFully(archive, dwordBbuf);
932        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
933    }
934
935    /**
936     * Parses the &quot;End of central dir record&quot; and positions
937     * the stream at the first central directory record.
938     *
939     * Expects stream to be positioned at the beginning of the
940     * &quot;End of central dir record&quot;.
941     */
942    private void positionAtCentralDirectory32()
943        throws IOException {
944        skipBytes(CFD_LOCATOR_OFFSET);
945        ((Buffer)wordBbuf).rewind();
946        IOUtils.readFully(archive, wordBbuf);
947        archive.position(ZipLong.getValue(wordBuf));
948    }
949
    /**
     * Searches for the &quot;End of central dir record&quot; and
     * positions the stream at its start.
     */
954    private void positionAtEndOfCentralDirectoryRecord()
955        throws IOException {
956        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
957                                             ZipArchiveOutputStream.EOCD_SIG);
958        if (!found) {
959            throw new ZipException("archive is not a ZIP archive");
960        }
961    }
962
    /**
     * Searches the archive backwards from minDistance to maxDistance
     * (both measured from the end of the archive) for the given
     * signature and positions the archive channel right at the
     * signature if it has been found.
     */
968    private boolean tryToLocateSignature(final long minDistanceFromEnd,
969                                         final long maxDistanceFromEnd,
970                                         final byte[] sig) throws IOException {
971        boolean found = false;
972        long off = archive.size() - minDistanceFromEnd;
973        final long stopSearching =
974            Math.max(0L, archive.size() - maxDistanceFromEnd);
975        if (off >= 0) {
976            for (; off >= stopSearching; off--) {
977                archive.position(off);
978                try {
979                    ((Buffer)wordBbuf).rewind();
980                    IOUtils.readFully(archive, wordBbuf);
981                    ((Buffer)wordBbuf).flip();
982                } catch (EOFException ex) {
983                    break;
984                }
985                int curr = wordBbuf.get();
986                if (curr == sig[POS_0]) {
987                    curr = wordBbuf.get();
988                    if (curr == sig[POS_1]) {
989                        curr = wordBbuf.get();
990                        if (curr == sig[POS_2]) {
991                            curr = wordBbuf.get();
992                            if (curr == sig[POS_3]) {
993                                found = true;
994                                break;
995                            }
996                        }
997                    }
998                }
999            }
1000        }
1001        if (found) {
1002            archive.position(off);
1003        }
1004        return found;
1005    }
1006
1007    /**
1008     * Skips the given number of bytes or throws an EOFException if
1009     * skipping failed.
1010     */
1011    private void skipBytes(final int count) throws IOException {
1012        long currentPosition = archive.position();
1013        long newPosition = currentPosition + count;
1014        if (newPosition > archive.size()) {
1015            throw new EOFException();
1016        }
1017        archive.position(newPosition);
1018    }
1019
1020    /**
1021     * Number of bytes in local file header up to the &quot;length of
1022     * filename&quot; entry.
1023     */
    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
        // Widths of every field in front of the file name length field,
        // listed in the order they are added. The cast on the last term
        // makes the sum a long expression.
        /* local file header signature     */ WORD
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + (long) WORD;
1034
1035    /**
1036     * Walks through all recorded entries and adds the data available
1037     * from the local file header.
1038     *
1039     * <p>Also records the offsets for the data to read from the
1040     * entries.</p>
1041     */
1042    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1043                                            entriesWithoutUTF8Flag)
1044        throws IOException {
1045        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1046            // entries is filled in populateFromCentralDirectory and
1047            // never modified
1048            final Entry ze = (Entry) zipArchiveEntry;
1049            final long offset = ze.getLocalHeaderOffset();
1050            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1051            ((Buffer)wordBbuf).rewind();
1052            IOUtils.readFully(archive, wordBbuf);
1053            ((Buffer)wordBbuf).flip();
1054            wordBbuf.get(shortBuf);
1055            final int fileNameLen = ZipShort.getValue(shortBuf);
1056            wordBbuf.get(shortBuf);
1057            final int extraFieldLen = ZipShort.getValue(shortBuf);
1058            skipBytes(fileNameLen);
1059            final byte[] localExtraData = new byte[extraFieldLen];
1060            IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
1061            ze.setExtra(localExtraData);
1062            ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1063                + SHORT + SHORT + fileNameLen + extraFieldLen);
1064            ze.setStreamContiguous(true);
1065
1066            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1067                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1068                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1069                                                         nc.comment);
1070            }
1071
1072            final String name = ze.getName();
1073            LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
1074            if (entriesOfThatName == null) {
1075                entriesOfThatName = new LinkedList<>();
1076                nameMap.put(name, entriesOfThatName);
1077            }
1078            entriesOfThatName.addLast(ze);
1079        }
1080    }
1081
1082    /**
1083     * Checks whether the archive starts with a LFH.  If it doesn't,
1084     * it may be an empty archive.
1085     */
1086    private boolean startsWithLocalFileHeader() throws IOException {
1087        archive.position(0);
1088        ((Buffer)wordBbuf).rewind();
1089        IOUtils.readFully(archive, wordBbuf);
1090        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1091    }
1092
1093    /**
1094     * Creates new BoundedInputStream, according to implementation of
1095     * underlying archive channel.
1096     */
1097    private BoundedInputStream createBoundedInputStream(long start, long remaining) {
1098        return archive instanceof FileChannel ?
1099            new BoundedFileChannelInputStream(start, remaining) :
1100            new BoundedInputStream(start, remaining);
1101    }
1102
1103    /**
1104     * InputStream that delegates requests to the underlying
1105     * SeekableByteChannel, making sure that only bytes from a certain
1106     * range can be read.
1107     */
1108    private class BoundedInputStream extends InputStream {
1109        private ByteBuffer singleByteBuffer;
1110        private final long end;
1111        private long loc;
1112
1113        BoundedInputStream(final long start, final long remaining) {
1114            this.end = start+remaining;
1115            if (this.end < start) {
1116                // check for potential vulnerability due to overflow
1117                throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining);
1118            }
1119            loc = start;
1120        }
1121
1122        @Override
1123        public synchronized int read() throws IOException {
1124            if (loc >= end) {
1125                return -1;
1126            }
1127            if (singleByteBuffer == null) {
1128                singleByteBuffer = ByteBuffer.allocate(1);
1129            }
1130            else {
1131                ((Buffer)singleByteBuffer).rewind();
1132            }
1133            int read = read(loc, singleByteBuffer);
1134            if (read < 0) {
1135                return read;
1136            }
1137            loc++;
1138            return singleByteBuffer.get() & 0xff;
1139        }
1140
1141        @Override
1142        public synchronized int read(final byte[] b, final int off, int len) throws IOException {
1143            if (len <= 0) {
1144                return 0;
1145            }
1146
1147            if (len > end-loc) {
1148                if (loc >= end) {
1149                    return -1;
1150                }
1151                len = (int)(end-loc);
1152            }
1153
1154            ByteBuffer buf;
1155            buf = ByteBuffer.wrap(b, off, len);
1156            int ret = read(loc, buf);
1157            if (ret > 0) {
1158                loc += ret;
1159                return ret;
1160            }
1161            return ret;
1162        }
1163
1164        protected int read(long pos, ByteBuffer buf) throws IOException {
1165            int read;
1166            synchronized (archive) {
1167                archive.position(pos);
1168                read = archive.read(buf);
1169            }
1170            ((Buffer)buf).flip();
1171            return read;
1172        }
1173    }
1174
    /**
     * Lock-free implementation of BoundedInputStream. The
     * implementation uses positioned reads on the underlying archive
     * file channel and therefore performs significantly faster in
     * a concurrent environment.
     */
1181    private class BoundedFileChannelInputStream extends BoundedInputStream {
1182        private final FileChannel archive;
1183
1184        BoundedFileChannelInputStream(final long start, final long remaining) {
1185            super(start, remaining);
1186            archive = (FileChannel)ZipFile.this.archive;
1187        }
1188
1189        @Override
1190        protected int read(long pos, ByteBuffer buf) throws IOException {
1191            int read = archive.read(buf, pos);
1192            ((Buffer)buf).flip();
1193            return read;
1194        }
1195    }
1196
1197    private static final class NameAndComment {
1198        private final byte[] name;
1199        private final byte[] comment;
1200        private NameAndComment(final byte[] name, final byte[] comment) {
1201            this.name = name;
1202            this.comment = comment;
1203        }
1204    }
1205
1206    /**
1207     * Compares two ZipArchiveEntries based on their offset within the archive.
1208     *
1209     * <p>Won't return any meaningful results if one of the entries
1210     * isn't part of the archive at all.</p>
1211     *
1212     * @since 1.1
1213     */
1214    private final Comparator<ZipArchiveEntry> offsetComparator =
1215        new Comparator<ZipArchiveEntry>() {
1216        @Override
1217        public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) {
1218            if (e1 == e2) {
1219                return 0;
1220            }
1221
1222            final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1223            final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1224            if (ent1 == null) {
1225                return 1;
1226            }
1227            if (ent2 == null) {
1228                return -1;
1229            }
1230            final long val = (ent1.getLocalHeaderOffset()
1231                        - ent2.getLocalHeaderOffset());
1232            return val == 0 ? 0 : val < 0 ? -1 : +1;
1233        }
1234    };
1235
1236    /**
1237     * Extends ZipArchiveEntry to store the offset within the archive.
1238     */
1239    private static class Entry extends ZipArchiveEntry {
1240
1241        Entry() {
1242        }
1243
1244        @Override
1245        public int hashCode() {
1246            return 3 * super.hashCode()
1247                + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
1248        }
1249
1250        @Override
1251        public boolean equals(final Object other) {
1252            if (super.equals(other)) {
1253                // super.equals would return false if other were not an Entry
1254                final Entry otherEntry = (Entry) other;
1255                return getLocalHeaderOffset()
1256                        == otherEntry.getLocalHeaderOffset()
1257                    && getDataOffset()
1258                        == otherEntry.getDataOffset();
1259            }
1260            return false;
1261        }
1262    }
1263
1264    private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
1265        StoredStatisticsStream(InputStream in) {
1266            super(in);
1267        }
1268
1269        @Override
1270        public long getCompressedCount() {
1271            return super.getBytesRead();
1272        }
1273
1274        @Override
1275        public long getUncompressedCount() {
1276            return getCompressedCount();
1277        }
1278    }
1279}