001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.math.BigInteger;
028import java.nio.Buffer;
029import java.nio.ByteBuffer;
030import java.util.Arrays;
031import java.util.zip.CRC32;
032import java.util.zip.DataFormatException;
033import java.util.zip.Inflater;
034import java.util.zip.ZipEntry;
035import java.util.zip.ZipException;
036
037import org.apache.commons.compress.archivers.ArchiveEntry;
038import org.apache.commons.compress.archivers.ArchiveInputStream;
039import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
040import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
041import org.apache.commons.compress.utils.ArchiveUtils;
042import org.apache.commons.compress.utils.IOUtils;
043import org.apache.commons.compress.utils.InputStreamStatistics;
044
045import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
046import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
047import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
048import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
049
050/**
051 * Implements an input stream that can read Zip archives.
052 *
053 * <p>As of Apache Commons Compress it transparently supports Zip64
054 * extensions and thus individual entries and archives larger than 4
055 * GB or with more than 65536 entries.</p>
056 *
057 * <p>The {@link ZipFile} class is preferred when reading from files
058 * as {@link ZipArchiveInputStream} is limited by not being able to
059 * read the central directory header before returning entries.  In
060 * particular {@link ZipArchiveInputStream}</p>
061 *
062 * <ul>
063 *
064 *  <li>may return entries that are not part of the central directory
065 *  at all and shouldn't be considered part of the archive.</li>
066 *
067 *  <li>may return several entries with the same name.</li>
068 *
069 *  <li>will not return internal or external attributes.</li>
070 *
071 *  <li>may return incomplete extra field data.</li>
072 *
073 *  <li>may return unknown sizes and CRC values for entries until the
074 *  next entry has been reached if the archive uses the data
075 *  descriptor feature.</li>
076 *
077 * </ul>
078 *
079 * @see ZipFile
080 * @NotThreadSafe
081 */
082public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {
083
    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    /**
     * The encoding name exactly as it was passed to the constructor.
     * Package-private (rather than private) so unit tests can read it.
     */
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read, {@code null} between entries. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Count decompressed bytes for current entry */
    private long uncompressedCount = 0;

    /** Length in bytes of the fixed-size part of a local file header (sum of the fields listed below). */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Length in bytes of the fixed-size part of a central file header (sum of the fields listed below). */
    private static final int CFH_LEN = 46;
    /*
        central file header signature   WORD
        version made by                 SHORT
        version needed to extract       SHORT
        general purpose bit flag        SHORT
        compression method              SHORT
        last mod file time              SHORT
        last mod file date              SHORT
        crc-32                          WORD
        compressed size                 WORD
        uncompressed size               WORD
        file name length                SHORT
        extra field length              SHORT
        file comment length             SHORT
        disk number start               SHORT
        internal file attributes        SHORT
        external file attributes        WORD
        relative offset of local header WORD
    */

    /**
     * One more than {@code ZIP64_MAGIC}; used by {@code getBytesInflated()} as the
     * wrap-around step for the Inflater's byte counter.
     * NOTE(review): presumably 2^32, assuming ZIP64_MAGIC is 0xFFFFFFFF - confirm in ZipConstants.
     */
    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // lfhBuf receives the fixed part of each local file header, skipBuf is the scratch
    // area of skip(), wordBuf and twoDwordBuf are used while reading data descriptors.
    private final byte[] lfhBuf = new byte[LFH_LEN];
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    /** Number of entries returned so far; incremented by {@code getNextZipEntry()} for each entry it returns. */
    private int entriesRead = 0;
170
171    /**
172     * Create an instance using UTF-8 encoding
173     * @param inputStream the stream to wrap
174     */
175    public ZipArchiveInputStream(final InputStream inputStream) {
176        this(inputStream, ZipEncodingHelper.UTF8);
177    }
178
179    /**
180     * Create an instance using the specified encoding
181     * @param inputStream the stream to wrap
182     * @param encoding the encoding to use for file names, use null
183     * for the platform's default encoding
184     * @since 1.5
185     */
186    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
187        this(inputStream, encoding, true);
188    }
189
190    /**
191     * Create an instance using the specified encoding
192     * @param inputStream the stream to wrap
193     * @param encoding the encoding to use for file names, use null
194     * for the platform's default encoding
195     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
196     * Extra Fields (if present) to set the file names.
197     */
198    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
199        this(inputStream, encoding, useUnicodeExtraFields, false);
200    }
201
202    /**
203     * Create an instance using the specified encoding
204     * @param inputStream the stream to wrap
205     * @param encoding the encoding to use for file names, use null
206     * for the platform's default encoding
207     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
208     * Extra Fields (if present) to set the file names.
209     * @param allowStoredEntriesWithDataDescriptor whether the stream
210     * will try to read STORED entries that use a data descriptor
211     * @since 1.1
212     */
213    public ZipArchiveInputStream(final InputStream inputStream,
214                                 final String encoding,
215                                 final boolean useUnicodeExtraFields,
216                                 final boolean allowStoredEntriesWithDataDescriptor) {
217        this.encoding = encoding;
218        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
219        this.useUnicodeExtraFields = useUnicodeExtraFields;
220        in = new PushbackInputStream(inputStream, buf.capacity());
221        this.allowStoredEntriesWithDataDescriptor =
222            allowStoredEntriesWithDataDescriptor;
223        // haven't read anything so far
224        ((Buffer)buf).limit(0);
225    }
226
    /**
     * Advances the stream to the next local file header and returns
     * the entry described by it.
     *
     * <p>Any entry that is still open is closed first.  The
     * per-entry uncompressed byte counter is reset on every call.</p>
     *
     * @return the next entry, or {@code null} if the stream has been
     * closed, the central directory (or an archive extra data record
     * or APK signing block) has been reached, or the underlying stream
     * ended while reading the header
     * @throws ZipException if the bytes at the current position carry
     * an unexpected record signature
     * @throws IOException if reading from the underlying stream fails
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        uncompressedCount = 0;

        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            // finish the previous entry so the stream is positioned at the next header
            closeEntry();
            firstEntry = false;
        }

        // remember where this header starts before any of its bytes are consumed
        long currentHeaderOffset = getBytesRead();
        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(lfhBuf);
            } else {
                readFully(lfhBuf);
            }
        } catch (final EOFException e) {
            // the stream ended before a complete header could be read: report "no more entries"
            return null;
        }

        final ZipLong sig = new ZipLong(lfhBuf);
        if (!sig.equals(ZipLong.LFH_SIG)) {
            // anything that marks the end of the entry data terminates iteration
            if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) {
                hitCentralDirectory = true;
                skipRemainderOfArchive();
                return null;
            }
            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
        }

        // off walks through the fixed-size header fields, starting right after the signature
        int off = WORD;
        current = new CurrentEntry();

        // NOTE: according to the LFH layout documented next to LFH_LEN the SHORT following
        // the signature is "version needed to extract"; the platform is derived from it
        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        // the UTF-8 flag of the entry overrides the encoding configured for this stream
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
        off += SHORT;

        // last mod time and date (two SHORTs) are read together as one WORD
        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
            off += WORD;

            cSize = new ZipLong(lfhBuf, off);
            off += WORD;

            size = new ZipLong(lfhBuf, off);
            off += WORD;
        } else {
            // CRC and sizes are taken from the data descriptor instead, skip the three header fields
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(lfhBuf, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(lfhBuf, off);
        off += SHORT; // NOSONAR - assignment as documentation

        // the variable-length name and extra field follow the fixed-size part of the header
        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);
        if (hasUTF8Flag) {
            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        // must run after setExtra as it looks up the Zip64 extra field of the entry
        processZip64Extra(size, cSize);

        current.entry.setLocalHeaderOffset(currentHeaderOffset);
        current.entry.setDataOffset(getBytesRead());
        current.entry.setStreamContiguous(true);

        // STORED and DEFLATED are decoded by this class itself, the remaining supported
        // methods get a dedicated decompressing stream stored in current.in
        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
                // restrict the decompressor to the compressed bytes of this entry
                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
                switch (m) {
                case UNSHRINKING:
                    current.in = new UnshrinkingInputStream(bis);
                    break;
                case IMPLODING:
                    current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        bis);
                    break;
                case BZIP2:
                    current.in = new BZip2CompressorInputStream(bis);
                    break;
                case ENHANCED_DEFLATED:
                    current.in = new Deflate64CompressorInputStream(bis);
                    break;
                default:
                    // we should never get here as all supported methods have been covered
                    // will cause an error when read is invoked, don't throw an exception here so people can
                    // skip unsupported entries
                    break;
                }
            }
        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
            // compressed size unknown: the Deflate64 stream reads from the unbounded wrapped stream
            current.in = new Deflate64CompressorInputStream(in);
        }

        entriesRead++;
        return current.entry;
    }
362
363    /**
364     * Fills the given array with the first local file header and
365     * deals with splitting/spanning markers that may prefix the first
366     * LFH.
367     */
368    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
369        readFully(lfh);
370        final ZipLong sig = new ZipLong(lfh);
371        if (sig.equals(ZipLong.DD_SIG)) {
372            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
373        }
374
375        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
376            // The archive is not really split as only one segment was
377            // needed in the end.  Just skip over the marker.
378            final byte[] missedLfhBytes = new byte[4];
379            readFully(missedLfhBytes);
380            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
381            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
382        }
383    }
384
385    /**
386     * Records whether a Zip64 extra is present and sets the size
387     * information from it if sizes are 0xFFFFFFFF and the entry
388     * doesn't use a data descriptor.
389     */
390    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
391        final Zip64ExtendedInformationExtraField z64 =
392            (Zip64ExtendedInformationExtraField)
393            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
394        current.usesZip64 = z64 != null;
395        if (!current.hasDataDescriptor) {
396            if (z64 != null // same as current.usesZip64 but avoids NPE warning
397                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
398                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
399                current.entry.setSize(z64.getSize().getLongValue());
400            } else {
401                current.entry.setCompressedSize(cSize.getValue());
402                current.entry.setSize(size.getValue());
403            }
404        }
405    }
406
407    @Override
408    public ArchiveEntry getNextEntry() throws IOException {
409        return getNextZipEntry();
410    }
411
412    /**
413     * Whether this class is able to read the given entry.
414     *
415     * <p>May return false if it is set up to use encryption or a
416     * compression method that hasn't been implemented yet.</p>
417     * @since 1.1
418     */
419    @Override
420    public boolean canReadEntryData(final ArchiveEntry ae) {
421        if (ae instanceof ZipArchiveEntry) {
422            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
423            return ZipUtil.canHandleEntryData(ze)
424                && supportsDataDescriptorFor(ze)
425                && supportsCompressedSizeFor(ze);
426        }
427        return false;
428    }
429
430    @Override
431    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
432        if (closed) {
433            throw new IOException("The stream is closed");
434        }
435
436        if (current == null) {
437            return -1;
438        }
439
440        // avoid int overflow, check null buffer
441        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
442            throw new ArrayIndexOutOfBoundsException();
443        }
444
445        ZipUtil.checkRequestedFeatures(current.entry);
446        if (!supportsDataDescriptorFor(current.entry)) {
447            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
448                    current.entry);
449        }
450        if (!supportsCompressedSizeFor(current.entry)) {
451            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
452                    current.entry);
453        }
454
455        int read;
456        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
457            read = readStored(buffer, offset, length);
458        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
459            read = readDeflated(buffer, offset, length);
460        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
461                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
462                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
463                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
464            read = current.in.read(buffer, offset, length);
465        } else {
466            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
467                    current.entry);
468        }
469
470        if (read >= 0) {
471            current.crc.update(buffer, offset, read);
472            uncompressedCount += read;
473        }
474
475        return read;
476    }
477
478    /**
479     * @since 1.17
480     */
481    @Override
482    public long getCompressedCount() {
483        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
484            return current.bytesRead;
485        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
486            return getBytesInflated();
487        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
488            return ((UnshrinkingInputStream) current.in).getCompressedCount();
489        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
490            return ((ExplodingInputStream) current.in).getCompressedCount();
491        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
492            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
493        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
494            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
495        } else {
496            return -1;
497        }
498    }
499
500    /**
501     * @since 1.17
502     */
503    @Override
504    public long getUncompressedCount() {
505        return uncompressedCount;
506    }
507
508    /**
509     * Implementation of read for STORED entries.
510     */
511    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
512
513        if (current.hasDataDescriptor) {
514            if (lastStoredEntry == null) {
515                readStoredEntry();
516            }
517            return lastStoredEntry.read(buffer, offset, length);
518        }
519
520        final long csize = current.entry.getSize();
521        if (current.bytesRead >= csize) {
522            return -1;
523        }
524
525        if (buf.position() >= buf.limit()) {
526            ((Buffer)buf).position(0);
527            final int l = in.read(buf.array());
528            if (l == -1) {
529                ((Buffer)buf).limit(0);
530                throw new IOException("Truncated ZIP file");
531            }
532            ((Buffer)buf).limit(l);
533
534            count(l);
535            current.bytesReadFromStream += l;
536        }
537
538        int toRead = Math.min(buf.remaining(), length);
539        if ((csize - current.bytesRead) < toRead) {
540            // if it is smaller than toRead then it fits into an int
541            toRead = (int) (csize - current.bytesRead);
542        }
543        buf.get(buffer, offset, toRead);
544        current.bytesRead += toRead;
545        return toRead;
546    }
547
548    /**
549     * Implementation of read for DEFLATED entries.
550     */
551    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
552        final int read = readFromInflater(buffer, offset, length);
553        if (read <= 0) {
554            if (inf.finished()) {
555                return -1;
556            } else if (inf.needsDictionary()) {
557                throw new ZipException("This archive needs a preset dictionary"
558                                       + " which is not supported by Commons"
559                                       + " Compress.");
560            } else if (read == -1) {
561                throw new IOException("Truncated ZIP file");
562            }
563        }
564        return read;
565    }
566
567    /**
568     * Potentially reads more bytes to fill the inflater's buffer and
569     * reads from it.
570     */
571    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
572        int read = 0;
573        do {
574            if (inf.needsInput()) {
575                final int l = fill();
576                if (l > 0) {
577                    current.bytesReadFromStream += buf.limit();
578                } else if (l == -1) {
579                    return -1;
580                } else {
581                    break;
582                }
583            }
584            try {
585                read = inf.inflate(buffer, offset, length);
586            } catch (final DataFormatException e) {
587                throw (IOException) new ZipException(e.getMessage()).initCause(e);
588            }
589        } while (read == 0 && inf.needsInput());
590        return read;
591    }
592
593    @Override
594    public void close() throws IOException {
595        if (!closed) {
596            closed = true;
597            try {
598                in.close();
599            } finally {
600                inf.end();
601            }
602        }
603    }
604
605    /**
606     * Skips over and discards value bytes of data from this input
607     * stream.
608     *
609     * <p>This implementation may end up skipping over some smaller
610     * number of bytes, possibly 0, if and only if it reaches the end
611     * of the underlying stream.</p>
612     *
613     * <p>The actual number of bytes skipped is returned.</p>
614     *
615     * @param value the number of bytes to be skipped.
616     * @return the actual number of bytes skipped.
617     * @throws IOException - if an I/O error occurs.
618     * @throws IllegalArgumentException - if value is negative.
619     */
620    @Override
621    public long skip(final long value) throws IOException {
622        if (value >= 0) {
623            long skipped = 0;
624            while (skipped < value) {
625                final long rem = value - skipped;
626                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
627                if (x == -1) {
628                    return skipped;
629                }
630                skipped += x;
631            }
632            return skipped;
633        }
634        throw new IllegalArgumentException();
635    }
636
637    /**
638     * Checks if the signature matches what is expected for a zip file.
639     * Does not currently handle self-extracting zips which may have arbitrary
640     * leading content.
641     *
642     * @param signature the bytes to check
643     * @param length    the number of bytes to check
644     * @return true, if this stream is a zip archive stream, false otherwise
645     */
646    public static boolean matches(final byte[] signature, final int length) {
647        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
648            return false;
649        }
650
651        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
652            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
653            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
654            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
655    }
656
657    private static boolean checksig(final byte[] signature, final byte[] expected) {
658        for (int i = 0; i < expected.length; i++) {
659            if (signature[i] != expected[i]) {
660                return false;
661            }
662        }
663        return true;
664    }
665
    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     * <p>
     * Does nothing if there is no current entry.
     *
     * @throws IOException if the stream has been closed or an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (currentEntryHasOutstandingBytes()) {
            // compressed size is known and not exceeded: skip the raw bytes without decoding them
            drainCurrentEntryData();
        } else {
            // this is guaranteed to exhaust the stream
            skip(Long.MAX_VALUE); //NOSONAR

            // number of bytes the decoder actually consumed for this entry
            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                       ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                // the surplus sits at the end of the most recently filled buffer
                pushback(buf.array(), buf.limit() - diff, diff);
                current.bytesReadFromStream -= diff;
            }

            // Drain remainder of entry if not all data bytes were required
            if (currentEntryHasOutstandingBytes()) {
                drainCurrentEntryData();
            }
        }

        // no cached stored entry exists, so the descriptor has not been consumed yet
        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        // reset all per-entry state
        inf.reset();
        // clear() followed by flip() leaves position and limit at 0, i.e. an empty buffer
        ((Buffer)buf).clear().flip();
        current = null;
        lastStoredEntry = null;
    }
727
728    /**
729     * If the compressed size of the current entry is included in the entry header
730     * and there are any outstanding bytes in the underlying stream, then
731     * this returns true.
732     *
733     * @return true, if current entry is determined to have outstanding bytes, false otherwise
734     */
735    private boolean currentEntryHasOutstandingBytes() {
736        return current.bytesReadFromStream <= current.entry.getCompressedSize()
737                && !current.hasDataDescriptor;
738    }
739
740    /**
741     * Read all data of the current entry from the underlying stream
742     * that hasn't been read, yet.
743     */
744    private void drainCurrentEntryData() throws IOException {
745        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
746        while (remaining > 0) {
747            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
748            if (n < 0) {
749                throw new EOFException("Truncated ZIP entry: "
750                                       + ArchiveUtils.sanitize(current.entry.getName()));
751            }
752            count(n);
753            remaining -= n;
754        }
755    }
756
757    /**
758     * Get the number of bytes Inflater has actually processed.
759     *
760     * <p>for Java &lt; Java7 the getBytes* methods in
761     * Inflater/Deflater seem to return unsigned ints rather than
762     * longs that start over with 0 at 2^32.</p>
763     *
764     * <p>The stream knows how many bytes it has read, but not how
765     * many the Inflater actually consumed - it should be between the
766     * total number of bytes read for the entry and the total number
767     * minus the last read operation.  Here we just try to make the
768     * value close enough to the bytes we've read by assuming the
769     * number of bytes consumed must be smaller than (or equal to) the
770     * number of bytes read but not smaller by more than 2^32.</p>
771     */
772    private long getBytesInflated() {
773        long inB = inf.getBytesRead();
774        if (current.bytesReadFromStream >= TWO_EXP_32) {
775            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
776                inB += TWO_EXP_32;
777            }
778        }
779        return inB;
780    }
781
782    private int fill() throws IOException {
783        if (closed) {
784            throw new IOException("The stream is closed");
785        }
786        final int length = in.read(buf.array());
787        if (length > 0) {
788            ((Buffer)buf).limit(length);
789            count(buf.limit());
790            inf.setInput(buf.array(), 0, buf.limit());
791        }
792        return length;
793    }
794
795    private void readFully(final byte[] b) throws IOException {
796        readFully(b, 0);
797    }
798
799    private void readFully(final byte[] b, final int off) throws IOException {
800        final int len = b.length - off;
801        final int count = IOUtils.readFully(in, b, off, len);
802        count(count);
803        if (count < len) {
804            throw new EOFException();
805        }
806    }
807
808    private void readDataDescriptor() throws IOException {
809        readFully(wordBuf);
810        ZipLong val = new ZipLong(wordBuf);
811        if (ZipLong.DD_SIG.equals(val)) {
812            // data descriptor with signature, skip sig
813            readFully(wordBuf);
814            val = new ZipLong(wordBuf);
815        }
816        current.entry.setCrc(val.getValue());
817
818        // if there is a ZIP64 extra field, sizes are eight bytes
819        // each, otherwise four bytes each.  Unfortunately some
820        // implementations - namely Java7 - use eight bytes without
821        // using a ZIP64 extra field -
822        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
823
824        // just read 16 bytes and check whether bytes nine to twelve
825        // look like one of the signatures of what could follow a data
826        // descriptor (ignoring archive decryption headers for now).
827        // If so, push back eight bytes and assume sizes are four
828        // bytes, otherwise sizes are eight bytes each.
829        readFully(twoDwordBuf);
830        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
831        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
832            pushback(twoDwordBuf, DWORD, DWORD);
833            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
834            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
835        } else {
836            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
837            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
838        }
839    }
840
841    /**
842     * Whether this entry requires a data descriptor this library can work with.
843     *
844     * @return true if allowStoredEntriesWithDataDescriptor is true,
845     * the entry doesn't require any data descriptor or the method is
846     * DEFLATED or ENHANCED_DEFLATED.
847     */
848    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
849        return !entry.getGeneralPurposeBit().usesDataDescriptor()
850
851                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
852                || entry.getMethod() == ZipEntry.DEFLATED
853                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
854    }
855
856    /**
857     * Whether the compressed size for the entry is either known or
858     * not required by the compression method being used.
859     */
860    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
861        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
862            || entry.getMethod() == ZipEntry.DEFLATED
863            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
864            || (entry.getGeneralPurposeBit().usesDataDescriptor()
865                && allowStoredEntriesWithDataDescriptor
866                && entry.getMethod() == ZipEntry.STORED);
867    }
868
869    /**
870     * Caches a stored entry that uses the data descriptor.
871     *
872     * <ul>
873     *   <li>Reads a stored entry until the signature of a local file
874     *     header, central directory header or data descriptor has been
875     *     found.</li>
876     *   <li>Stores all entry data in lastStoredEntry.</li>
877     *   <li>Rewinds the stream to position at the data
878     *     descriptor.</li>
879     *   <li>reads the data descriptor</li>
880     * </ul>
881     *
882     * <p>After calling this method the entry should know its size,
883     * the entry's data is cached and the stream is positioned at the
884     * next local file or central directory header.</p>
885     */
886    private void readStoredEntry() throws IOException {
887        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
888        int off = 0;
889        boolean done = false;
890
891        // length of DD without signature
892        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
893
894        while (!done) {
895            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
896            if (r <= 0) {
897                // read the whole archive without ever finding a
898                // central directory
899                throw new IOException("Truncated ZIP file");
900            }
901            if (r + off < 4) {
902                // buffer too small to check for a signature, loop
903                off += r;
904                continue;
905            }
906
907            done = bufferContainsSignature(bos, off, r, ddLen);
908            if (!done) {
909                off = cacheBytesRead(bos, off, r, ddLen);
910            }
911        }
912
913        final byte[] b = bos.toByteArray();
914        lastStoredEntry = new ByteArrayInputStream(b);
915    }
916
917    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
918    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
919    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
920
921    /**
922     * Checks whether the current buffer contains the signature of a
923     * &quot;data descriptor&quot;, &quot;local file header&quot; or
924     * &quot;central directory entry&quot;.
925     *
926     * <p>If it contains such a signature, reads the data descriptor
927     * and positions the stream right after the data descriptor.</p>
928     */
929    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
930            throws IOException {
931
932        boolean done = false;
933        int readTooMuch = 0;
934        for (int i = 0; !done && i < offset + lastRead - 4; i++) {
935            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
936                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
937                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
938                    // found a LFH or CFH:
939                    readTooMuch = offset + lastRead - i - expectedDDLen;
940                    done = true;
941                }
942                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
943                    // found DD:
944                    readTooMuch = offset + lastRead - i;
945                    done = true;
946                }
947                if (done) {
948                    // * push back bytes read in excess as well as the data
949                    //   descriptor
950                    // * copy the remaining bytes to cache
951                    // * read data descriptor
952                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
953                    bos.write(buf.array(), 0, i);
954                    readDataDescriptor();
955                }
956            }
957        }
958        return done;
959    }
960
961    /**
962     * If the last read bytes could hold a data descriptor and an
963     * incomplete signature then save the last bytes to the front of
964     * the buffer and cache everything in front of the potential data
965     * descriptor into the given ByteArrayOutputStream.
966     *
967     * <p>Data descriptor plus incomplete signature (3 bytes in the
968     * worst case) can be 20 bytes max.</p>
969     */
970    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
971        final int cacheable = offset + lastRead - expecteDDLen - 3;
972        if (cacheable > 0) {
973            bos.write(buf.array(), 0, cacheable);
974            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
975            offset = expecteDDLen + 3;
976        } else {
977            offset += lastRead;
978        }
979        return offset;
980    }
981
982    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
983        ((PushbackInputStream) in).unread(buf, offset, length);
984        pushedBackBytes(length);
985    }
986
987    // End of Central Directory Record
988    //   end of central dir signature    WORD
989    //   number of this disk             SHORT
990    //   number of the disk with the
991    //   start of the central directory  SHORT
992    //   total number of entries in the
993    //   central directory on this disk  SHORT
994    //   total number of entries in
995    //   the central directory           SHORT
996    //   size of the central directory   WORD
997    //   offset of start of central
998    //   directory with respect to
999    //   the starting disk number        WORD
1000    //   .ZIP file comment length        SHORT
1001    //   .ZIP file comment               up to 64KB
1002    //
1003
1004    /**
1005     * Reads the stream until it finds the "End of central directory
1006     * record" and consumes it as well.
1007     */
1008    private void skipRemainderOfArchive() throws IOException {
1009        // skip over central directory. One LFH has been read too much
1010        // already.  The calculation discounts file names and extra
1011        // data so it will be too short.
1012        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
1013        findEocdRecord();
1014        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
1015        readFully(shortBuf);
1016        // file comment
1017        realSkip(ZipShort.getValue(shortBuf));
1018    }
1019
1020    /**
1021     * Reads forward until the signature of the &quot;End of central
1022     * directory&quot; record is found.
1023     */
1024    private void findEocdRecord() throws IOException {
1025        int currentByte = -1;
1026        boolean skipReadCall = false;
1027        while (skipReadCall || (currentByte = readOneByte()) > -1) {
1028            skipReadCall = false;
1029            if (!isFirstByteOfEocdSig(currentByte)) {
1030                continue;
1031            }
1032            currentByte = readOneByte();
1033            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
1034                if (currentByte == -1) {
1035                    break;
1036                }
1037                skipReadCall = isFirstByteOfEocdSig(currentByte);
1038                continue;
1039            }
1040            currentByte = readOneByte();
1041            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
1042                if (currentByte == -1) {
1043                    break;
1044                }
1045                skipReadCall = isFirstByteOfEocdSig(currentByte);
1046                continue;
1047            }
1048            currentByte = readOneByte();
1049            if (currentByte == -1
1050                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
1051                break;
1052            }
1053            skipReadCall = isFirstByteOfEocdSig(currentByte);
1054        }
1055    }
1056
1057    /**
1058     * Skips bytes by reading from the underlying stream rather than
1059     * the (potentially inflating) archive stream - which {@link
1060     * #skip} would do.
1061     *
1062     * Also updates bytes-read counter.
1063     */
1064    private void realSkip(final long value) throws IOException {
1065        if (value >= 0) {
1066            long skipped = 0;
1067            while (skipped < value) {
1068                final long rem = value - skipped;
1069                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
1070                if (x == -1) {
1071                    return;
1072                }
1073                count(x);
1074                skipped += x;
1075            }
1076            return;
1077        }
1078        throw new IllegalArgumentException();
1079    }
1080
1081    /**
1082     * Reads bytes by reading from the underlying stream rather than
1083     * the (potentially inflating) archive stream - which {@link #read} would do.
1084     *
1085     * Also updates bytes-read counter.
1086     */
1087    private int readOneByte() throws IOException {
1088        final int b = in.read();
1089        if (b != -1) {
1090            count(1);
1091        }
1092        return b;
1093    }
1094
1095    private boolean isFirstByteOfEocdSig(final int b) {
1096        return b == ZipArchiveOutputStream.EOCD_SIG[0];
1097    }
1098
1099    private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] {
1100        'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2',
1101    };
1102    private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE);
1103
1104    /**
1105     * Checks whether this might be an APK Signing Block.
1106     *
1107     * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It
1108     * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature
1109     * and if we've found it, return true.</p>
1110     *
1111     * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold
1112     * the local file header of the next entry.
1113     *
1114     * @return true if this looks like a APK signing block
1115     *
1116     * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a>
1117     */
1118    private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException {
1119        // length of block excluding the size field itself
1120        BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader);
1121        // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block,
1122        // also subtract 16 bytes in order to position us at the magic string
1123        BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length
1124            - APK_SIGNING_BLOCK_MAGIC.length));
1125        byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length];
1126
1127        try {
1128            if (toSkip.signum() < 0) {
1129                // suspectLocalFileHeader contains the start of suspect magic string
1130                int off = suspectLocalFileHeader.length + toSkip.intValue();
1131                // length was shorter than magic length
1132                if (off < DWORD) {
1133                    return false;
1134                }
1135                int bytesInBuffer = Math.abs(toSkip.intValue());
1136                System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length));
1137                if (bytesInBuffer < magic.length) {
1138                    readFully(magic, bytesInBuffer);
1139                }
1140            } else {
1141                while (toSkip.compareTo(LONG_MAX) > 0) {
1142                    realSkip(Long.MAX_VALUE);
1143                    toSkip = toSkip.add(LONG_MAX.negate());
1144                }
1145                realSkip(toSkip.longValue());
1146                readFully(magic);
1147            }
1148        } catch (EOFException ex) {
1149            // length was invalid
1150            return false;
1151        }
1152        return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC);
1153    }
1154
1155    /**
1156     * Structure collecting information for the entry that is
1157     * currently being read.
1158     */
1159    private static final class CurrentEntry {
1160
1161        /**
1162         * Current ZIP entry.
1163         */
1164        private final ZipArchiveEntry entry = new ZipArchiveEntry();
1165
1166        /**
1167         * Does the entry use a data descriptor?
1168         */
1169        private boolean hasDataDescriptor;
1170
1171        /**
1172         * Does the entry have a ZIP64 extended information extra field.
1173         */
1174        private boolean usesZip64;
1175
1176        /**
1177         * Number of bytes of entry content read by the client if the
1178         * entry is STORED.
1179         */
1180        private long bytesRead;
1181
1182        /**
1183         * Number of bytes of entry content read from the stream.
1184         *
1185         * <p>This may be more than the actual entry's length as some
1186         * stuff gets buffered up and needs to be pushed back when the
1187         * end of the entry has been reached.</p>
1188         */
1189        private long bytesReadFromStream;
1190
1191        /**
1192         * The checksum calculated as the current entry is read.
1193         */
1194        private final CRC32 crc = new CRC32();
1195
1196        /**
1197         * The input stream decompressing the data for shrunk and imploded entries.
1198         */
1199        private InputStream in;
1200    }
1201
1202    /**
1203     * Bounded input stream adapted from commons-io
1204     */
1205    private class BoundedInputStream extends InputStream {
1206
1207        /** the wrapped input stream */
1208        private final InputStream in;
1209
1210        /** the max length to provide */
1211        private final long max;
1212
1213        /** the number of bytes already returned */
1214        private long pos = 0;
1215
1216        /**
1217         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1218         * stream and limits it to a certain size.
1219         *
1220         * @param in The wrapped input stream
1221         * @param size The maximum number of bytes to return
1222         */
1223        public BoundedInputStream(final InputStream in, final long size) {
1224            this.max = size;
1225            this.in = in;
1226        }
1227
1228        @Override
1229        public int read() throws IOException {
1230            if (max >= 0 && pos >= max) {
1231                return -1;
1232            }
1233            final int result = in.read();
1234            pos++;
1235            count(1);
1236            current.bytesReadFromStream++;
1237            return result;
1238        }
1239
1240        @Override
1241        public int read(final byte[] b) throws IOException {
1242            return this.read(b, 0, b.length);
1243        }
1244
1245        @Override
1246        public int read(final byte[] b, final int off, final int len) throws IOException {
1247            if (max >= 0 && pos >= max) {
1248                return -1;
1249            }
1250            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1251            final int bytesRead = in.read(b, off, (int) maxRead);
1252
1253            if (bytesRead == -1) {
1254                return -1;
1255            }
1256
1257            pos += bytesRead;
1258            count(bytesRead);
1259            current.bytesReadFromStream += bytesRead;
1260            return bytesRead;
1261        }
1262
1263        @Override
1264        public long skip(final long n) throws IOException {
1265            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1266            final long skippedBytes = IOUtils.skip(in, toSkip);
1267            pos += skippedBytes;
1268            return skippedBytes;
1269        }
1270
1271        @Override
1272        public int available() throws IOException {
1273            if (max >= 0 && pos >= max) {
1274                return 0;
1275            }
1276            return in.available();
1277        }
1278    }
1279}