001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.EOFException; 024import java.io.File; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.SequenceInputStream; 028import java.nio.Buffer; 029import java.nio.ByteBuffer; 030import java.nio.channels.FileChannel; 031import java.nio.channels.SeekableByteChannel; 032import java.nio.file.Files; 033import java.nio.file.StandardOpenOption; 034import java.util.Arrays; 035import java.util.Collections; 036import java.util.Comparator; 037import java.util.Enumeration; 038import java.util.EnumSet; 039import java.util.HashMap; 040import java.util.LinkedList; 041import java.util.List; 042import java.util.Map; 043import java.util.zip.Inflater; 044import java.util.zip.ZipException; 045 046import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 047import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 048import org.apache.commons.compress.utils.CountingInputStream; 049import org.apache.commons.compress.utils.IOUtils; 050import org.apache.commons.compress.utils.InputStreamStatistics; 051 052import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 053import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 054import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 055import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 056import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 057 058/** 059 * Replacement for <code>java.util.ZipFile</code>. 060 * 061 * <p>This class adds support for file name encodings other than UTF-8 062 * (which is required to work on ZIP files created by native zip tools 063 * and is able to skip a preamble like the one found in self 064 * extracting archives. Furthermore it returns instances of 065 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 066 * instead of <code>java.util.zip.ZipEntry</code>.</p> 067 * 068 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 069 * have to reimplement all methods anyway. Like 070 * <code>java.util.ZipFile</code>, it uses SeekableByteChannel under the 071 * covers and supports compressed and uncompressed entries. As of 072 * Apache Commons Compress 1.3 it also transparently supports Zip64 073 * extensions and thus individual entries and archives larger than 4 074 * GB or with more than 65536 entries.</p> 075 * 076 * <p>The method signatures mimic the ones of 077 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 078 * 079 * <ul> 080 * <li>There is no getName method.</li> 081 * <li>entries has been renamed to getEntries.</li> 082 * <li>getEntries and getEntry return 083 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 084 * instances.</li> 085 * <li>close is allowed to throw IOException.</li> 086 * </ul> 087 * 088 */ 089public class ZipFile implements Closeable { 090 private static final int HASH_SIZE = 509; 091 static final int NIBLET_MASK = 0x0f; 092 static final int BYTE_SHIFT = 8; 093 private static final int POS_0 = 0; 094 private static final int POS_1 = 1; 095 private static final int POS_2 = 2; 096 private static final int POS_3 = 3; 097 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 098 099 /** 100 * List of entries in the order they appear inside the central 101 * directory. 102 */ 103 private final List<ZipArchiveEntry> entries = 104 new LinkedList<>(); 105 106 /** 107 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 108 */ 109 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 110 new HashMap<>(HASH_SIZE); 111 112 /** 113 * The encoding to use for filenames and the file comment. 114 * 115 * <p>For a list of possible values see <a 116 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 117 * Defaults to UTF-8.</p> 118 */ 119 private final String encoding; 120 121 /** 122 * The zip encoding to use for filenames and the file comment. 123 */ 124 private final ZipEncoding zipEncoding; 125 126 /** 127 * File name of actual source. 128 */ 129 private final String archiveName; 130 131 /** 132 * The actual data source. 133 */ 134 private final SeekableByteChannel archive; 135 136 /** 137 * Whether to look for and use Unicode extra fields. 138 */ 139 private final boolean useUnicodeExtraFields; 140 141 /** 142 * Whether the file is closed. 143 */ 144 private volatile boolean closed = true; 145 146 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 147 private final byte[] dwordBuf = new byte[DWORD]; 148 private final byte[] wordBuf = new byte[WORD]; 149 private final byte[] cfhBuf = new byte[CFH_LEN]; 150 private final byte[] shortBuf = new byte[SHORT]; 151 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 152 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 153 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 154 155 /** 156 * Opens the given file for reading, assuming "UTF8" for file names. 157 * 158 * @param f the archive. 159 * 160 * @throws IOException if an error occurs while reading the file. 161 */ 162 public ZipFile(final File f) throws IOException { 163 this(f, ZipEncodingHelper.UTF8); 164 } 165 166 /** 167 * Opens the given file for reading, assuming "UTF8". 168 * 169 * @param name name of the archive. 170 * 171 * @throws IOException if an error occurs while reading the file. 172 */ 173 public ZipFile(final String name) throws IOException { 174 this(new File(name), ZipEncodingHelper.UTF8); 175 } 176 177 /** 178 * Opens the given file for reading, assuming the specified 179 * encoding for file names, scanning unicode extra fields. 180 * 181 * @param name name of the archive. 182 * @param encoding the encoding to use for file names, use null 183 * for the platform's default encoding 184 * 185 * @throws IOException if an error occurs while reading the file. 186 */ 187 public ZipFile(final String name, final String encoding) throws IOException { 188 this(new File(name), encoding, true); 189 } 190 191 /** 192 * Opens the given file for reading, assuming the specified 193 * encoding for file names and scanning for unicode extra fields. 194 * 195 * @param f the archive. 196 * @param encoding the encoding to use for file names, use null 197 * for the platform's default encoding 198 * 199 * @throws IOException if an error occurs while reading the file. 200 */ 201 public ZipFile(final File f, final String encoding) throws IOException { 202 this(f, encoding, true); 203 } 204 205 /** 206 * Opens the given file for reading, assuming the specified 207 * encoding for file names. 208 * 209 * @param f the archive. 210 * @param encoding the encoding to use for file names, use null 211 * for the platform's default encoding 212 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 213 * Extra Fields (if present) to set the file names. 214 * 215 * @throws IOException if an error occurs while reading the file. 216 */ 217 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) 218 throws IOException { 219 this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), 220 f.getAbsolutePath(), encoding, useUnicodeExtraFields, true); 221 } 222 223 /** 224 * Opens the given channel for reading, assuming "UTF8" for file names. 225 * 226 * <p>{@link 227 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 228 * allows you to read from an in-memory archive.</p> 229 * 230 * @param channel the archive. 231 * 232 * @throws IOException if an error occurs while reading the file. 233 * @since 1.13 234 */ 235 public ZipFile(final SeekableByteChannel channel) 236 throws IOException { 237 this(channel, "unknown archive", ZipEncodingHelper.UTF8, true); 238 } 239 240 /** 241 * Opens the given channel for reading, assuming the specified 242 * encoding for file names. 243 * 244 * <p>{@link 245 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 246 * allows you to read from an in-memory archive.</p> 247 * 248 * @param channel the archive. 249 * @param encoding the encoding to use for file names, use null 250 * for the platform's default encoding 251 * 252 * @throws IOException if an error occurs while reading the file. 253 * @since 1.13 254 */ 255 public ZipFile(final SeekableByteChannel channel, final String encoding) 256 throws IOException { 257 this(channel, "unknown archive", encoding, true); 258 } 259 260 /** 261 * Opens the given channel for reading, assuming the specified 262 * encoding for file names. 263 * 264 * <p>{@link 265 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 266 * allows you to read from an in-memory archive.</p> 267 * 268 * @param channel the archive. 269 * @param archiveName name of the archive, used for error messages only. 270 * @param encoding the encoding to use for file names, use null 271 * for the platform's default encoding 272 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 273 * Extra Fields (if present) to set the file names. 274 * 275 * @throws IOException if an error occurs while reading the file. 276 * @since 1.13 277 */ 278 public ZipFile(final SeekableByteChannel channel, final String archiveName, 279 final String encoding, final boolean useUnicodeExtraFields) 280 throws IOException { 281 this(channel, archiveName, encoding, useUnicodeExtraFields, false); 282 } 283 284 private ZipFile(final SeekableByteChannel channel, final String archiveName, 285 final String encoding, final boolean useUnicodeExtraFields, 286 final boolean closeOnError) 287 throws IOException { 288 this.archiveName = archiveName; 289 this.encoding = encoding; 290 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 291 this.useUnicodeExtraFields = useUnicodeExtraFields; 292 archive = channel; 293 boolean success = false; 294 try { 295 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 296 populateFromCentralDirectory(); 297 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 298 success = true; 299 } finally { 300 closed = !success; 301 if (!success && closeOnError) { 302 IOUtils.closeQuietly(archive); 303 } 304 } 305 } 306 307 /** 308 * The encoding to use for filenames and the file comment. 309 * 310 * @return null if using the platform's default character encoding. 311 */ 312 public String getEncoding() { 313 return encoding; 314 } 315 316 /** 317 * Closes the archive. 318 * @throws IOException if an error occurs closing the archive. 319 */ 320 @Override 321 public void close() throws IOException { 322 // this flag is only written here and read in finalize() which 323 // can never be run in parallel. 324 // no synchronization needed. 325 closed = true; 326 327 archive.close(); 328 } 329 330 /** 331 * close a zipfile quietly; throw no io fault, do nothing 332 * on a null parameter 333 * @param zipfile file to close, can be null 334 */ 335 public static void closeQuietly(final ZipFile zipfile) { 336 IOUtils.closeQuietly(zipfile); 337 } 338 339 /** 340 * Returns all entries. 341 * 342 * <p>Entries will be returned in the same order they appear 343 * within the archive's central directory.</p> 344 * 345 * @return all entries as {@link ZipArchiveEntry} instances 346 */ 347 public Enumeration<ZipArchiveEntry> getEntries() { 348 return Collections.enumeration(entries); 349 } 350 351 /** 352 * Returns all entries in physical order. 353 * 354 * <p>Entries will be returned in the same order their contents 355 * appear within the archive.</p> 356 * 357 * @return all entries as {@link ZipArchiveEntry} instances 358 * 359 * @since 1.1 360 */ 361 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 362 final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]); 363 Arrays.sort(allEntries, offsetComparator); 364 return Collections.enumeration(Arrays.asList(allEntries)); 365 } 366 367 /** 368 * Returns a named entry - or {@code null} if no entry by 369 * that name exists. 370 * 371 * <p>If multiple entries with the same name exist the first entry 372 * in the archive's central directory by that name is 373 * returned.</p> 374 * 375 * @param name name of the entry. 376 * @return the ZipArchiveEntry corresponding to the given name - or 377 * {@code null} if not present. 378 */ 379 public ZipArchiveEntry getEntry(final String name) { 380 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 381 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 382 } 383 384 /** 385 * Returns all named entries in the same order they appear within 386 * the archive's central directory. 387 * 388 * @param name name of the entry. 389 * @return the Iterable<ZipArchiveEntry> corresponding to the 390 * given name 391 * @since 1.6 392 */ 393 public Iterable<ZipArchiveEntry> getEntries(final String name) { 394 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 395 return entriesOfThatName != null ? entriesOfThatName 396 : Collections.<ZipArchiveEntry>emptyList(); 397 } 398 399 /** 400 * Returns all named entries in the same order their contents 401 * appear within the archive. 402 * 403 * @param name name of the entry. 404 * @return the Iterable<ZipArchiveEntry> corresponding to the 405 * given name 406 * @since 1.6 407 */ 408 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 409 ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; 410 if (nameMap.containsKey(name)) { 411 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 412 Arrays.sort(entriesOfThatName, offsetComparator); 413 } 414 return Arrays.asList(entriesOfThatName); 415 } 416 417 /** 418 * Whether this class is able to read the given entry. 419 * 420 * <p>May return false if it is set up to use encryption or a 421 * compression method that hasn't been implemented yet.</p> 422 * @since 1.1 423 * @param ze the entry 424 * @return whether this class is able to read the given entry. 425 */ 426 public boolean canReadEntryData(final ZipArchiveEntry ze) { 427 return ZipUtil.canHandleEntryData(ze); 428 } 429 430 /** 431 * Expose the raw stream of the archive entry (compressed form). 432 * 433 * <p>This method does not relate to how/if we understand the payload in the 434 * stream, since we really only intend to move it on to somewhere else.</p> 435 * 436 * @param ze The entry to get the stream for 437 * @return The raw input stream containing (possibly) compressed data. 438 * @since 1.11 439 */ 440 public InputStream getRawInputStream(final ZipArchiveEntry ze) { 441 if (!(ze instanceof Entry)) { 442 return null; 443 } 444 final long start = ze.getDataOffset(); 445 return createBoundedInputStream(start, ze.getCompressedSize()); 446 } 447 448 449 /** 450 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 451 * Compression and all other attributes will be as in this file. 452 * <p>This method transfers entries based on the central directory of the zip file.</p> 453 * 454 * @param target The zipArchiveOutputStream to write the entries to 455 * @param predicate A predicate that selects which entries to write 456 * @throws IOException on error 457 */ 458 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 459 throws IOException { 460 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 461 while (src.hasMoreElements()) { 462 final ZipArchiveEntry entry = src.nextElement(); 463 if (predicate.test( entry)) { 464 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 465 } 466 } 467 } 468 469 /** 470 * Returns an InputStream for reading the contents of the given entry. 471 * 472 * @param ze the entry to get the stream for. 473 * @return a stream to read the entry from. The returned stream 474 * implements {@link InputStreamStatistics}. 475 * @throws IOException if unable to create an input stream from the zipentry 476 */ 477 public InputStream getInputStream(final ZipArchiveEntry ze) 478 throws IOException { 479 if (!(ze instanceof Entry)) { 480 return null; 481 } 482 // cast validity is checked just above 483 ZipUtil.checkRequestedFeatures(ze); 484 final long start = ze.getDataOffset(); 485 486 // doesn't get closed if the method is not supported - which 487 // should never happen because of the checkRequestedFeatures 488 // call above 489 final InputStream is = 490 new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR 491 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 492 case STORED: 493 return new StoredStatisticsStream(is); 494 case UNSHRINKING: 495 return new UnshrinkingInputStream(is); 496 case IMPLODING: 497 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 498 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 499 case DEFLATED: 500 final Inflater inflater = new Inflater(true); 501 // Inflater with nowrap=true has this odd contract for a zero padding 502 // byte following the data stream; this used to be zlib's requirement 503 // and has been fixed a long time ago, but the contract persists so 504 // we comply. 505 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 506 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), 507 inflater) { 508 @Override 509 public void close() throws IOException { 510 try { 511 super.close(); 512 } finally { 513 inflater.end(); 514 } 515 } 516 }; 517 case BZIP2: 518 return new BZip2CompressorInputStream(is); 519 case ENHANCED_DEFLATED: 520 return new Deflate64CompressorInputStream(is); 521 case AES_ENCRYPTED: 522 case EXPANDING_LEVEL_1: 523 case EXPANDING_LEVEL_2: 524 case EXPANDING_LEVEL_3: 525 case EXPANDING_LEVEL_4: 526 case JPEG: 527 case LZMA: 528 case PKWARE_IMPLODING: 529 case PPMD: 530 case TOKENIZATION: 531 case UNKNOWN: 532 case WAVPACK: 533 case XZ: 534 default: 535 throw new ZipException("Found unsupported compression method " 536 + ze.getMethod()); 537 } 538 } 539 540 /** 541 * <p> 542 * Convenience method to return the entry's content as a String if isUnixSymlink() 543 * returns true for it, otherwise returns null. 544 * </p> 545 * 546 * <p>This method assumes the symbolic link's file name uses the 547 * same encoding that as been specified for this ZipFile.</p> 548 * 549 * @param entry ZipArchiveEntry object that represents the symbolic link 550 * @return entry's content as a String 551 * @throws IOException problem with content's input stream 552 * @since 1.5 553 */ 554 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 555 if (entry != null && entry.isUnixSymlink()) { 556 try (InputStream in = getInputStream(entry)) { 557 return zipEncoding.decode(IOUtils.toByteArray(in)); 558 } 559 } 560 return null; 561 } 562 563 /** 564 * Ensures that the close method of this zipfile is called when 565 * there are no more references to it. 566 * @see #close() 567 */ 568 @Override 569 protected void finalize() throws Throwable { 570 try { 571 if (!closed) { 572 System.err.println("Cleaning up unclosed ZipFile for archive " 573 + archiveName); 574 close(); 575 } 576 } finally { 577 super.finalize(); 578 } 579 } 580 581 /** 582 * Length of a "central directory" entry structure without file 583 * name, extra fields or comment. 584 */ 585 private static final int CFH_LEN = 586 /* version made by */ SHORT 587 /* version needed to extract */ + SHORT 588 /* general purpose bit flag */ + SHORT 589 /* compression method */ + SHORT 590 /* last mod file time */ + SHORT 591 /* last mod file date */ + SHORT 592 /* crc-32 */ + WORD 593 /* compressed size */ + WORD 594 /* uncompressed size */ + WORD 595 /* filename length */ + SHORT 596 /* extra field length */ + SHORT 597 /* file comment length */ + SHORT 598 /* disk number start */ + SHORT 599 /* internal file attributes */ + SHORT 600 /* external file attributes */ + WORD 601 /* relative offset of local header */ + WORD; 602 603 private static final long CFH_SIG = 604 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 605 606 /** 607 * Reads the central directory of the given archive and populates 608 * the internal tables with ZipArchiveEntry instances. 609 * 610 * <p>The ZipArchiveEntrys will know all data that can be obtained from 611 * the central directory alone, but not the data that requires the 612 * local file header or additional data to be read.</p> 613 * 614 * @return a map of zipentries that didn't have the language 615 * encoding flag set when read. 616 */ 617 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() 618 throws IOException { 619 final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = 620 new HashMap<>(); 621 622 positionAtCentralDirectory(); 623 624 ((Buffer)wordBbuf).rewind(); 625 IOUtils.readFully(archive, wordBbuf); 626 long sig = ZipLong.getValue(wordBuf); 627 628 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 629 throw new IOException("central directory is empty, can't expand" 630 + " corrupt archive."); 631 } 632 633 while (sig == CFH_SIG) { 634 readCentralDirectoryEntry(noUTF8Flag); 635 ((Buffer)wordBbuf).rewind(); 636 IOUtils.readFully(archive, wordBbuf); 637 sig = ZipLong.getValue(wordBuf); 638 } 639 return noUTF8Flag; 640 } 641 642 /** 643 * Reads an individual entry of the central directory, creats an 644 * ZipArchiveEntry from it and adds it to the global maps. 645 * 646 * @param noUTF8Flag map used to collect entries that don't have 647 * their UTF-8 flag set and whose name will be set by data read 648 * from the local file header later. The current entry may be 649 * added to this map. 650 */ 651 private void 652 readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) 653 throws IOException { 654 ((Buffer)cfhBbuf).rewind(); 655 IOUtils.readFully(archive, cfhBbuf); 656 int off = 0; 657 final Entry ze = new Entry(); 658 659 final int versionMadeBy = ZipShort.getValue(cfhBuf, off); 660 off += SHORT; 661 ze.setVersionMadeBy(versionMadeBy); 662 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); 663 664 ze.setVersionRequired(ZipShort.getValue(cfhBuf, off)); 665 off += SHORT; // version required 666 667 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off); 668 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 669 final ZipEncoding entryEncoding = 670 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 671 if (hasUTF8Flag) { 672 ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG); 673 } 674 ze.setGeneralPurposeBit(gpFlag); 675 ze.setRawFlag(ZipShort.getValue(cfhBuf, off)); 676 677 off += SHORT; 678 679 //noinspection MagicConstant 680 ze.setMethod(ZipShort.getValue(cfhBuf, off)); 681 off += SHORT; 682 683 final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off)); 684 ze.setTime(time); 685 off += WORD; 686 687 ze.setCrc(ZipLong.getValue(cfhBuf, off)); 688 off += WORD; 689 690 ze.setCompressedSize(ZipLong.getValue(cfhBuf, off)); 691 off += WORD; 692 693 ze.setSize(ZipLong.getValue(cfhBuf, off)); 694 off += WORD; 695 696 final int fileNameLen = ZipShort.getValue(cfhBuf, off); 697 off += SHORT; 698 699 final int extraLen = ZipShort.getValue(cfhBuf, off); 700 off += SHORT; 701 702 final int commentLen = ZipShort.getValue(cfhBuf, off); 703 off += SHORT; 704 705 final int diskStart = ZipShort.getValue(cfhBuf, off); 706 off += SHORT; 707 708 ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off)); 709 off += SHORT; 710 711 ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off)); 712 off += WORD; 713 714 final byte[] fileName = new byte[fileNameLen]; 715 IOUtils.readFully(archive, ByteBuffer.wrap(fileName)); 716 ze.setName(entryEncoding.decode(fileName), fileName); 717 718 // LFH offset, 719 ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off)); 720 // data offset will be filled later 721 entries.add(ze); 722 723 final byte[] cdExtraData = new byte[extraLen]; 724 IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData)); 725 ze.setCentralDirectoryExtra(cdExtraData); 726 727 setSizesAndOffsetFromZip64Extra(ze, diskStart); 728 729 final byte[] comment = new byte[commentLen]; 730 IOUtils.readFully(archive, ByteBuffer.wrap(comment)); 731 ze.setComment(entryEncoding.decode(comment)); 732 733 if (!hasUTF8Flag && useUnicodeExtraFields) { 734 noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); 735 } 736 } 737 738 /** 739 * If the entry holds a Zip64 extended information extra field, 740 * read sizes from there if the entry's sizes are set to 741 * 0xFFFFFFFFF, do the same for the offset of the local file 742 * header. 743 * 744 * <p>Ensures the Zip64 extra either knows both compressed and 745 * uncompressed size or neither of both as the internal logic in 746 * ExtraFieldUtils forces the field to create local header data 747 * even if they are never used - and here a field with only one 748 * size would be invalid.</p> 749 */ 750 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze, 751 final int diskStart) 752 throws IOException { 753 final Zip64ExtendedInformationExtraField z64 = 754 (Zip64ExtendedInformationExtraField) 755 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 756 if (z64 != null) { 757 final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 758 final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 759 final boolean hasRelativeHeaderOffset = 760 ze.getLocalHeaderOffset() == ZIP64_MAGIC; 761 z64.reparseCentralDirectoryData(hasUncompressedSize, 762 hasCompressedSize, 763 hasRelativeHeaderOffset, 764 diskStart == ZIP64_MAGIC_SHORT); 765 766 if (hasUncompressedSize) { 767 ze.setSize(z64.getSize().getLongValue()); 768 } else if (hasCompressedSize) { 769 z64.setSize(new ZipEightByteInteger(ze.getSize())); 770 } 771 772 if (hasCompressedSize) { 773 ze.setCompressedSize(z64.getCompressedSize().getLongValue()); 774 } else if (hasUncompressedSize) { 775 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 776 } 777 778 if (hasRelativeHeaderOffset) { 779 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 780 } 781 } 782 } 783 784 /** 785 * Length of the "End of central directory record" - which is 786 * supposed to be the last structure of the archive - without file 787 * comment. 788 */ 789 static final int MIN_EOCD_SIZE = 790 /* end of central dir signature */ WORD 791 /* number of this disk */ + SHORT 792 /* number of the disk with the */ 793 /* start of the central directory */ + SHORT 794 /* total number of entries in */ 795 /* the central dir on this disk */ + SHORT 796 /* total number of entries in */ 797 /* the central dir */ + SHORT 798 /* size of the central directory */ + WORD 799 /* offset of start of central */ 800 /* directory with respect to */ 801 /* the starting disk number */ + WORD 802 /* zipfile comment length */ + SHORT; 803 804 /** 805 * Maximum length of the "End of central directory record" with a 806 * file comment. 807 */ 808 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 809 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 810 811 /** 812 * Offset of the field that holds the location of the first 813 * central directory entry inside the "End of central directory 814 * record" relative to the start of the "End of central directory 815 * record". 816 */ 817 private static final int CFD_LOCATOR_OFFSET = 818 /* end of central dir signature */ WORD 819 /* number of this disk */ + SHORT 820 /* number of the disk with the */ 821 /* start of the central directory */ + SHORT 822 /* total number of entries in */ 823 /* the central dir on this disk */ + SHORT 824 /* total number of entries in */ 825 /* the central dir */ + SHORT 826 /* size of the central directory */ + WORD; 827 828 /** 829 * Length of the "Zip64 end of central directory locator" - which 830 * should be right in front of the "end of central directory 831 * record" if one is present at all. 832 */ 833 private static final int ZIP64_EOCDL_LENGTH = 834 /* zip64 end of central dir locator sig */ WORD 835 /* number of the disk with the start */ 836 /* start of the zip64 end of */ 837 /* central directory */ + WORD 838 /* relative offset of the zip64 */ 839 /* end of central directory record */ + DWORD 840 /* total number of disks */ + WORD; 841 842 /** 843 * Offset of the field that holds the location of the "Zip64 end 844 * of central directory record" inside the "Zip64 end of central 845 * directory locator" relative to the start of the "Zip64 end of 846 * central directory locator". 847 */ 848 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 849 /* zip64 end of central dir locator sig */ WORD 850 /* number of the disk with the start */ 851 /* start of the zip64 end of */ 852 /* central directory */ + WORD; 853 854 /** 855 * Offset of the field that holds the location of the first 856 * central directory entry inside the "Zip64 end of central 857 * directory record" relative to the start of the "Zip64 end of 858 * central directory record". 859 */ 860 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 861 /* zip64 end of central dir */ 862 /* signature */ WORD 863 /* size of zip64 end of central */ 864 /* directory record */ + DWORD 865 /* version made by */ + SHORT 866 /* version needed to extract */ + SHORT 867 /* number of this disk */ + WORD 868 /* number of the disk with the */ 869 /* start of the central directory */ + WORD 870 /* total number of entries in the */ 871 /* central directory on this disk */ + DWORD 872 /* total number of entries in the */ 873 /* central directory */ + DWORD 874 /* size of the central directory */ + DWORD; 875 876 /** 877 * Searches for either the "Zip64 end of central directory 878 * locator" or the "End of central dir record", parses 879 * it and positions the stream at the first central directory 880 * record. 881 */ 882 private void positionAtCentralDirectory() 883 throws IOException { 884 positionAtEndOfCentralDirectoryRecord(); 885 boolean found = false; 886 final boolean searchedForZip64EOCD = 887 archive.position() > ZIP64_EOCDL_LENGTH; 888 if (searchedForZip64EOCD) { 889 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 890 ((Buffer)wordBbuf).rewind(); 891 IOUtils.readFully(archive, wordBbuf); 892 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 893 wordBuf); 894 } 895 if (!found) { 896 // not a ZIP64 archive 897 if (searchedForZip64EOCD) { 898 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 899 } 900 positionAtCentralDirectory32(); 901 } else { 902 positionAtCentralDirectory64(); 903 } 904 } 905 906 /** 907 * Parses the "Zip64 end of central directory locator", 908 * finds the "Zip64 end of central directory record" using the 909 * parsed information, parses that and positions the stream at the 910 * first central directory record. 911 * 912 * Expects stream to be positioned right behind the "Zip64 913 * end of central directory locator"'s signature. 914 */ 915 private void positionAtCentralDirectory64() 916 throws IOException { 917 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 918 - WORD /* signature has already been read */); 919 ((Buffer)dwordBbuf).rewind(); 920 IOUtils.readFully(archive, dwordBbuf); 921 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 922 ((Buffer)wordBbuf).rewind(); 923 IOUtils.readFully(archive, wordBbuf); 924 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 925 throw new ZipException("archive's ZIP64 end of central " 926 + "directory locator is corrupt."); 927 } 928 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 929 - WORD /* signature has already been read */); 930 ((Buffer)dwordBbuf).rewind(); 931 IOUtils.readFully(archive, dwordBbuf); 932 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 933 } 934 935 /** 936 * Parses the "End of central dir record" and positions 937 * the stream at the first central directory record. 938 * 939 * Expects stream to be positioned at the beginning of the 940 * "End of central dir record". 941 */ 942 private void positionAtCentralDirectory32() 943 throws IOException { 944 skipBytes(CFD_LOCATOR_OFFSET); 945 ((Buffer)wordBbuf).rewind(); 946 IOUtils.readFully(archive, wordBbuf); 947 archive.position(ZipLong.getValue(wordBuf)); 948 } 949 950 /** 951 * Searches for the and positions the stream at the start of the 952 * "End of central dir record". 953 */ 954 private void positionAtEndOfCentralDirectoryRecord() 955 throws IOException { 956 final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 957 ZipArchiveOutputStream.EOCD_SIG); 958 if (!found) { 959 throw new ZipException("archive is not a ZIP archive"); 960 } 961 } 962 963 /** 964 * Searches the archive backwards from minDistance to maxDistance 965 * for the given signature, positions the RandomaccessFile right 966 * at the signature if it has been found. 967 */ 968 private boolean tryToLocateSignature(final long minDistanceFromEnd, 969 final long maxDistanceFromEnd, 970 final byte[] sig) throws IOException { 971 boolean found = false; 972 long off = archive.size() - minDistanceFromEnd; 973 final long stopSearching = 974 Math.max(0L, archive.size() - maxDistanceFromEnd); 975 if (off >= 0) { 976 for (; off >= stopSearching; off--) { 977 archive.position(off); 978 try { 979 ((Buffer)wordBbuf).rewind(); 980 IOUtils.readFully(archive, wordBbuf); 981 ((Buffer)wordBbuf).flip(); 982 } catch (EOFException ex) { 983 break; 984 } 985 int curr = wordBbuf.get(); 986 if (curr == sig[POS_0]) { 987 curr = wordBbuf.get(); 988 if (curr == sig[POS_1]) { 989 curr = wordBbuf.get(); 990 if (curr == sig[POS_2]) { 991 curr = wordBbuf.get(); 992 if (curr == sig[POS_3]) { 993 found = true; 994 break; 995 } 996 } 997 } 998 } 999 } 1000 } 1001 if (found) { 1002 archive.position(off); 1003 } 1004 return found; 1005 } 1006 1007 /** 1008 * Skips the given number of bytes or throws an EOFException if 1009 * skipping failed. 1010 */ 1011 private void skipBytes(final int count) throws IOException { 1012 long currentPosition = archive.position(); 1013 long newPosition = currentPosition + count; 1014 if (newPosition > archive.size()) { 1015 throw new EOFException(); 1016 } 1017 archive.position(newPosition); 1018 } 1019 1020 /** 1021 * Number of bytes in local file header up to the "length of 1022 * filename" entry. 1023 */ 1024 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 1025 /* local file header signature */ WORD 1026 /* version needed to extract */ + SHORT 1027 /* general purpose bit flag */ + SHORT 1028 /* compression method */ + SHORT 1029 /* last mod file time */ + SHORT 1030 /* last mod file date */ + SHORT 1031 /* crc-32 */ + WORD 1032 /* compressed size */ + WORD 1033 /* uncompressed size */ + (long) WORD; 1034 1035 /** 1036 * Walks through all recorded entries and adds the data available 1037 * from the local file header. 1038 * 1039 * <p>Also records the offsets for the data to read from the 1040 * entries.</p> 1041 */ 1042 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1043 entriesWithoutUTF8Flag) 1044 throws IOException { 1045 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1046 // entries is filled in populateFromCentralDirectory and 1047 // never modified 1048 final Entry ze = (Entry) zipArchiveEntry; 1049 final long offset = ze.getLocalHeaderOffset(); 1050 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1051 ((Buffer)wordBbuf).rewind(); 1052 IOUtils.readFully(archive, wordBbuf); 1053 ((Buffer)wordBbuf).flip(); 1054 wordBbuf.get(shortBuf); 1055 final int fileNameLen = ZipShort.getValue(shortBuf); 1056 wordBbuf.get(shortBuf); 1057 final int extraFieldLen = ZipShort.getValue(shortBuf); 1058 skipBytes(fileNameLen); 1059 final byte[] localExtraData = new byte[extraFieldLen]; 1060 IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData)); 1061 ze.setExtra(localExtraData); 1062 ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH 1063 + SHORT + SHORT + fileNameLen + extraFieldLen); 1064 ze.setStreamContiguous(true); 1065 1066 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1067 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1068 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1069 nc.comment); 1070 } 1071 1072 final String name = ze.getName(); 1073 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 1074 if (entriesOfThatName == null) { 1075 entriesOfThatName = new LinkedList<>(); 1076 nameMap.put(name, entriesOfThatName); 1077 } 1078 entriesOfThatName.addLast(ze); 1079 } 1080 } 1081 1082 /** 1083 * Checks whether the archive starts with a LFH. If it doesn't, 1084 * it may be an empty archive. 1085 */ 1086 private boolean startsWithLocalFileHeader() throws IOException { 1087 archive.position(0); 1088 ((Buffer)wordBbuf).rewind(); 1089 IOUtils.readFully(archive, wordBbuf); 1090 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1091 } 1092 1093 /** 1094 * Creates new BoundedInputStream, according to implementation of 1095 * underlying archive channel. 1096 */ 1097 private BoundedInputStream createBoundedInputStream(long start, long remaining) { 1098 return archive instanceof FileChannel ? 1099 new BoundedFileChannelInputStream(start, remaining) : 1100 new BoundedInputStream(start, remaining); 1101 } 1102 1103 /** 1104 * InputStream that delegates requests to the underlying 1105 * SeekableByteChannel, making sure that only bytes from a certain 1106 * range can be read. 1107 */ 1108 private class BoundedInputStream extends InputStream { 1109 private ByteBuffer singleByteBuffer; 1110 private final long end; 1111 private long loc; 1112 1113 BoundedInputStream(final long start, final long remaining) { 1114 this.end = start+remaining; 1115 if (this.end < start) { 1116 // check for potential vulnerability due to overflow 1117 throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining); 1118 } 1119 loc = start; 1120 } 1121 1122 @Override 1123 public synchronized int read() throws IOException { 1124 if (loc >= end) { 1125 return -1; 1126 } 1127 if (singleByteBuffer == null) { 1128 singleByteBuffer = ByteBuffer.allocate(1); 1129 } 1130 else { 1131 ((Buffer)singleByteBuffer).rewind(); 1132 } 1133 int read = read(loc, singleByteBuffer); 1134 if (read < 0) { 1135 return read; 1136 } 1137 loc++; 1138 return singleByteBuffer.get() & 0xff; 1139 } 1140 1141 @Override 1142 public synchronized int read(final byte[] b, final int off, int len) throws IOException { 1143 if (len <= 0) { 1144 return 0; 1145 } 1146 1147 if (len > end-loc) { 1148 if (loc >= end) { 1149 return -1; 1150 } 1151 len = (int)(end-loc); 1152 } 1153 1154 ByteBuffer buf; 1155 buf = ByteBuffer.wrap(b, off, len); 1156 int ret = read(loc, buf); 1157 if (ret > 0) { 1158 loc += ret; 1159 return ret; 1160 } 1161 return ret; 1162 } 1163 1164 protected int read(long pos, ByteBuffer buf) throws IOException { 1165 int read; 1166 synchronized (archive) { 1167 archive.position(pos); 1168 read = archive.read(buf); 1169 } 1170 ((Buffer)buf).flip(); 1171 return read; 1172 } 1173 } 1174 1175 /** 1176 * Lock-free implementation of BoundedInputStream. The 1177 * implementation uses positioned reads on the underlying archive 1178 * file channel and therefore performs significantly faster in 1179 * concurrent environment. 1180 */ 1181 private class BoundedFileChannelInputStream extends BoundedInputStream { 1182 private final FileChannel archive; 1183 1184 BoundedFileChannelInputStream(final long start, final long remaining) { 1185 super(start, remaining); 1186 archive = (FileChannel)ZipFile.this.archive; 1187 } 1188 1189 @Override 1190 protected int read(long pos, ByteBuffer buf) throws IOException { 1191 int read = archive.read(buf, pos); 1192 ((Buffer)buf).flip(); 1193 return read; 1194 } 1195 } 1196 1197 private static final class NameAndComment { 1198 private final byte[] name; 1199 private final byte[] comment; 1200 private NameAndComment(final byte[] name, final byte[] comment) { 1201 this.name = name; 1202 this.comment = comment; 1203 } 1204 } 1205 1206 /** 1207 * Compares two ZipArchiveEntries based on their offset within the archive. 1208 * 1209 * <p>Won't return any meaningful results if one of the entries 1210 * isn't part of the archive at all.</p> 1211 * 1212 * @since 1.1 1213 */ 1214 private final Comparator<ZipArchiveEntry> offsetComparator = 1215 new Comparator<ZipArchiveEntry>() { 1216 @Override 1217 public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) { 1218 if (e1 == e2) { 1219 return 0; 1220 } 1221 1222 final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null; 1223 final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null; 1224 if (ent1 == null) { 1225 return 1; 1226 } 1227 if (ent2 == null) { 1228 return -1; 1229 } 1230 final long val = (ent1.getLocalHeaderOffset() 1231 - ent2.getLocalHeaderOffset()); 1232 return val == 0 ? 0 : val < 0 ? -1 : +1; 1233 } 1234 }; 1235 1236 /** 1237 * Extends ZipArchiveEntry to store the offset within the archive. 1238 */ 1239 private static class Entry extends ZipArchiveEntry { 1240 1241 Entry() { 1242 } 1243 1244 @Override 1245 public int hashCode() { 1246 return 3 * super.hashCode() 1247 + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32); 1248 } 1249 1250 @Override 1251 public boolean equals(final Object other) { 1252 if (super.equals(other)) { 1253 // super.equals would return false if other were not an Entry 1254 final Entry otherEntry = (Entry) other; 1255 return getLocalHeaderOffset() 1256 == otherEntry.getLocalHeaderOffset() 1257 && getDataOffset() 1258 == otherEntry.getDataOffset(); 1259 } 1260 return false; 1261 } 1262 } 1263 1264 private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics { 1265 StoredStatisticsStream(InputStream in) { 1266 super(in); 1267 } 1268 1269 @Override 1270 public long getCompressedCount() { 1271 return super.getBytesRead(); 1272 } 1273 1274 @Override 1275 public long getUncompressedCount() { 1276 return getCompressedCount(); 1277 } 1278 } 1279}