001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.sevenz; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.DataInputStream; 024import java.io.File; 025import java.io.FilterInputStream; 026import java.io.IOException; 027import java.io.InputStream; 028import java.nio.Buffer; 029import java.nio.ByteBuffer; 030import java.nio.ByteOrder; 031import java.nio.CharBuffer; 032import java.nio.channels.SeekableByteChannel; 033import java.nio.charset.StandardCharsets; 034import java.nio.charset.CharsetEncoder; 035import java.nio.file.Files; 036import java.nio.file.StandardOpenOption; 037import java.util.ArrayList; 038import java.util.Arrays; 039import java.util.BitSet; 040import java.util.EnumSet; 041import java.util.LinkedList; 042import java.util.zip.CRC32; 043 044import org.apache.commons.compress.utils.BoundedInputStream; 045import org.apache.commons.compress.utils.CRC32VerifyingInputStream; 046import org.apache.commons.compress.utils.CharsetNames; 047import org.apache.commons.compress.utils.IOUtils; 048import org.apache.commons.compress.utils.InputStreamStatistics; 049 050/** 051 * Reads a 7z file, using SeekableByteChannel under 052 * the covers. 053 * <p> 054 * The 7z file format is a flexible container 055 * that can contain many compression and 056 * encryption types, but at the moment only 057 * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256 058 * are supported. 059 * <p> 060 * The format is very Windows/Intel specific, 061 * so it uses little-endian byte order, 062 * doesn't store user/group or permission bits, 063 * and represents times using NTFS timestamps 064 * (100 nanosecond units since 1 January 1601). 065 * Hence the official tools recommend against 066 * using it for backup purposes on *nix, and 067 * recommend .tar.7z or .tar.lzma or .tar.xz 068 * instead. 069 * <p> 070 * Both the header and file contents may be 071 * compressed and/or encrypted. With both 072 * encrypted, neither file names nor file 073 * contents can be read, but the use of 074 * encryption isn't plausibly deniable. 075 * 076 * @NotThreadSafe 077 * @since 1.6 078 */ 079public class SevenZFile implements Closeable { 080 static final int SIGNATURE_HEADER_SIZE = 32; 081 082 private final String fileName; 083 private SeekableByteChannel channel; 084 private final Archive archive; 085 private int currentEntryIndex = -1; 086 private int currentFolderIndex = -1; 087 private InputStream currentFolderInputStream = null; 088 private byte[] password; 089 090 private long compressedBytesReadFromCurrentEntry; 091 private long uncompressedBytesReadFromCurrentEntry; 092 093 private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>(); 094 095 // shared with SevenZOutputFile and tests, neither mutates it 096 static final byte[] sevenZSignature = { //NOSONAR 097 (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C 098 }; 099 100 /** 101 * Reads a file as 7z archive 102 * 103 * @param filename the file to read 104 * @param password optional password if the archive is encrypted 105 * @throws IOException if reading the archive fails 106 * @since 1.17 107 */ 108 public SevenZFile(final File filename, final char[] password) throws IOException { 109 this(Files.newByteChannel(filename.toPath(), EnumSet.of(StandardOpenOption.READ)), 110 filename.getAbsolutePath(), utf16Decode(password), true); 111 } 112 113 /** 114 * Reads a file as 7z archive 115 * 116 * @param filename the file to read 117 * @param password optional password if the archive is encrypted - 118 * the byte array is supposed to be the UTF16-LE encoded 119 * representation of the password. 120 * @throws IOException if reading the archive fails 121 * @deprecated use the char[]-arg version for the password instead 122 */ 123 public SevenZFile(final File filename, final byte[] password) throws IOException { 124 this(Files.newByteChannel(filename.toPath(), EnumSet.of(StandardOpenOption.READ)), 125 filename.getAbsolutePath(), password, true); 126 } 127 128 /** 129 * Reads a SeekableByteChannel as 7z archive 130 * 131 * <p>{@link 132 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 133 * allows you to read from an in-memory archive.</p> 134 * 135 * @param channel the channel to read 136 * @throws IOException if reading the archive fails 137 * @since 1.13 138 */ 139 public SevenZFile(final SeekableByteChannel channel) throws IOException { 140 this(channel, "unknown archive", (char[]) null); 141 } 142 143 /** 144 * Reads a SeekableByteChannel as 7z archive 145 * 146 * <p>{@link 147 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 148 * allows you to read from an in-memory archive.</p> 149 * 150 * @param channel the channel to read 151 * @param password optional password if the archive is encrypted 152 * @throws IOException if reading the archive fails 153 * @since 1.17 154 */ 155 public SevenZFile(final SeekableByteChannel channel, 156 final char[] password) throws IOException { 157 this(channel, "unknown archive", utf16Decode(password)); 158 } 159 160 /** 161 * Reads a SeekableByteChannel as 7z archive 162 * 163 * <p>{@link 164 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 165 * allows you to read from an in-memory archive.</p> 166 * 167 * @param channel the channel to read 168 * @param filename name of the archive - only used for error reporting 169 * @param password optional password if the archive is encrypted 170 * @throws IOException if reading the archive fails 171 * @since 1.17 172 */ 173 public SevenZFile(final SeekableByteChannel channel, String filename, 174 final char[] password) throws IOException { 175 this(channel, filename, utf16Decode(password), false); 176 } 177 178 /** 179 * Reads a SeekableByteChannel as 7z archive 180 * 181 * <p>{@link 182 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 183 * allows you to read from an in-memory archive.</p> 184 * 185 * @param channel the channel to read 186 * @param filename name of the archive - only used for error reporting 187 * @throws IOException if reading the archive fails 188 * @since 1.17 189 */ 190 public SevenZFile(final SeekableByteChannel channel, String filename) 191 throws IOException { 192 this(channel, filename, null, false); 193 } 194 195 /** 196 * Reads a SeekableByteChannel as 7z archive 197 * 198 * <p>{@link 199 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 200 * allows you to read from an in-memory archive.</p> 201 * 202 * @param channel the channel to read 203 * @param password optional password if the archive is encrypted - 204 * the byte array is supposed to be the UTF16-LE encoded 205 * representation of the password. 206 * @throws IOException if reading the archive fails 207 * @since 1.13 208 * @deprecated use the char[]-arg version for the password instead 209 */ 210 public SevenZFile(final SeekableByteChannel channel, 211 final byte[] password) throws IOException { 212 this(channel, "unknown archive", password); 213 } 214 215 /** 216 * Reads a SeekableByteChannel as 7z archive 217 * 218 * <p>{@link 219 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 220 * allows you to read from an in-memory archive.</p> 221 * 222 * @param channel the channel to read 223 * @param filename name of the archive - only used for error reporting 224 * @param password optional password if the archive is encrypted - 225 * the byte array is supposed to be the UTF16-LE encoded 226 * representation of the password. 227 * @throws IOException if reading the archive fails 228 * @since 1.13 229 * @deprecated use the char[]-arg version for the password instead 230 */ 231 public SevenZFile(final SeekableByteChannel channel, String filename, 232 final byte[] password) throws IOException { 233 this(channel, filename, password, false); 234 } 235 236 private SevenZFile(final SeekableByteChannel channel, String filename, 237 final byte[] password, boolean closeOnError) throws IOException { 238 boolean succeeded = false; 239 this.channel = channel; 240 this.fileName = filename; 241 try { 242 archive = readHeaders(password); 243 if (password != null) { 244 this.password = Arrays.copyOf(password, password.length); 245 } else { 246 this.password = null; 247 } 248 succeeded = true; 249 } finally { 250 if (!succeeded && closeOnError) { 251 this.channel.close(); 252 } 253 } 254 } 255 256 /** 257 * Reads a file as unencrypted 7z archive 258 * 259 * @param filename the file to read 260 * @throws IOException if reading the archive fails 261 */ 262 public SevenZFile(final File filename) throws IOException { 263 this(filename, (char[]) null); 264 } 265 266 /** 267 * Closes the archive. 268 * @throws IOException if closing the file fails 269 */ 270 @Override 271 public void close() throws IOException { 272 if (channel != null) { 273 try { 274 channel.close(); 275 } finally { 276 channel = null; 277 if (password != null) { 278 Arrays.fill(password, (byte) 0); 279 } 280 password = null; 281 } 282 } 283 } 284 285 /** 286 * Returns the next Archive Entry in this archive. 287 * 288 * @return the next entry, 289 * or {@code null} if there are no more entries 290 * @throws IOException if the next entry could not be read 291 */ 292 public SevenZArchiveEntry getNextEntry() throws IOException { 293 if (currentEntryIndex >= archive.files.length - 1) { 294 return null; 295 } 296 ++currentEntryIndex; 297 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 298 buildDecodingStream(); 299 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 300 return entry; 301 } 302 303 /** 304 * Returns meta-data of all archive entries. 305 * 306 * <p>This method only provides meta-data, the entries can not be 307 * used to read the contents, you still need to process all 308 * entries in order using {@link #getNextEntry} for that.</p> 309 * 310 * <p>The content methods are only available for entries that have 311 * already been reached via {@link #getNextEntry}.</p> 312 * 313 * @return meta-data of all archive entries. 314 * @since 1.11 315 */ 316 public Iterable<SevenZArchiveEntry> getEntries() { 317 return Arrays.asList(archive.files); 318 } 319 320 private Archive readHeaders(final byte[] password) throws IOException { 321 ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */) 322 .order(ByteOrder.LITTLE_ENDIAN); 323 readFully(buf); 324 final byte[] signature = new byte[6]; 325 buf.get(signature); 326 if (!Arrays.equals(signature, sevenZSignature)) { 327 throw new IOException("Bad 7z signature"); 328 } 329 // 7zFormat.txt has it wrong - it's first major then minor 330 final byte archiveVersionMajor = buf.get(); 331 final byte archiveVersionMinor = buf.get(); 332 if (archiveVersionMajor != 0) { 333 throw new IOException(String.format("Unsupported 7z version (%d,%d)", 334 archiveVersionMajor, archiveVersionMinor)); 335 } 336 337 final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); 338 final StartHeader startHeader = readStartHeader(startHeaderCrc); 339 340 final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; 341 if (nextHeaderSizeInt != startHeader.nextHeaderSize) { 342 throw new IOException("cannot handle nextHeaderSize " + startHeader.nextHeaderSize); 343 } 344 channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); 345 buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); 346 readFully(buf); 347 final CRC32 crc = new CRC32(); 348 crc.update(buf.array()); 349 if (startHeader.nextHeaderCrc != crc.getValue()) { 350 throw new IOException("NextHeader CRC mismatch"); 351 } 352 353 Archive archive = new Archive(); 354 int nid = getUnsignedByte(buf); 355 if (nid == NID.kEncodedHeader) { 356 buf = readEncodedHeader(buf, archive, password); 357 // Archive gets rebuilt with the new header 358 archive = new Archive(); 359 nid = getUnsignedByte(buf); 360 } 361 if (nid == NID.kHeader) { 362 readHeader(buf, archive); 363 } else { 364 throw new IOException("Broken or unsupported archive: no Header"); 365 } 366 return archive; 367 } 368 369 private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { 370 final StartHeader startHeader = new StartHeader(); 371 // using Stream rather than ByteBuffer for the benefit of the 372 // built-in CRC check 373 try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream( 374 new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { 375 startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); 376 startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); 377 startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); 378 return startHeader; 379 } 380 } 381 382 private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { 383 int nid = getUnsignedByte(header); 384 385 if (nid == NID.kArchiveProperties) { 386 readArchiveProperties(header); 387 nid = getUnsignedByte(header); 388 } 389 390 if (nid == NID.kAdditionalStreamsInfo) { 391 throw new IOException("Additional streams unsupported"); 392 //nid = header.readUnsignedByte(); 393 } 394 395 if (nid == NID.kMainStreamsInfo) { 396 readStreamsInfo(header, archive); 397 nid = getUnsignedByte(header); 398 } 399 400 if (nid == NID.kFilesInfo) { 401 readFilesInfo(header, archive); 402 nid = getUnsignedByte(header); 403 } 404 405 if (nid != NID.kEnd) { 406 throw new IOException("Badly terminated header, found " + nid); 407 } 408 } 409 410 private void readArchiveProperties(final ByteBuffer input) throws IOException { 411 // FIXME: the reference implementation just throws them away? 412 int nid = getUnsignedByte(input); 413 while (nid != NID.kEnd) { 414 final long propertySize = readUint64(input); 415 final byte[] property = new byte[(int)propertySize]; 416 input.get(property); 417 nid = getUnsignedByte(input); 418 } 419 } 420 421 private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, 422 final byte[] password) throws IOException { 423 readStreamsInfo(header, archive); 424 425 // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 426 final Folder folder = archive.folders[0]; 427 final int firstPackStreamIndex = 0; 428 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 429 0; 430 431 channel.position(folderOffset); 432 InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, 433 archive.packSizes[firstPackStreamIndex]); 434 for (final Coder coder : folder.getOrderedCoders()) { 435 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 436 throw new IOException("Multi input/output stream coders are not yet supported"); 437 } 438 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR 439 folder.getUnpackSizeForCoder(coder), coder, password); 440 } 441 if (folder.hasCrc) { 442 inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack, 443 folder.getUnpackSize(), folder.crc); 444 } 445 final byte[] nextHeader = new byte[(int)folder.getUnpackSize()]; 446 try (DataInputStream nextHeaderInputStream = new DataInputStream(inputStreamStack)) { 447 nextHeaderInputStream.readFully(nextHeader); 448 } 449 return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); 450 } 451 452 private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 453 int nid = getUnsignedByte(header); 454 455 if (nid == NID.kPackInfo) { 456 readPackInfo(header, archive); 457 nid = getUnsignedByte(header); 458 } 459 460 if (nid == NID.kUnpackInfo) { 461 readUnpackInfo(header, archive); 462 nid = getUnsignedByte(header); 463 } else { 464 // archive without unpack/coders info 465 archive.folders = new Folder[0]; 466 } 467 468 if (nid == NID.kSubStreamsInfo) { 469 readSubStreamsInfo(header, archive); 470 nid = getUnsignedByte(header); 471 } 472 473 if (nid != NID.kEnd) { 474 throw new IOException("Badly terminated StreamsInfo"); 475 } 476 } 477 478 private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 479 archive.packPos = readUint64(header); 480 final long numPackStreams = readUint64(header); 481 int nid = getUnsignedByte(header); 482 if (nid == NID.kSize) { 483 archive.packSizes = new long[(int)numPackStreams]; 484 for (int i = 0; i < archive.packSizes.length; i++) { 485 archive.packSizes[i] = readUint64(header); 486 } 487 nid = getUnsignedByte(header); 488 } 489 490 if (nid == NID.kCRC) { 491 archive.packCrcsDefined = readAllOrBits(header, (int)numPackStreams); 492 archive.packCrcs = new long[(int)numPackStreams]; 493 for (int i = 0; i < (int)numPackStreams; i++) { 494 if (archive.packCrcsDefined.get(i)) { 495 archive.packCrcs[i] = 0xffffFFFFL & header.getInt(); 496 } 497 } 498 499 nid = getUnsignedByte(header); 500 } 501 502 if (nid != NID.kEnd) { 503 throw new IOException("Badly terminated PackInfo (" + nid + ")"); 504 } 505 } 506 507 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 508 int nid = getUnsignedByte(header); 509 if (nid != NID.kFolder) { 510 throw new IOException("Expected kFolder, got " + nid); 511 } 512 final long numFolders = readUint64(header); 513 final Folder[] folders = new Folder[(int)numFolders]; 514 archive.folders = folders; 515 final int external = getUnsignedByte(header); 516 if (external != 0) { 517 throw new IOException("External unsupported"); 518 } 519 for (int i = 0; i < (int)numFolders; i++) { 520 folders[i] = readFolder(header); 521 } 522 523 nid = getUnsignedByte(header); 524 if (nid != NID.kCodersUnpackSize) { 525 throw new IOException("Expected kCodersUnpackSize, got " + nid); 526 } 527 for (final Folder folder : folders) { 528 folder.unpackSizes = new long[(int)folder.totalOutputStreams]; 529 for (int i = 0; i < folder.totalOutputStreams; i++) { 530 folder.unpackSizes[i] = readUint64(header); 531 } 532 } 533 534 nid = getUnsignedByte(header); 535 if (nid == NID.kCRC) { 536 final BitSet crcsDefined = readAllOrBits(header, (int)numFolders); 537 for (int i = 0; i < (int)numFolders; i++) { 538 if (crcsDefined.get(i)) { 539 folders[i].hasCrc = true; 540 folders[i].crc = 0xffffFFFFL & header.getInt(); 541 } else { 542 folders[i].hasCrc = false; 543 } 544 } 545 546 nid = getUnsignedByte(header); 547 } 548 549 if (nid != NID.kEnd) { 550 throw new IOException("Badly terminated UnpackInfo"); 551 } 552 } 553 554 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 555 for (final Folder folder : archive.folders) { 556 folder.numUnpackSubStreams = 1; 557 } 558 int totalUnpackStreams = archive.folders.length; 559 560 int nid = getUnsignedByte(header); 561 if (nid == NID.kNumUnpackStream) { 562 totalUnpackStreams = 0; 563 for (final Folder folder : archive.folders) { 564 final long numStreams = readUint64(header); 565 folder.numUnpackSubStreams = (int)numStreams; 566 totalUnpackStreams += numStreams; 567 } 568 nid = getUnsignedByte(header); 569 } 570 571 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); 572 subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; 573 subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); 574 subStreamsInfo.crcs = new long[totalUnpackStreams]; 575 576 int nextUnpackStream = 0; 577 for (final Folder folder : archive.folders) { 578 if (folder.numUnpackSubStreams == 0) { 579 continue; 580 } 581 long sum = 0; 582 if (nid == NID.kSize) { 583 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { 584 final long size = readUint64(header); 585 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 586 sum += size; 587 } 588 } 589 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 590 } 591 if (nid == NID.kSize) { 592 nid = getUnsignedByte(header); 593 } 594 595 int numDigests = 0; 596 for (final Folder folder : archive.folders) { 597 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 598 numDigests += folder.numUnpackSubStreams; 599 } 600 } 601 602 if (nid == NID.kCRC) { 603 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 604 final long[] missingCrcs = new long[numDigests]; 605 for (int i = 0; i < numDigests; i++) { 606 if (hasMissingCrc.get(i)) { 607 missingCrcs[i] = 0xffffFFFFL & header.getInt(); 608 } 609 } 610 int nextCrc = 0; 611 int nextMissingCrc = 0; 612 for (final Folder folder: archive.folders) { 613 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 614 subStreamsInfo.hasCrc.set(nextCrc, true); 615 subStreamsInfo.crcs[nextCrc] = folder.crc; 616 ++nextCrc; 617 } else { 618 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 619 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 620 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 621 ++nextCrc; 622 ++nextMissingCrc; 623 } 624 } 625 } 626 627 nid = getUnsignedByte(header); 628 } 629 630 if (nid != NID.kEnd) { 631 throw new IOException("Badly terminated SubStreamsInfo"); 632 } 633 634 archive.subStreamsInfo = subStreamsInfo; 635 } 636 637 private Folder readFolder(final ByteBuffer header) throws IOException { 638 final Folder folder = new Folder(); 639 640 final long numCoders = readUint64(header); 641 final Coder[] coders = new Coder[(int)numCoders]; 642 long totalInStreams = 0; 643 long totalOutStreams = 0; 644 for (int i = 0; i < coders.length; i++) { 645 coders[i] = new Coder(); 646 final int bits = getUnsignedByte(header); 647 final int idSize = bits & 0xf; 648 final boolean isSimple = (bits & 0x10) == 0; 649 final boolean hasAttributes = (bits & 0x20) != 0; 650 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 651 652 coders[i].decompressionMethodId = new byte[idSize]; 653 header.get(coders[i].decompressionMethodId); 654 if (isSimple) { 655 coders[i].numInStreams = 1; 656 coders[i].numOutStreams = 1; 657 } else { 658 coders[i].numInStreams = readUint64(header); 659 coders[i].numOutStreams = readUint64(header); 660 } 661 totalInStreams += coders[i].numInStreams; 662 totalOutStreams += coders[i].numOutStreams; 663 if (hasAttributes) { 664 final long propertiesSize = readUint64(header); 665 coders[i].properties = new byte[(int)propertiesSize]; 666 header.get(coders[i].properties); 667 } 668 // would need to keep looping as above: 669 while (moreAlternativeMethods) { 670 throw new IOException("Alternative methods are unsupported, please report. " + 671 "The reference implementation doesn't support them either."); 672 } 673 } 674 folder.coders = coders; 675 folder.totalInputStreams = totalInStreams; 676 folder.totalOutputStreams = totalOutStreams; 677 678 if (totalOutStreams == 0) { 679 throw new IOException("Total output streams can't be 0"); 680 } 681 final long numBindPairs = totalOutStreams - 1; 682 final BindPair[] bindPairs = new BindPair[(int)numBindPairs]; 683 for (int i = 0; i < bindPairs.length; i++) { 684 bindPairs[i] = new BindPair(); 685 bindPairs[i].inIndex = readUint64(header); 686 bindPairs[i].outIndex = readUint64(header); 687 } 688 folder.bindPairs = bindPairs; 689 690 if (totalInStreams < numBindPairs) { 691 throw new IOException("Total input streams can't be less than the number of bind pairs"); 692 } 693 final long numPackedStreams = totalInStreams - numBindPairs; 694 final long packedStreams[] = new long[(int)numPackedStreams]; 695 if (numPackedStreams == 1) { 696 int i; 697 for (i = 0; i < (int)totalInStreams; i++) { 698 if (folder.findBindPairForInStream(i) < 0) { 699 break; 700 } 701 } 702 if (i == (int)totalInStreams) { 703 throw new IOException("Couldn't find stream's bind pair index"); 704 } 705 packedStreams[0] = i; 706 } else { 707 for (int i = 0; i < (int)numPackedStreams; i++) { 708 packedStreams[i] = readUint64(header); 709 } 710 } 711 folder.packedStreams = packedStreams; 712 713 return folder; 714 } 715 716 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 717 final int areAllDefined = getUnsignedByte(header); 718 final BitSet bits; 719 if (areAllDefined != 0) { 720 bits = new BitSet(size); 721 for (int i = 0; i < size; i++) { 722 bits.set(i, true); 723 } 724 } else { 725 bits = readBits(header, size); 726 } 727 return bits; 728 } 729 730 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 731 final BitSet bits = new BitSet(size); 732 int mask = 0; 733 int cache = 0; 734 for (int i = 0; i < size; i++) { 735 if (mask == 0) { 736 mask = 0x80; 737 cache = getUnsignedByte(header); 738 } 739 bits.set(i, (cache & mask) != 0); 740 mask >>>= 1; 741 } 742 return bits; 743 } 744 745 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 746 final long numFiles = readUint64(header); 747 final SevenZArchiveEntry[] files = new SevenZArchiveEntry[(int)numFiles]; 748 for (int i = 0; i < files.length; i++) { 749 files[i] = new SevenZArchiveEntry(); 750 } 751 BitSet isEmptyStream = null; 752 BitSet isEmptyFile = null; 753 BitSet isAnti = null; 754 while (true) { 755 final int propertyType = getUnsignedByte(header); 756 if (propertyType == 0) { 757 break; 758 } 759 final long size = readUint64(header); 760 switch (propertyType) { 761 case NID.kEmptyStream: { 762 isEmptyStream = readBits(header, files.length); 763 break; 764 } 765 case NID.kEmptyFile: { 766 if (isEmptyStream == null) { // protect against NPE 767 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 768 } 769 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 770 break; 771 } 772 case NID.kAnti: { 773 if (isEmptyStream == null) { // protect against NPE 774 throw new IOException("Header format error: kEmptyStream must appear before kAnti"); 775 } 776 isAnti = readBits(header, isEmptyStream.cardinality()); 777 break; 778 } 779 case NID.kName: { 780 final int external = getUnsignedByte(header); 781 if (external != 0) { 782 throw new IOException("Not implemented"); 783 } 784 if (((size - 1) & 1) != 0) { 785 throw new IOException("File names length invalid"); 786 } 787 final byte[] names = new byte[(int)(size - 1)]; 788 header.get(names); 789 int nextFile = 0; 790 int nextName = 0; 791 for (int i = 0; i < names.length; i += 2) { 792 if (names[i] == 0 && names[i+1] == 0) { 793 files[nextFile++].setName(new String(names, nextName, i-nextName, CharsetNames.UTF_16LE)); 794 nextName = i + 2; 795 } 796 } 797 if (nextName != names.length || nextFile != files.length) { 798 throw new IOException("Error parsing file names"); 799 } 800 break; 801 } 802 case NID.kCTime: { 803 final BitSet timesDefined = readAllOrBits(header, files.length); 804 final int external = getUnsignedByte(header); 805 if (external != 0) { 806 throw new IOException("Unimplemented"); 807 } 808 for (int i = 0; i < files.length; i++) { 809 files[i].setHasCreationDate(timesDefined.get(i)); 810 if (files[i].getHasCreationDate()) { 811 files[i].setCreationDate(header.getLong()); 812 } 813 } 814 break; 815 } 816 case NID.kATime: { 817 final BitSet timesDefined = readAllOrBits(header, files.length); 818 final int external = getUnsignedByte(header); 819 if (external != 0) { 820 throw new IOException("Unimplemented"); 821 } 822 for (int i = 0; i < files.length; i++) { 823 files[i].setHasAccessDate(timesDefined.get(i)); 824 if (files[i].getHasAccessDate()) { 825 files[i].setAccessDate(header.getLong()); 826 } 827 } 828 break; 829 } 830 case NID.kMTime: { 831 final BitSet timesDefined = readAllOrBits(header, files.length); 832 final int external = getUnsignedByte(header); 833 if (external != 0) { 834 throw new IOException("Unimplemented"); 835 } 836 for (int i = 0; i < files.length; i++) { 837 files[i].setHasLastModifiedDate(timesDefined.get(i)); 838 if (files[i].getHasLastModifiedDate()) { 839 files[i].setLastModifiedDate(header.getLong()); 840 } 841 } 842 break; 843 } 844 case NID.kWinAttributes: { 845 final BitSet attributesDefined = readAllOrBits(header, files.length); 846 final int external = getUnsignedByte(header); 847 if (external != 0) { 848 throw new IOException("Unimplemented"); 849 } 850 for (int i = 0; i < files.length; i++) { 851 files[i].setHasWindowsAttributes(attributesDefined.get(i)); 852 if (files[i].getHasWindowsAttributes()) { 853 files[i].setWindowsAttributes(header.getInt()); 854 } 855 } 856 break; 857 } 858 case NID.kStartPos: { 859 throw new IOException("kStartPos is unsupported, please report"); 860 } 861 case NID.kDummy: { 862 // 7z 9.20 asserts the content is all zeros and ignores the property 863 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 864 865 if (skipBytesFully(header, size) < size) { 866 throw new IOException("Incomplete kDummy property"); 867 } 868 break; 869 } 870 871 default: { 872 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 873 if (skipBytesFully(header, size) < size) { 874 throw new IOException("Incomplete property of type " + propertyType); 875 } 876 break; 877 } 878 } 879 } 880 int nonEmptyFileCounter = 0; 881 int emptyFileCounter = 0; 882 for (int i = 0; i < files.length; i++) { 883 files[i].setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 884 if (files[i].hasStream()) { 885 files[i].setDirectory(false); 886 files[i].setAntiItem(false); 887 files[i].setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 888 files[i].setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 889 files[i].setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 890 ++nonEmptyFileCounter; 891 } else { 892 files[i].setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 893 files[i].setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 894 files[i].setHasCrc(false); 895 files[i].setSize(0); 896 ++emptyFileCounter; 897 } 898 } 899 archive.files = files; 900 calculateStreamMap(archive); 901 } 902 903 private void calculateStreamMap(final Archive archive) throws IOException { 904 final StreamMap streamMap = new StreamMap(); 905 906 int nextFolderPackStreamIndex = 0; 907 final int numFolders = archive.folders != null ? archive.folders.length : 0; 908 streamMap.folderFirstPackStreamIndex = new int[numFolders]; 909 for (int i = 0; i < numFolders; i++) { 910 streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 911 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 912 } 913 914 long nextPackStreamOffset = 0; 915 final int numPackSizes = archive.packSizes != null ? archive.packSizes.length : 0; 916 streamMap.packStreamOffsets = new long[numPackSizes]; 917 for (int i = 0; i < numPackSizes; i++) { 918 streamMap.packStreamOffsets[i] = nextPackStreamOffset; 919 nextPackStreamOffset += archive.packSizes[i]; 920 } 921 922 streamMap.folderFirstFileIndex = new int[numFolders]; 923 streamMap.fileFolderIndex = new int[archive.files.length]; 924 int nextFolderIndex = 0; 925 int nextFolderUnpackStreamIndex = 0; 926 for (int i = 0; i < archive.files.length; i++) { 927 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 928 streamMap.fileFolderIndex[i] = -1; 929 continue; 930 } 931 if (nextFolderUnpackStreamIndex == 0) { 932 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 933 streamMap.folderFirstFileIndex[nextFolderIndex] = i; 934 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 935 break; 936 } 937 } 938 if (nextFolderIndex >= archive.folders.length) { 939 throw new IOException("Too few folders in archive"); 940 } 941 } 942 streamMap.fileFolderIndex[i] = nextFolderIndex; 943 if (!archive.files[i].hasStream()) { 944 continue; 945 } 946 ++nextFolderUnpackStreamIndex; 947 if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 948 ++nextFolderIndex; 949 nextFolderUnpackStreamIndex = 0; 950 } 951 } 952 953 archive.streamMap = streamMap; 954 } 955 956 private void buildDecodingStream() throws IOException { 957 final int folderIndex = archive.streamMap.fileFolderIndex[currentEntryIndex]; 958 if (folderIndex < 0) { 959 deferredBlockStreams.clear(); 960 // TODO: previously it'd return an empty stream? 961 // new BoundedInputStream(new ByteArrayInputStream(new byte[0]), 0); 962 return; 963 } 964 final SevenZArchiveEntry file = archive.files[currentEntryIndex]; 965 if (currentFolderIndex == folderIndex) { 966 // (COMPRESS-320). 967 // The current entry is within the same (potentially opened) folder. The 968 // previous stream has to be fully decoded before we can start reading 969 // but don't do it eagerly -- if the user skips over the entire folder nothing 970 // is effectively decompressed. 971 972 file.setContentMethods(archive.files[currentEntryIndex - 1].getContentMethods()); 973 } else { 974 // We're opening a new folder. Discard any queued streams/ folder stream. 975 currentFolderIndex = folderIndex; 976 deferredBlockStreams.clear(); 977 if (currentFolderInputStream != null) { 978 currentFolderInputStream.close(); 979 currentFolderInputStream = null; 980 } 981 982 final Folder folder = archive.folders[folderIndex]; 983 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 984 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 985 archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 986 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 987 } 988 989 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 990 if (file.getHasCrc()) { 991 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 992 } 993 994 deferredBlockStreams.add(fileStream); 995 } 996 997 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, 998 final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { 999 channel.position(folderOffset); 1000 InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( 1001 new BoundedSeekableByteChannelInputStream(channel, 1002 archive.packSizes[firstPackStreamIndex]))) { 1003 @Override 1004 public int read() throws IOException { 1005 final int r = in.read(); 1006 if (r >= 0) { 1007 count(1); 1008 } 1009 return r; 1010 } 1011 @Override 1012 public int read(final byte[] b) throws IOException { 1013 return read(b, 0, b.length); 1014 } 1015 @Override 1016 public int read(final byte[] b, final int off, final int len) throws IOException { 1017 final int r = in.read(b, off, len); 1018 if (r >= 0) { 1019 count(r); 1020 } 1021 return r; 1022 } 1023 private void count(int c) { 1024 compressedBytesReadFromCurrentEntry += c; 1025 } 1026 }; 1027 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 1028 for (final Coder coder : folder.getOrderedCoders()) { 1029 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1030 throw new IOException("Multi input/output stream coders are not yet supported"); 1031 } 1032 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 1033 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, 1034 folder.getUnpackSizeForCoder(coder), coder, password); 1035 methods.addFirst(new SevenZMethodConfiguration(method, 1036 Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 1037 } 1038 entry.setContentMethods(methods); 1039 if (folder.hasCrc) { 1040 return new CRC32VerifyingInputStream(inputStreamStack, 1041 folder.getUnpackSize(), folder.crc); 1042 } 1043 return inputStreamStack; 1044 } 1045 1046 /** 1047 * Reads a byte of data. 1048 * 1049 * @return the byte read, or -1 if end of input is reached 1050 * @throws IOException 1051 * if an I/O error has occurred 1052 */ 1053 public int read() throws IOException { 1054 int b = getCurrentStream().read(); 1055 if (b >= 0) { 1056 uncompressedBytesReadFromCurrentEntry++; 1057 } 1058 return b; 1059 } 1060 1061 private InputStream getCurrentStream() throws IOException { 1062 if (archive.files[currentEntryIndex].getSize() == 0) { 1063 return new ByteArrayInputStream(new byte[0]); 1064 } 1065 if (deferredBlockStreams.isEmpty()) { 1066 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 1067 } 1068 1069 while (deferredBlockStreams.size() > 1) { 1070 // In solid compression mode we need to decompress all leading folder' 1071 // streams to get access to an entry. We defer this until really needed 1072 // so that entire blocks can be skipped without wasting time for decompression. 1073 try (final InputStream stream = deferredBlockStreams.remove(0)) { 1074 IOUtils.skip(stream, Long.MAX_VALUE); 1075 } 1076 compressedBytesReadFromCurrentEntry = 0; 1077 } 1078 1079 return deferredBlockStreams.get(0); 1080 } 1081 1082 /** 1083 * Reads data into an array of bytes. 1084 * 1085 * @param b the array to write data to 1086 * @return the number of bytes read, or -1 if end of input is reached 1087 * @throws IOException 1088 * if an I/O error has occurred 1089 */ 1090 public int read(final byte[] b) throws IOException { 1091 return read(b, 0, b.length); 1092 } 1093 1094 /** 1095 * Reads data into an array of bytes. 1096 * 1097 * @param b the array to write data to 1098 * @param off offset into the buffer to start filling at 1099 * @param len of bytes to read 1100 * @return the number of bytes read, or -1 if end of input is reached 1101 * @throws IOException 1102 * if an I/O error has occurred 1103 */ 1104 public int read(final byte[] b, final int off, final int len) throws IOException { 1105 int cnt = getCurrentStream().read(b, off, len); 1106 if (cnt > 0) { 1107 uncompressedBytesReadFromCurrentEntry += cnt; 1108 } 1109 return cnt; 1110 } 1111 1112 /** 1113 * Provides statistics for bytes read from the current entry. 1114 * 1115 * @return statistics for bytes read from the current entry 1116 * @since 1.17 1117 */ 1118 public InputStreamStatistics getStatisticsForCurrentEntry() { 1119 return new InputStreamStatistics() { 1120 @Override 1121 public long getCompressedCount() { 1122 return compressedBytesReadFromCurrentEntry; 1123 } 1124 @Override 1125 public long getUncompressedCount() { 1126 return uncompressedBytesReadFromCurrentEntry; 1127 } 1128 }; 1129 } 1130 1131 private static long readUint64(final ByteBuffer in) throws IOException { 1132 // long rather than int as it might get shifted beyond the range of an int 1133 final long firstByte = getUnsignedByte(in); 1134 int mask = 0x80; 1135 long value = 0; 1136 for (int i = 0; i < 8; i++) { 1137 if ((firstByte & mask) == 0) { 1138 return value | ((firstByte & (mask - 1)) << (8 * i)); 1139 } 1140 final long nextByte = getUnsignedByte(in); 1141 value |= nextByte << (8 * i); 1142 mask >>>= 1; 1143 } 1144 return value; 1145 } 1146 1147 private static int getUnsignedByte(ByteBuffer buf) { 1148 return buf.get() & 0xff; 1149 } 1150 1151 /** 1152 * Checks if the signature matches what is expected for a 7z file. 1153 * 1154 * @param signature 1155 * the bytes to check 1156 * @param length 1157 * the number of bytes to check 1158 * @return true, if this is the signature of a 7z archive. 1159 * @since 1.8 1160 */ 1161 public static boolean matches(final byte[] signature, final int length) { 1162 if (length < sevenZSignature.length) { 1163 return false; 1164 } 1165 1166 for (int i = 0; i < sevenZSignature.length; i++) { 1167 if (signature[i] != sevenZSignature[i]) { 1168 return false; 1169 } 1170 } 1171 return true; 1172 } 1173 1174 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) throws IOException { 1175 if (bytesToSkip < 1) { 1176 return 0; 1177 } 1178 int current = input.position(); 1179 int maxSkip = input.remaining(); 1180 if (maxSkip < bytesToSkip) { 1181 bytesToSkip = maxSkip; 1182 } 1183 input.position(current + (int) bytesToSkip); 1184 return bytesToSkip; 1185 } 1186 1187 private void readFully(ByteBuffer buf) throws IOException { 1188 ((Buffer)buf).rewind(); 1189 IOUtils.readFully(channel, buf); 1190 ((Buffer)buf).flip(); 1191 } 1192 1193 @Override 1194 public String toString() { 1195 return archive.toString(); 1196 } 1197 1198 private static final CharsetEncoder PASSWORD_ENCODER = StandardCharsets.UTF_16LE.newEncoder(); 1199 1200 private static byte[] utf16Decode(char[] chars) throws IOException { 1201 if (chars == null) { 1202 return null; 1203 } 1204 ByteBuffer encoded = PASSWORD_ENCODER.encode(CharBuffer.wrap(chars)); 1205 if (encoded.hasArray()) { 1206 return encoded.array(); 1207 } 1208 byte[] e = new byte[encoded.remaining()]; 1209 encoded.get(e); 1210 return e; 1211 } 1212}