001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.tar; 020 021import java.io.ByteArrayOutputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.math.BigInteger; 025import java.nio.ByteBuffer; 026import java.nio.charset.StandardCharsets; 027import java.util.ArrayList; 028import java.util.Collections; 029import java.util.HashMap; 030import java.util.List; 031import java.util.Map; 032 033import org.apache.commons.compress.archivers.zip.ZipEncoding; 034import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 035import org.apache.commons.compress.utils.CharsetNames; 036import org.apache.commons.compress.utils.IOUtils; 037 038import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN; 039import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET; 040import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_NUMBYTES_LEN; 041import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_OFFSET_LEN; 042 043/** 044 * This class provides static utility methods to work with byte streams. 045 * 046 * @Immutable 047 */ 048// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) 049public class TarUtils { 050 051 private static final int BYTE_MASK = 255; 052 053 static final ZipEncoding DEFAULT_ENCODING = 054 ZipEncodingHelper.getZipEncoding(null); 055 056 /** 057 * Encapsulates the algorithms used up to Commons Compress 1.3 as 058 * ZipEncoding. 059 */ 060 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { 061 @Override 062 public boolean canEncode(final String name) { return true; } 063 064 @Override 065 public ByteBuffer encode(final String name) { 066 final int length = name.length(); 067 final byte[] buf = new byte[length]; 068 069 // copy until end of input or output is reached. 070 for (int i = 0; i < length; ++i) { 071 buf[i] = (byte) name.charAt(i); 072 } 073 return ByteBuffer.wrap(buf); 074 } 075 076 @Override 077 public String decode(final byte[] buffer) { 078 final int length = buffer.length; 079 final StringBuilder result = new StringBuilder(length); 080 081 for (final byte b : buffer) { 082 if (b == 0) { // Trailing null 083 break; 084 } 085 result.append((char) (b & 0xFF)); // Allow for sign-extension 086 } 087 088 return result.toString(); 089 } 090 }; 091 092 /** Private constructor to prevent instantiation of this utility class. */ 093 private TarUtils(){ 094 } 095 096 /** 097 * Parse an octal string from a buffer. 098 * 099 * <p>Leading spaces are ignored. 100 * The buffer must contain a trailing space or NUL, 101 * and may contain an additional trailing space or NUL.</p> 102 * 103 * <p>The input buffer is allowed to contain all NULs, 104 * in which case the method returns 0L 105 * (this allows for missing fields).</p> 106 * 107 * <p>To work-around some tar implementations that insert a 108 * leading NUL this method returns 0 if it detects a leading NUL 109 * since Commons Compress 1.4.</p> 110 * 111 * @param buffer The buffer from which to parse. 112 * @param offset The offset into the buffer from which to parse. 113 * @param length The maximum number of bytes to parse - must be at least 2 bytes. 114 * @return The long value of the octal string. 115 * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected. 116 */ 117 public static long parseOctal(final byte[] buffer, final int offset, final int length) { 118 long result = 0; 119 int end = offset + length; 120 int start = offset; 121 122 if (length < 2){ 123 throw new IllegalArgumentException("Length "+length+" must be at least 2"); 124 } 125 126 if (buffer[start] == 0) { 127 return 0L; 128 } 129 130 // Skip leading spaces 131 while (start < end){ 132 if (buffer[start] != ' ') { 133 break; 134 } 135 start++; 136 } 137 138 // Trim all trailing NULs and spaces. 139 // The ustar and POSIX tar specs require a trailing NUL or 140 // space but some implementations use the extra digit for big 141 // sizes/uids/gids ... 142 byte trailer = buffer[end - 1]; 143 while (start < end && (trailer == 0 || trailer == ' ')) { 144 end--; 145 trailer = buffer[end - 1]; 146 } 147 148 for ( ;start < end; start++) { 149 final byte currentByte = buffer[start]; 150 // CheckStyle:MagicNumber OFF 151 if (currentByte < '0' || currentByte > '7'){ 152 throw new IllegalArgumentException( 153 exceptionMessage(buffer, offset, length, start, currentByte)); 154 } 155 result = (result << 3) + (currentByte - '0'); // convert from ASCII 156 // CheckStyle:MagicNumber ON 157 } 158 159 return result; 160 } 161 162 /** 163 * Compute the value contained in a byte buffer. If the most 164 * significant bit of the first byte in the buffer is set, this 165 * bit is ignored and the rest of the buffer is interpreted as a 166 * binary number. Otherwise, the buffer is interpreted as an 167 * octal number as per the parseOctal function above. 168 * 169 * @param buffer The buffer from which to parse. 170 * @param offset The offset into the buffer from which to parse. 171 * @param length The maximum number of bytes to parse. 172 * @return The long value of the octal or binary string. 173 * @throws IllegalArgumentException if the trailing space/NUL is 174 * missing or an invalid byte is detected in an octal number, or 175 * if a binary number would exceed the size of a signed long 176 * 64-bit integer. 177 * @since 1.4 178 */ 179 public static long parseOctalOrBinary(final byte[] buffer, final int offset, 180 final int length) { 181 182 if ((buffer[offset] & 0x80) == 0) { 183 return parseOctal(buffer, offset, length); 184 } 185 final boolean negative = buffer[offset] == (byte) 0xff; 186 if (length < 9) { 187 return parseBinaryLong(buffer, offset, length, negative); 188 } 189 return parseBinaryBigInteger(buffer, offset, length, negative); 190 } 191 192 private static long parseBinaryLong(final byte[] buffer, final int offset, 193 final int length, 194 final boolean negative) { 195 if (length >= 9) { 196 throw new IllegalArgumentException("At offset " + offset + ", " 197 + length + " byte binary number" 198 + " exceeds maximum signed long" 199 + " value"); 200 } 201 long val = 0; 202 for (int i = 1; i < length; i++) { 203 val = (val << 8) + (buffer[offset + i] & 0xff); 204 } 205 if (negative) { 206 // 2's complement 207 val--; 208 val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1; 209 } 210 return negative ? -val : val; 211 } 212 213 private static long parseBinaryBigInteger(final byte[] buffer, 214 final int offset, 215 final int length, 216 final boolean negative) { 217 final byte[] remainder = new byte[length - 1]; 218 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); 219 BigInteger val = new BigInteger(remainder); 220 if (negative) { 221 // 2's complement 222 val = val.add(BigInteger.valueOf(-1)).not(); 223 } 224 if (val.bitLength() > 63) { 225 throw new IllegalArgumentException("At offset " + offset + ", " 226 + length + " byte binary number" 227 + " exceeds maximum signed long" 228 + " value"); 229 } 230 return negative ? -val.longValue() : val.longValue(); 231 } 232 233 /** 234 * Parse a boolean byte from a buffer. 235 * Leading spaces and NUL are ignored. 236 * The buffer may contain trailing spaces or NULs. 237 * 238 * @param buffer The buffer from which to parse. 239 * @param offset The offset into the buffer from which to parse. 240 * @return The boolean value of the bytes. 241 * @throws IllegalArgumentException if an invalid byte is detected. 242 */ 243 public static boolean parseBoolean(final byte[] buffer, final int offset) { 244 return buffer[offset] == 1; 245 } 246 247 // Helper method to generate the exception message 248 private static String exceptionMessage(final byte[] buffer, final int offset, 249 final int length, final int current, final byte currentByte) { 250 // default charset is good enough for an exception message, 251 // 252 // the alternative was to modify parseOctal and 253 // parseOctalOrBinary to receive the ZipEncoding of the 254 // archive (deprecating the existing public methods, of 255 // course) and dealing with the fact that ZipEncoding#decode 256 // can throw an IOException which parseOctal* doesn't declare 257 String string = new String(buffer, offset, length); 258 259 string=string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed 260 return "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length; 261 } 262 263 /** 264 * Parse an entry name from a buffer. 265 * Parsing stops when a NUL is found 266 * or the buffer length is reached. 267 * 268 * @param buffer The buffer from which to parse. 269 * @param offset The offset into the buffer from which to parse. 270 * @param length The maximum number of bytes to parse. 271 * @return The entry name. 272 */ 273 public static String parseName(final byte[] buffer, final int offset, final int length) { 274 try { 275 return parseName(buffer, offset, length, DEFAULT_ENCODING); 276 } catch (final IOException ex) { // NOSONAR 277 try { 278 return parseName(buffer, offset, length, FALLBACK_ENCODING); 279 } catch (final IOException ex2) { 280 // impossible 281 throw new RuntimeException(ex2); //NOSONAR 282 } 283 } 284 } 285 286 /** 287 * Parse an entry name from a buffer. 288 * Parsing stops when a NUL is found 289 * or the buffer length is reached. 290 * 291 * @param buffer The buffer from which to parse. 292 * @param offset The offset into the buffer from which to parse. 293 * @param length The maximum number of bytes to parse. 294 * @param encoding name of the encoding to use for file names 295 * @since 1.4 296 * @return The entry name. 297 * @throws IOException on error 298 */ 299 public static String parseName(final byte[] buffer, final int offset, 300 final int length, 301 final ZipEncoding encoding) 302 throws IOException { 303 304 int len = 0; 305 for (int i = offset; len < length && buffer[i] != 0; i++) { 306 len++; 307 } 308 if (len > 0) { 309 final byte[] b = new byte[len]; 310 System.arraycopy(buffer, offset, b, 0, len); 311 return encoding.decode(b); 312 } 313 return ""; 314 } 315 316 /** 317 * Parses the content of a PAX 1.0 sparse block. 318 * @since 1.20 319 * @param buffer The buffer from which to parse. 320 * @param offset The offset into the buffer from which to parse. 321 * @return a parsed sparse struct 322 */ 323 public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) { 324 final long sparseOffset = parseOctalOrBinary(buffer, offset, SPARSE_OFFSET_LEN); 325 final long sparseNumbytes = parseOctalOrBinary(buffer, offset + SPARSE_OFFSET_LEN, SPARSE_NUMBYTES_LEN); 326 327 return new TarArchiveStructSparse(sparseOffset, sparseNumbytes); 328 } 329 330 /** 331 * @since 1.21 332 */ 333 static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) 334 throws IOException { 335 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 336 for (int i = 0; i < entries; i++) { 337 try { 338 final TarArchiveStructSparse sparseHeader = 339 parseSparse(buffer, offset + i * (SPARSE_OFFSET_LEN + SPARSE_NUMBYTES_LEN)); 340 341 if (sparseHeader.getOffset() < 0) { 342 throw new IOException("Corrupted TAR archive, sparse entry with negative offset"); 343 } 344 if (sparseHeader.getNumbytes() < 0) { 345 throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes"); 346 } 347 sparseHeaders.add(sparseHeader); 348 } catch (IllegalArgumentException ex) { 349 // thrown internally by parseOctalOrBinary 350 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex); 351 } 352 } 353 return Collections.unmodifiableList(sparseHeaders); 354 } 355 356 /** 357 * Copy a name into a buffer. 358 * Copies characters from the name into the buffer 359 * starting at the specified offset. 360 * If the buffer is longer than the name, the buffer 361 * is filled with trailing NULs. 362 * If the name is longer than the buffer, 363 * the output is truncated. 364 * 365 * @param name The header name from which to copy the characters. 366 * @param buf The buffer where the name is to be stored. 367 * @param offset The starting offset into the buffer 368 * @param length The maximum number of header bytes to copy. 369 * @return The updated offset, i.e. offset + length 370 */ 371 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) { 372 try { 373 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); 374 } catch (final IOException ex) { // NOSONAR 375 try { 376 return formatNameBytes(name, buf, offset, length, 377 FALLBACK_ENCODING); 378 } catch (final IOException ex2) { 379 // impossible 380 throw new RuntimeException(ex2); //NOSONAR 381 } 382 } 383 } 384 385 /** 386 * Copy a name into a buffer. 387 * Copies characters from the name into the buffer 388 * starting at the specified offset. 389 * If the buffer is longer than the name, the buffer 390 * is filled with trailing NULs. 391 * If the name is longer than the buffer, 392 * the output is truncated. 393 * 394 * @param name The header name from which to copy the characters. 395 * @param buf The buffer where the name is to be stored. 396 * @param offset The starting offset into the buffer 397 * @param length The maximum number of header bytes to copy. 398 * @param encoding name of the encoding to use for file names 399 * @since 1.4 400 * @return The updated offset, i.e. offset + length 401 * @throws IOException on error 402 */ 403 public static int formatNameBytes(final String name, final byte[] buf, final int offset, 404 final int length, 405 final ZipEncoding encoding) 406 throws IOException { 407 int len = name.length(); 408 ByteBuffer b = encoding.encode(name); 409 while (b.limit() > length && len > 0) { 410 b = encoding.encode(name.substring(0, --len)); 411 } 412 final int limit = b.limit() - b.position(); 413 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); 414 415 // Pad any remaining output bytes with NUL 416 for (int i = limit; i < length; ++i) { 417 buf[offset + i] = 0; 418 } 419 420 return offset + length; 421 } 422 423 /** 424 * Fill buffer with unsigned octal number, padded with leading zeroes. 425 * 426 * @param value number to convert to octal - treated as unsigned 427 * @param buffer destination buffer 428 * @param offset starting offset in buffer 429 * @param length length of buffer to fill 430 * @throws IllegalArgumentException if the value will not fit in the buffer 431 */ 432 public static void formatUnsignedOctalString(final long value, final byte[] buffer, 433 final int offset, final int length) { 434 int remaining = length; 435 remaining--; 436 if (value == 0) { 437 buffer[offset + remaining--] = (byte) '0'; 438 } else { 439 long val = value; 440 for (; remaining >= 0 && val != 0; --remaining) { 441 // CheckStyle:MagicNumber OFF 442 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); 443 val = val >>> 3; 444 // CheckStyle:MagicNumber ON 445 } 446 if (val != 0){ 447 throw new IllegalArgumentException 448 (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length); 449 } 450 } 451 452 for (; remaining >= 0; --remaining) { // leading zeros 453 buffer[offset + remaining] = (byte) '0'; 454 } 455 } 456 457 /** 458 * Write an octal integer into a buffer. 459 * 460 * Uses {@link #formatUnsignedOctalString} to format 461 * the value as an octal string with leading zeros. 462 * The converted number is followed by space and NUL 463 * 464 * @param value The value to write 465 * @param buf The buffer to receive the output 466 * @param offset The starting offset into the buffer 467 * @param length The size of the output buffer 468 * @return The updated offset, i.e offset+length 469 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 470 */ 471 public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 472 473 int idx=length-2; // For space and trailing null 474 formatUnsignedOctalString(value, buf, offset, idx); 475 476 buf[offset + idx++] = (byte) ' '; // Trailing space 477 buf[offset + idx] = 0; // Trailing null 478 479 return offset + length; 480 } 481 482 /** 483 * Write an octal long integer into a buffer. 484 * 485 * Uses {@link #formatUnsignedOctalString} to format 486 * the value as an octal string with leading zeros. 487 * The converted number is followed by a space. 488 * 489 * @param value The value to write as octal 490 * @param buf The destinationbuffer. 491 * @param offset The starting offset into the buffer. 492 * @param length The length of the buffer 493 * @return The updated offset 494 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 495 */ 496 public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 497 498 final int idx=length-1; // For space 499 500 formatUnsignedOctalString(value, buf, offset, idx); 501 buf[offset + idx] = (byte) ' '; // Trailing space 502 503 return offset + length; 504 } 505 506 /** 507 * Write an long integer into a buffer as an octal string if this 508 * will fit, or as a binary number otherwise. 509 * 510 * Uses {@link #formatUnsignedOctalString} to format 511 * the value as an octal string with leading zeros. 512 * The converted number is followed by a space. 513 * 514 * @param value The value to write into the buffer. 515 * @param buf The destination buffer. 516 * @param offset The starting offset into the buffer. 517 * @param length The length of the buffer. 518 * @return The updated offset. 519 * @throws IllegalArgumentException if the value (and trailer) 520 * will not fit in the buffer. 521 * @since 1.4 522 */ 523 public static int formatLongOctalOrBinaryBytes( 524 final long value, final byte[] buf, final int offset, final int length) { 525 526 // Check whether we are dealing with UID/GID or SIZE field 527 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; 528 529 final boolean negative = value < 0; 530 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars 531 return formatLongOctalBytes(value, buf, offset, length); 532 } 533 534 if (length < 9) { 535 formatLongBinary(value, buf, offset, length, negative); 536 } else { 537 formatBigIntegerBinary(value, buf, offset, length, negative); 538 } 539 540 buf[offset] = (byte) (negative ? 0xff : 0x80); 541 return offset + length; 542 } 543 544 private static void formatLongBinary(final long value, final byte[] buf, 545 final int offset, final int length, 546 final boolean negative) { 547 final int bits = (length - 1) * 8; 548 final long max = 1L << bits; 549 long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE 550 if (val < 0 || val >= max) { 551 throw new IllegalArgumentException("Value " + value + 552 " is too large for " + length + " byte field."); 553 } 554 if (negative) { 555 val ^= max - 1; 556 val++; 557 val |= 0xffL << bits; 558 } 559 for (int i = offset + length - 1; i >= offset; i--) { 560 buf[i] = (byte) val; 561 val >>= 8; 562 } 563 } 564 565 private static void formatBigIntegerBinary(final long value, final byte[] buf, 566 final int offset, 567 final int length, 568 final boolean negative) { 569 final BigInteger val = BigInteger.valueOf(value); 570 final byte[] b = val.toByteArray(); 571 final int len = b.length; 572 if (len > length - 1) { 573 throw new IllegalArgumentException("Value " + value + 574 " is too large for " + length + " byte field."); 575 } 576 final int off = offset + length - len; 577 System.arraycopy(b, 0, buf, off, len); 578 final byte fill = (byte) (negative ? 0xff : 0); 579 for (int i = offset + 1; i < off; i++) { 580 buf[i] = fill; 581 } 582 } 583 584 /** 585 * Writes an octal value into a buffer. 586 * 587 * Uses {@link #formatUnsignedOctalString} to format 588 * the value as an octal string with leading zeros. 589 * The converted number is followed by NUL and then space. 590 * 591 * @param value The value to convert 592 * @param buf The destination buffer 593 * @param offset The starting offset into the buffer. 594 * @param length The size of the buffer. 595 * @return The updated value of offset, i.e. offset+length 596 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 597 */ 598 public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 599 600 int idx=length-2; // for NUL and space 601 formatUnsignedOctalString(value, buf, offset, idx); 602 603 buf[offset + idx++] = 0; // Trailing null 604 buf[offset + idx] = (byte) ' '; // Trailing space 605 606 return offset + length; 607 } 608 609 /** 610 * Compute the checksum of a tar entry header. 611 * 612 * @param buf The tar entry's header buffer. 613 * @return The computed checksum. 614 */ 615 public static long computeCheckSum(final byte[] buf) { 616 long sum = 0; 617 618 for (final byte element : buf) { 619 sum += BYTE_MASK & element; 620 } 621 622 return sum; 623 } 624 625 /** 626 * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>: 627 * <blockquote> 628 * The checksum is calculated by taking the sum of the unsigned byte values 629 * of the header block with the eight checksum bytes taken to be ascii 630 * spaces (decimal value 32). It is stored as a six digit octal number with 631 * leading zeroes followed by a NUL and then a space. Various 632 * implementations do not adhere to this format. For better compatibility, 633 * ignore leading and trailing whitespace, and get the first six digits. In 634 * addition, some historic tar implementations treated bytes as signed. 635 * Implementations typically calculate the checksum both ways, and treat it 636 * as good if either the signed or unsigned sum matches the included 637 * checksum. 638 * </blockquote> 639 * <p> 640 * The return value of this method should be treated as a best-effort 641 * heuristic rather than an absolute and final truth. The checksum 642 * verification logic may well evolve over time as more special cases 643 * are encountered. 644 * 645 * @param header tar header 646 * @return whether the checksum is reasonably good 647 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> 648 * @since 1.5 649 */ 650 public static boolean verifyCheckSum(final byte[] header) { 651 final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN); 652 long unsignedSum = 0; 653 long signedSum = 0; 654 655 for (int i = 0; i < header.length; i++) { 656 byte b = header[i]; 657 if (CHKSUM_OFFSET <= i && i < CHKSUM_OFFSET + CHKSUMLEN) { 658 b = ' '; 659 } 660 unsignedSum += 0xff & b; 661 signedSum += b; 662 } 663 return storedSum == unsignedSum || storedSum == signedSum; 664 } 665 666 /** 667 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) 668 * may appear multi times, and they look like: 669 * 670 * GNU.sparse.size=size 671 * GNU.sparse.numblocks=numblocks 672 * repeat numblocks times 673 * GNU.sparse.offset=offset 674 * GNU.sparse.numbytes=numbytes 675 * end repeat 676 * 677 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 678 * 679 * GNU.sparse.map 680 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 681 * 682 * @param inputStream input stream to read keys and values 683 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, 684 * the sparse headers need to be stored in an array, not a map 685 * @param globalPaxHeaders global PAX headers of the tar archive 686 * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry. 687 * @throws IOException if an I/O error occurs. 688 * @deprecated use the four-arg version instead 689 */ 690 @Deprecated 691 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders) 692 throws IOException { 693 return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1); 694 } 695 696 /** 697 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) 698 * may appear multi times, and they look like: 699 * 700 * GNU.sparse.size=size 701 * GNU.sparse.numblocks=numblocks 702 * repeat numblocks times 703 * GNU.sparse.offset=offset 704 * GNU.sparse.numbytes=numbytes 705 * end repeat 706 * 707 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 708 * 709 * GNU.sparse.map 710 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 711 * 712 * @param inputStream input stream to read keys and values 713 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, 714 * the sparse headers need to be stored in an array, not a map 715 * @param globalPaxHeaders global PAX headers of the tar archive 716 * @param headerSize total size of the PAX header, will be ignored if negative 717 * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry. 718 * @throws IOException if an I/O error occurs. 719 * @since 1.21 720 */ 721 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, 722 final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders, 723 final long headerSize) throws IOException { 724 final Map<String, String> headers = new HashMap<>(globalPaxHeaders); 725 Long offset = null; 726 // Format is "length keyword=value\n"; 727 int totalRead = 0; 728 while(true) { // get length 729 int ch; 730 int len = 0; 731 int read = 0; 732 while((ch = inputStream.read()) != -1) { 733 read++; 734 totalRead++; 735 if (ch == '\n') { // blank line in header 736 break; 737 } 738 if (ch == ' '){ // End of length string 739 // Get keyword 740 final ByteArrayOutputStream coll = new ByteArrayOutputStream(); 741 while((ch = inputStream.read()) != -1) { 742 read++; 743 totalRead++; 744 if (totalRead < 0 || (headerSize >= 0 && totalRead >= headerSize)) { 745 break; 746 } 747 if (ch == '='){ // end of keyword 748 final String keyword = coll.toString(CharsetNames.UTF_8); 749 // Get rest of entry 750 final int restLen = len - read; 751 if (restLen <= 1) { // only NL 752 headers.remove(keyword); 753 } else if (headerSize >= 0 && restLen > headerSize - totalRead) { 754 throw new IOException("Paxheader value size " + restLen 755 + " exceeds size of header record"); 756 } else { 757 final byte[] rest = IOUtils.readRange(inputStream, restLen); 758 final int got = rest.length; 759 if (got != restLen) { 760 throw new IOException("Failed to read " 761 + "Paxheader. Expected " 762 + restLen 763 + " bytes, read " 764 + got); 765 } 766 totalRead += restLen; 767 // Drop trailing NL 768 if (rest[restLen - 1] != '\n') { 769 throw new IOException("Failed to read Paxheader." 770 + "Value should end with a newline"); 771 } 772 final String value = new String(rest, 0, 773 restLen - 1, StandardCharsets.UTF_8); 774 headers.put(keyword, value); 775 776 // for 0.0 PAX Headers 777 if (keyword.equals("GNU.sparse.offset")) { 778 if (offset != null) { 779 // previous GNU.sparse.offset header but but no numBytes 780 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 781 } 782 try { 783 offset = Long.valueOf(value); 784 } catch (NumberFormatException ex) { 785 throw new IOException("Failed to read Paxheader." 786 + "GNU.sparse.offset contains a non-numeric value"); 787 } 788 if (offset < 0) { 789 throw new IOException("Failed to read Paxheader." 790 + "GNU.sparse.offset contains negative value"); 791 } 792 } 793 794 // for 0.0 PAX Headers 795 if (keyword.equals("GNU.sparse.numbytes")) { 796 if (offset == null) { 797 throw new IOException("Failed to read Paxheader." + 798 "GNU.sparse.offset is expected before GNU.sparse.numbytes shows up."); 799 } 800 long numbytes; 801 try { 802 numbytes = Long.parseLong(value); 803 } catch (NumberFormatException ex) { 804 throw new IOException("Failed to read Paxheader." 805 + "GNU.sparse.numbytes contains a non-numeric value."); 806 } 807 if (numbytes < 0) { 808 throw new IOException("Failed to read Paxheader." 809 + "GNU.sparse.numbytes contains negative value"); 810 } 811 sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes)); 812 offset = null; 813 } 814 } 815 break; 816 } 817 coll.write((byte) ch); 818 } 819 break; // Processed single header 820 } 821 822 // COMPRESS-530 : throw if we encounter a non-number while reading length 823 if (ch < '0' || ch > '9') { 824 throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length"); 825 } 826 827 len *= 10; 828 len += ch - '0'; 829 } 830 if (ch == -1){ // EOF 831 break; 832 } 833 } 834 if (offset != null) { 835 // offset but no numBytes 836 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 837 } 838 return headers; 839 } 840 841 /** 842 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 843 * GNU.sparse.map 844 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 845 * 846 * <p>Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You 847 * should use {@link #parseFromPAX01SparseHeaders} directly instead. 848 * 849 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 850 * @return sparse headers parsed from sparse map 851 * @deprecated use #parseFromPAX01SparseHeaders instead 852 */ 853 protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(String sparseMap) { 854 try { 855 return parseFromPAX01SparseHeaders(sparseMap); 856 } catch (IOException ex) { 857 throw new RuntimeException(ex.getMessage(), ex); 858 } 859 } 860 861 /** 862 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 863 * GNU.sparse.map 864 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 865 * 866 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 867 * @return unmodifiable list of sparse headers parsed from sparse map 868 * @throws IOException Corrupted TAR archive. 869 * @since 1.21 870 */ 871 protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(String sparseMap) 872 throws IOException { 873 List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 874 String[] sparseHeaderStrings = sparseMap.split(","); 875 if (sparseHeaderStrings.length % 2 == 1) { 876 throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header"); 877 } 878 879 for (int i = 0; i < sparseHeaderStrings.length; i += 2) { 880 long sparseOffset; 881 try { 882 sparseOffset = Long.parseLong(sparseHeaderStrings[i]); 883 } catch (NumberFormatException ex) { 884 throw new IOException("Corrupted TAR archive." 885 + " Sparse struct offset contains a non-numeric value"); 886 } 887 if (sparseOffset < 0) { 888 throw new IOException("Corrupted TAR archive." 889 + " Sparse struct offset contains negative value"); 890 } 891 long sparseNumbytes; 892 try { 893 sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]); 894 } catch (NumberFormatException ex) { 895 throw new IOException("Corrupted TAR archive." 896 + " Sparse struct numbytes contains a non-numeric value"); 897 } 898 if (sparseNumbytes < 0) { 899 throw new IOException("Corrupted TAR archive." 900 + " Sparse struct numbytes contains negative value"); 901 } 902 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 903 } 904 905 return Collections.unmodifiableList(sparseHeaders); 906 } 907 908 /** 909 * For PAX Format 1.X: 910 * The sparse map itself is stored in the file data block, preceding the actual file data. 911 * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary. 912 * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers 913 * giving the offset and size of the data block it describes. 914 * @param inputStream parsing source. 915 * @param recordSize The size the TAR header 916 * @return sparse headers 917 * @throws IOException if an I/O error occurs. 918 */ 919 protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException { 920 // for 1.X PAX Headers 921 List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 922 long bytesRead = 0; 923 924 long[] readResult = readLineOfNumberForPax1X(inputStream); 925 long sparseHeadersCount = readResult[0]; 926 if (sparseHeadersCount < 0) { 927 // overflow while reading number? 928 throw new IOException("Corrupted TAR archive. Negative value in sparse headers block"); 929 } 930 bytesRead += readResult[1]; 931 while (sparseHeadersCount-- > 0) { 932 readResult = readLineOfNumberForPax1X(inputStream); 933 final long sparseOffset = readResult[0]; 934 if (sparseOffset < 0) { 935 throw new IOException("Corrupted TAR archive." 936 + " Sparse header block offset contains negative value"); 937 } 938 bytesRead += readResult[1]; 939 940 readResult = readLineOfNumberForPax1X(inputStream); 941 final long sparseNumbytes = readResult[0]; 942 if (sparseNumbytes < 0) { 943 throw new IOException("Corrupted TAR archive." 944 + " Sparse header block numbytes contains negative value"); 945 } 946 bytesRead += readResult[1]; 947 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 948 } 949 950 // skip the rest of this record data 951 long bytesToSkip = recordSize - bytesRead % recordSize; 952 IOUtils.skip(inputStream, bytesToSkip); 953 return sparseHeaders; 954 } 955 956 /** 957 * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. 958 * It consists of a series of decimal numbers delimited by newlines. 959 * 960 * @param inputStream the input stream of the tar file 961 * @return the decimal number delimited by '\n', and the bytes read from input stream 962 * @throws IOException 963 */ 964 private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException { 965 int number; 966 long result = 0; 967 long bytesRead = 0; 968 969 while ((number = inputStream.read()) != '\n') { 970 bytesRead += 1; 971 if (number == -1) { 972 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format"); 973 } 974 if (number < '0' || number > '9') { 975 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block"); 976 } 977 result = result * 10 + (number - '0'); 978 } 979 bytesRead += 1; 980 981 return new long[]{result, bytesRead}; 982 } 983 984}