diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..1e058a0 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: splitbrain diff --git a/composer.json b/composer.json index a7bbac7..62a41f6 100644 --- a/composer.json +++ b/composer.json @@ -38,5 +38,12 @@ "psr-4": { "splitbrain\\PHPArchive\\": "tests" } - } + }, + + "funding": [ + { + "url": "https://github.com/sponsors/splitbrain", + "type": "github" + } + ] } diff --git a/src/Tar.php b/src/Tar.php index 463880b..cea3ed8 100644 --- a/src/Tar.php +++ b/src/Tar.php @@ -15,6 +15,7 @@ */ class Tar extends Archive { + const READ_CHUNK_SIZE = 1048576; // 1MB protected $file = ''; protected $comptype = Archive::COMPRESS_AUTO; @@ -23,6 +24,9 @@ class Tar extends Archive protected $memory = ''; protected $closed = true; protected $writeaccess = false; + protected $position = 0; + protected $contentUntil = 0; + protected $skipUntil = 0; /** * Sets the compression to use @@ -72,6 +76,7 @@ public function open($file) throw new ArchiveIOException('Could not open file for reading: '.$this->file); } $this->closed = false; + $this->position = 0; } /** @@ -118,12 +123,37 @@ public function yieldContents() continue; } - $this->skipbytes(ceil($header['size'] / 512) * 512); + $this->contentUntil = $this->position + $header['size']; + $this->skipUntil = $this->position + ceil($header['size'] / 512) * 512; + yield $this->header2fileinfo($header); + + $skip = $this->skipUntil - $this->position; + if ($skip > 0) { + $this->skipbytes($skip); + } } $this->close(); + } + /** + * Reads content of the current archive entry. + * + * Works only when iterating through the archive using the generator returned + * by the yieldContents(). 
+ * + * @param int $length maximum number of bytes to read + * + * @return string + */ + public function readCurrentEntry($length = PHP_INT_MAX) + { + $length = (int) min($length, $this->contentUntil - $this->position); + if ($length === 0) { + return ''; + } + return $this->readbytes($length); } /** @@ -290,16 +320,27 @@ public function addFile($file, $fileinfo = '') throw new ArchiveIOException('Could not open file for reading: ' . $file); } while (!feof($fp)) { - $data = fread($fp, 512); - $read += strlen($data); + // for performance reasons read bigger chunks at once + $data = fread($fp, self::READ_CHUNK_SIZE); if ($data === false) { break; } if ($data === '') { break; } - $packed = pack("a512", $data); - $this->writebytes($packed); + $dataLen = strlen($data); + $read += $dataLen; + // how much of data read fully fills 512-byte blocks? + $passLen = ($dataLen >> 9) << 9; + if ($passLen === $dataLen) { + // all - just write the data + $this->writebytes($data); + } else { + // directly write what fills 512-byte blocks fully + $this->writebytes(substr($data, 0, $passLen)); + // pad the remainder to 512 bytes + $this->writebytes(pack("a512", substr($data, $passLen))); + } } fclose($fp); @@ -335,8 +376,11 @@ public function addData($fileinfo, $data) $fileinfo->setSize($len); $this->writeFileHeader($fileinfo); - for ($s = 0; $s < $len; $s += 512) { - $this->writebytes(pack("a512", substr($data, $s, 512))); + // write directly everything but the last block which needs padding + $passLen = ($len >> 9) << 9; + $this->writebytes(substr($data, 0, $passLen)); + if ($passLen < $len) { + $this->writebytes(pack("a512", substr($data, $passLen, 512))); } if (is_callable($this->callback)) { @@ -439,12 +483,14 @@ public function save($file) protected function readbytes($length) { if ($this->comptype === Archive::COMPRESS_GZIP) { - return @gzread($this->fh, $length); + $ret = @gzread($this->fh, $length); } elseif ($this->comptype === Archive::COMPRESS_BZIP) { - return 
@bzread($this->fh, $length); + $ret = @bzread($this->fh, $length); } else { - return @fread($this->fh, $length); + $ret = @fread($this->fh, $length); } + $this->position += strlen($ret); + return $ret; } /** @@ -494,6 +540,7 @@ protected function skipbytes($bytes) } else { @fseek($this->fh, $bytes, SEEK_CUR); } + $this->position += $bytes; } /** @@ -553,8 +600,8 @@ protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $ $uid = sprintf("%6s ", decoct($uid)); $gid = sprintf("%6s ", decoct($gid)); $perm = sprintf("%6s ", decoct($perm)); - $size = sprintf("%11s ", decoct($size)); - $mtime = sprintf("%11s", decoct($mtime)); + $size = self::numberEncode($size, 12); + $mtime = self::numberEncode($mtime, 12); $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime); $data_last = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, ""); @@ -614,8 +661,8 @@ protected function parseHeader($block) $return['perm'] = OctDec(trim($header['perm'])); $return['uid'] = OctDec(trim($header['uid'])); $return['gid'] = OctDec(trim($header['gid'])); - $return['size'] = OctDec(trim($header['size'])); - $return['mtime'] = OctDec(trim($header['mtime'])); + $return['size'] = self::numberDecode($header['size']); + $return['mtime'] = self::numberDecode($header['mtime']); $return['typeflag'] = $header['typeflag']; $return['link'] = trim($header['link']); $return['uname'] = trim($header['uname']); @@ -713,4 +760,64 @@ public function filetype($file) return Archive::COMPRESS_NONE; } + /** + * Decodes numeric values according to the + * https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions + * (basically with support for big numbers) + * + * @param string $field + * @return int + */ + static public function numberDecode($field) + { + $firstByte = ord(substr($field, 0, 1)); + if ($firstByte === 255) { + $value = -1 << (8 * strlen($field)); + $shift = 0; + for ($i = strlen($field) - 1; $i >= 0; 
$i--) { + $value += ord(substr($field, $i, 1)) << $shift; + $shift += 8; + } + } elseif ($firstByte === 128) { + $value = 0; + $shift = 0; + for ($i = strlen($field) - 1; $i > 0; $i--) { + $value += ord(substr($field, $i, 1)) << $shift; + $shift += 8; + } + } else { + $value = octdec(trim($field)); + } + return $value; + } + + /** + * Encodes numeric values according to the + * https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions + * (basically with support for big numbers) + * + * @param int $value + * @param int $length field length + * @return string + */ + static public function numberEncode($value, $length) + { + // old implementations leave last byte empty + // octal encoding encodes three bits per byte + $maxValue = 1 << (($length - 1) * 3); + if ($value < 0) { + // PHP already stores integers as 2's complement + $value = pack(PHP_INT_SIZE === 8 ? 'J' : 'N', (int) $value); + $encoded = str_repeat(chr(255), max(1, $length - PHP_INT_SIZE)); + $encoded .= substr($value, max(0, PHP_INT_SIZE - $length + 1)); + } elseif ($value >= $maxValue) { + $value = pack(PHP_INT_SIZE === 8 ? 'J' : 'N', (int) $value); + $encoded = chr(128) . str_repeat(chr(0), max(0, $length - PHP_INT_SIZE - 1)); + $encoded .= substr($value, max(0, PHP_INT_SIZE - $length + 1)); + } else { + $encoded = sprintf("%" . ($length - 1) . 
"s ", decoct($value)); + } + return $encoded; + } } + diff --git a/tests/TarTestCase.php b/tests/TarTestCase.php index 32cdeed..1a1c5e6 100644 --- a/tests/TarTestCase.php +++ b/tests/TarTestCase.php @@ -16,7 +16,7 @@ class TarTestCase extends TestCase protected $extensions = array('tar'); /** @inheritdoc */ - protected function setUp() : void + protected function setUp(): void { parent::setUp(); if (extension_loaded('zlib')) { @@ -31,7 +31,7 @@ protected function setUp() : void } /** @inheritdoc */ - protected function tearDown() : void + protected function tearDown(): void { parent::tearDown(); $this->extensions[] = null; @@ -53,7 +53,8 @@ protected function getDir() * Callback check function * @param FileInfo $fileinfo */ - public function increaseCounter($fileinfo) { + public function increaseCounter($fileinfo) + { $this->assertInstanceOf('\\splitbrain\\PHPArchive\\FileInfo', $fileinfo); $this->counter++; } @@ -560,7 +561,8 @@ public function testZeroData() /** * Add a zero byte file to a tar and extract it again */ - public function testZeroByteFile() { + public function testZeroByteFile() + { $archive = sys_get_temp_dir() . '/dwziptest' . md5(time()) . '.zip'; $extract = sys_get_temp_dir() . '/dwziptest' . 
md5(time() + 1); @@ -778,6 +780,118 @@ public function testSaveWithInvalidDestinationFile() $this->assertTrue(true); // succeed if no exception, yet } + public function testNumberEncodeDecode() + { + // 2^34 + 17 = 2^2 * 2^32 + 17 + $refValue = (1 << 34) + 17; + $encoded = Tar::numberEncode($refValue, 12); + $this->assertEquals(pack('CCnNN', 128, 0, 0, 1 << 2, 17), $encoded); + $decoded = Tar::numberDecode($encoded); + $this->assertEquals($refValue, $decoded); + + $encoded = Tar::numberEncode($refValue, 7); + $this->assertEquals(pack('CnN', 128, 1 << 2, 17), $encoded); + $decoded = Tar::numberDecode($encoded); + $this->assertEquals($refValue, $decoded); + + $refValue = -1234; + $encoded = Tar::numberEncode($refValue, 12); + $this->assertEquals(pack('CCnNN', 0xFF, 0xFF, 0xFFFF, 0xFFFFFFFF, -1234), $encoded); + $decoded = Tar::numberDecode($encoded); + $this->assertEquals($refValue, $decoded); + + $encoded = Tar::numberEncode($refValue, 3); + $this->assertEquals(pack('Cn', 0xFF, -1234), $encoded); + $decoded = Tar::numberDecode($encoded); + $this->assertEquals($refValue, $decoded); + } + + public function testReadCurrentEntry() + { + $tar = new Tar(); + $tar->open(__DIR__ . '/tar/test.tar'); + $out = sys_get_temp_dir() . '/dwtartest' . md5(time()); + $tar->extract($out); + + $tar = new Tar(); + $tar->open(__DIR__ . '/tar/test.tar'); + $pathsRead = array(); + foreach ($tar->yieldContents() as $i) { + $this->assertFileExists($out . '/' . $i->getPath()); + if ($i->getIsdir()) { + $this->assertEquals('', $tar->readCurrentEntry()); + } else { + $this->assertStringEqualsFile($out . '/' . 
$i->getPath(), $tar->readCurrentEntry()); + } + $pathsRead[] = $i->getPath(); + } + $pathsReadRef = array('tar', 'tar/testdata1.txt', 'tar/foobar', 'tar/foobar/testdata2.txt'); + $this->assertEquals($pathsReadRef, $pathsRead); + + self::RDelete($out); + } + + /** + * Create an archive, extract it, and compare file properties + */ + public function testFilePropertiesPreservation() + { + $input = glob($this->getDir() . '/../src/*'); + $archive = sys_get_temp_dir() . '/dwtartest' . md5(time()) . '.tar'; + $extract = sys_get_temp_dir() . '/dwtartest' . md5(time() + 1); + + // Create archive + $tar = new Tar(); + $tar->create($archive); + foreach ($input as $path) { + $file = basename($path); + $tar->addFile($path, $file); + } + $tar->close(); + $this->assertFileExists($archive); + + // Extract archive + $tar = new Tar(); + $tar->open($archive); + $tar->extract($extract); + $tar->close(); + + // Compare file properties + foreach ($input as $originalPath) { + $filename = basename($originalPath); + $extractedPath = $extract . '/' . 
$filename; + + $this->assertFileExists($extractedPath, "Extracted file should exist: $filename"); + + // Compare file sizes + $originalSize = filesize($originalPath); + $extractedSize = filesize($extractedPath); + $this->assertEquals($originalSize, $extractedSize, "File size should match for: $filename"); + + // Compare file contents + $originalContent = file_get_contents($originalPath); + $extractedContent = file_get_contents($extractedPath); + $this->assertEquals($originalContent, $extractedContent, "File content should match for: $filename"); + + // Compare modification times (allow small difference due to tar format limitations) + $originalMtime = filemtime($originalPath); + $extractedMtime = filemtime($extractedPath); + $this->assertLessThanOrEqual(1, abs($originalMtime - $extractedMtime), + "Modification time should be preserved (within 1 second) for: $filename"); + + // Compare file permissions (only on Unix-like systems) + if (DIRECTORY_SEPARATOR === '/') { + $originalPerms = fileperms($originalPath) & 0777; + $extractedPerms = fileperms($extractedPath) & 0777; + $this->assertEquals($originalPerms, $extractedPerms, + "File permissions should match for: $filename"); + } + } + + self::RDelete($extract); + unlink($archive); + } + /** * recursive rmdir()/unlink() * diff --git a/tests/ZipTestCase.php b/tests/ZipTestCase.php index 3f169d5..7d4f7a5 100644 --- a/tests/ZipTestCase.php +++ b/tests/ZipTestCase.php @@ -560,6 +560,68 @@ public function testUmlautWindows() $this->assertFileExists("$out/täst.txt"); } + /** + * Create an archive, extract it, and compare file properties + */ + public function testFilePropertiesPreservation() + { + $input = glob($this->getDir() . '/../src/*'); + $archive = sys_get_temp_dir() . '/dwtartest' . md5(time()) . '.zip'; + $extract = sys_get_temp_dir() . '/dwtartest' . 
md5(time() + 1); + + // Create archive + $zip = new Zip(); + $zip->create($archive); + foreach ($input as $path) { + $file = basename($path); + $zip->addFile($path, $file); + } + $zip->close(); + $this->assertFileExists($archive); + + // Extract archive + $zip = new Zip(); + $zip->open($archive); + $zip->extract($extract); + $zip->close(); + + // Compare file properties + foreach ($input as $originalPath) { + $filename = basename($originalPath); + $extractedPath = $extract . '/' . $filename; + + $this->assertFileExists($extractedPath, "Extracted file should exist: $filename"); + + // Compare file sizes + $originalSize = filesize($originalPath); + $extractedSize = filesize($extractedPath); + $this->assertEquals($originalSize, $extractedSize, "File size should match for: $filename"); + + // Compare file contents + $originalContent = file_get_contents($originalPath); + $extractedContent = file_get_contents($extractedPath); + $this->assertEquals($originalContent, $extractedContent, "File content should match for: $filename"); + + // Compare modification times (allow small difference due to archive format limitations) + $originalMtime = filemtime($originalPath); + $extractedMtime = filemtime($extractedPath); + $this->assertLessThanOrEqual(1, abs($originalMtime - $extractedMtime), + "Modification time should be preserved (within 1 second) for: $filename"); + + // Compare file permissions (only on Unix-like systems) + if (DIRECTORY_SEPARATOR === '/') { + $originalPerms = fileperms($originalPath) & 0777; + $extractedPerms = fileperms($extractedPath) & 0777; + $this->assertEquals($originalPerms, $extractedPerms, + "File permissions should match for: $filename"); + } + } + + self::RDelete($extract); + unlink($archive); + } + + /** * recursive rmdir()/unlink() *