From 30326d7e8c6119d5f43607949a9082991b7f2fbb Mon Sep 17 00:00:00 2001
From: Damodar Lohani
Date: Sun, 24 Nov 2024 02:26:14 +0000
Subject: [PATCH 1/4] Fix: failed upload retry with test

---
 src/Storage/Device/Local.php       | 12 +++--
 src/Storage/Device/S3.php          |  7 ++-
 tests/Storage/Device/LocalTest.php | 54 +++++++++++++++++++++++
 tests/Storage/S3Base.php           | 71 ++++++++++++++++++++++++++++++
 4 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/src/Storage/Device/Local.php b/src/Storage/Device/Local.php
index 6819bf7f..a587a7cf 100644
--- a/src/Storage/Device/Local.php
+++ b/src/Storage/Device/Local.php
@@ -95,8 +95,14 @@ public function upload(string $source, string $path, int $chunk = 1, int $chunks
 
         $tmp = \dirname($path).DIRECTORY_SEPARATOR.'tmp_'.\basename($path).DIRECTORY_SEPARATOR.\basename($path).'_chunks.log';
         $this->createDirectory(\dirname($tmp));
-        if (! file_put_contents($tmp, "$chunk\n", FILE_APPEND)) {
-            throw new Exception('Can\'t write chunk log '.$tmp);
+
+        $chunkFilePath = dirname($tmp).DIRECTORY_SEPARATOR.pathinfo($path, PATHINFO_FILENAME).'.part.'.$chunk;
+
+        // skip writing chunk if the chunk was re-uploaded
+        if (! file_exists($chunkFilePath)) {
+            if (! file_put_contents($tmp, "$chunk\n", FILE_APPEND)) {
+                throw new Exception('Can\'t write chunk log '.$tmp);
+            }
         }
 
         $chunkLogs = file($tmp);
@@ -106,7 +112,7 @@ public function upload(string $source, string $path, int $chunk = 1, int $chunks
 
         $chunksReceived = count(file($tmp));
 
-        if (! \rename($source, dirname($tmp).DIRECTORY_SEPARATOR.pathinfo($path, PATHINFO_FILENAME).'.part.'.$chunk)) {
+        if (! \rename($source, $chunkFilePath)) {
             throw new Exception('Failed to write chunk '.$chunk);
         }
 
diff --git a/src/Storage/Device/S3.php b/src/Storage/Device/S3.php
index 7a9bfbb1..557446fa 100644
--- a/src/Storage/Device/S3.php
+++ b/src/Storage/Device/S3.php
@@ -318,8 +318,13 @@ public function uploadData(string $data, string $path, string $contentType, int
         $etag = $this->uploadPart($data, $path, $contentType, $chunk, $uploadId);
         $metadata['parts'] ??= [];
         $metadata['parts'][] = ['partNumber' => $chunk, 'etag' => $etag];
+        $metadata['chunksUploaded'] ??= [];
+        $metadata['chunksUploaded'][$chunk] = 1;
         $metadata['chunks'] ??= 0;
-        $metadata['chunks']++;
+        // skip incrementing if the chunk was re-uploaded
+        if (! array_key_exists($chunk, $metadata['chunksUploaded'])) {
+            $metadata['chunks']++;
+        }
         if ($metadata['chunks'] == $chunks) {
             $this->completeMultipartUpload($path, $uploadId, $metadata['parts']);
         }
diff --git a/tests/Storage/Device/LocalTest.php b/tests/Storage/Device/LocalTest.php
index a2156b6a..c3d91b23 100644
--- a/tests/Storage/Device/LocalTest.php
+++ b/tests/Storage/Device/LocalTest.php
@@ -173,6 +173,60 @@ public function testPartUpload()
         return $dest;
     }
 
+    public function testPartUploadRetry()
+    {
+        $source = __DIR__.'/../../resources/disk-a/large_file.mp4';
+        $dest = $this->object->getPath('uploaded2.mp4');
+        $totalSize = \filesize($source);
+        // AWS S3 requires each part to be at least 5MB except for last part
+        $chunkSize = 5 * 1024 * 1024;
+
+        $chunks = ceil($totalSize / $chunkSize);
+
+        $chunk = 1;
+        $start = 0;
+        $handle = @fopen($source, 'rb');
+        $op = __DIR__.'/chunkx.part';
+        while ($start < $totalSize) {
+            $contents = fread($handle, $chunkSize);
+            $op = __DIR__.'/chunkx.part';
+            $cc = fopen($op, 'wb');
+            fwrite($cc, $contents);
+            fclose($cc);
+            $this->object->upload($op, $dest, $chunk, $chunks);
+            $start += strlen($contents);
+            $chunk++;
+            if ($chunk == 2) {
+                break;
+            }
+            fseek($handle, $start);
+        }
+        @fclose($handle);
+
+        $chunk = 1;
+        $start = 0;
+        // retry from first to make sure duplicate chunk re-upload works without issue
+        $handle = @fopen($source, 'rb');
+        $op = __DIR__.'/chunkx.part';
+        while ($start < $totalSize) {
+            $contents = fread($handle, $chunkSize);
+            $op = __DIR__.'/chunkx.part';
+            $cc = fopen($op, 'wb');
+            fwrite($cc, $contents);
+            fclose($cc);
+            $this->object->upload($op, $dest, $chunk, $chunks);
+            $start += strlen($contents);
+            $chunk++;
+            fseek($handle, $start);
+        }
+        @fclose($handle);
+
+        $this->assertEquals(\filesize($source), $this->object->getFileSize($dest));
+        $this->assertEquals(\md5_file($source), $this->object->getFileHash($dest));
+
+        return $dest;
+    }
+
     public function testAbort()
     {
         $source = __DIR__.'/../../resources/disk-a/large_file.mp4';
diff --git a/tests/Storage/S3Base.php b/tests/Storage/S3Base.php
index 00a60f52..7bf47c53 100644
--- a/tests/Storage/S3Base.php
+++ b/tests/Storage/S3Base.php
@@ -295,6 +295,77 @@ public function testPartUpload()
         return $dest;
     }
 
+    public function testPartUploadRetry()
+    {
+        $source = __DIR__.'/../resources/disk-a/large_file.mp4';
+        $dest = $this->object->getPath('uploaded.mp4');
+        $totalSize = \filesize($source);
+        // AWS S3 requires each part to be at least 5MB except for last part
+        $chunkSize = 5 * 1024 * 1024;
+
+        $chunks = ceil($totalSize / $chunkSize);
+
+        $chunk = 1;
+        $start = 0;
+
+        $metadata = [
+            'parts' => [],
+            'chunks' => 0,
+            'uploadId' => null,
+            'content_type' => \mime_content_type($source),
+        ];
+        $handle = @fopen($source, 'rb');
+        $op = __DIR__.'/chunk.part';
+        while ($start < $totalSize) {
+            $contents = fread($handle, $chunkSize);
+            $op = __DIR__.'/chunk.part';
+            $cc = fopen($op, 'wb');
+            fwrite($cc, $contents);
+            fclose($cc);
+            $etag = $this->object->upload($op, $dest, $chunk, $chunks, $metadata);
+            $parts[] = ['partNumber' => $chunk, 'etag' => $etag];
+            $start += strlen($contents);
+            $chunk++;
+            if ($chunk == 2) {
+                break;
+            }
+            fseek($handle, $start);
+        }
+        @fclose($handle);
+        unlink($op);
+
+        $chunk = 1;
+        $start = 0;
+        // retry from first to make sure duplicate chunk re-upload works without issue
+        $handle = @fopen($source, 'rb');
+        $op = __DIR__.'/chunk.part';
+        while ($start < $totalSize) {
+            $contents = fread($handle, $chunkSize);
+            $op = __DIR__.'/chunk.part';
+            $cc = fopen($op, 'wb');
+            fwrite($cc, $contents);
+            fclose($cc);
+            $etag = $this->object->upload($op, $dest, $chunk, $chunks, $metadata);
+            $parts[] = ['partNumber' => $chunk, 'etag' => $etag];
+            $start += strlen($contents);
+            $chunk++;
+            fseek($handle, $start);
+        }
+        @fclose($handle);
+        unlink($op);
+
+        $this->assertEquals(\filesize($source), $this->object->getFileSize($dest));
+
+        // S3 doesn't provide a method to get a proper MD5-hash of a file created using multipart upload
+        // https://stackoverflow.com/questions/8618218/amazon-s3-checksum
+        // More info on how AWS calculates ETag for multipart upload here
+        // https://savjee.be/2015/10/Verifying-Amazon-S3-multi-part-uploads-with-ETag-hash/
+        // TODO
+        // $this->assertEquals(\md5_file($source), $this->object->getFileHash($dest));
+        // $this->object->delete($dest);
+        return $dest;
+    }
+
     /**
      * @depends testPartUpload
      */

From 39cf3e8393dcad4a0b35153d0f1696e0fb6e19d3 Mon Sep 17 00:00:00 2001
From: Damodar Lohani
Date: Mon, 25 Nov 2024 07:39:53 +0000
Subject: [PATCH 2/4] fix s3 chunk upload

---
 src/Storage/Device/S3.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storage/Device/S3.php b/src/Storage/Device/S3.php
index 557446fa..70611612 100644
--- a/src/Storage/Device/S3.php
+++ b/src/Storage/Device/S3.php
@@ -319,10 +319,10 @@ public function uploadData(string $data, string $path, string $contentType, int
         $metadata['parts'] ??= [];
         $metadata['parts'][] = ['partNumber' => $chunk, 'etag' => $etag];
         $metadata['chunksUploaded'] ??= [];
-        $metadata['chunksUploaded'][$chunk] = 1;
         $metadata['chunks'] ??= 0;
         // skip incrementing if the chunk was re-uploaded
         if (! array_key_exists($chunk, $metadata['chunksUploaded'])) {
+            $metadata['chunksUploaded'][$chunk] = true;
             $metadata['chunks']++;
         }
         if ($metadata['chunks'] == $chunks) {

From 93ce8a418b4b5461ad5b170d07c8b5e9c52a119c Mon Sep 17 00:00:00 2001
From: Damodar Lohani
Date: Mon, 25 Nov 2024 08:48:27 +0000
Subject: [PATCH 3/4] refactor s3 chunked upload

---
 src/Storage/Device/S3.php | 6 +++---
 tests/Storage/S3Base.php  | 9 +++------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/Storage/Device/S3.php b/src/Storage/Device/S3.php
index 70611612..26b7b7fb 100644
--- a/src/Storage/Device/S3.php
+++ b/src/Storage/Device/S3.php
@@ -317,7 +317,7 @@ public function uploadData(string $data, string $path, string $contentType, int
 
         $etag = $this->uploadPart($data, $path, $contentType, $chunk, $uploadId);
         $metadata['parts'] ??= [];
-        $metadata['parts'][] = ['partNumber' => $chunk, 'etag' => $etag];
+        $metadata['parts'][$chunk] = $etag;
         $metadata['chunksUploaded'] ??= [];
         $metadata['chunks'] ??= 0;
         // skip incrementing if the chunk was re-uploaded
@@ -435,8 +435,8 @@ protected function completeMultipartUpload(string $path, string $uploadId, array
         $uri = $path !== '' ? '/'.\str_replace(['%2F', '%3F'], ['/', '?'], \rawurlencode($path)) : '/';
 
         $body = '<CompleteMultipartUpload>';
-        foreach ($parts as $part) {
-            $body .= "<Part><ETag>{$part['etag']}</ETag><PartNumber>{$part['partNumber']}</PartNumber></Part>";
+        foreach ($parts as $key => $etag) {
+            $body .= "<Part><ETag>{$etag}</ETag><PartNumber>{$key}</PartNumber></Part>";
         }
         $body .= '</CompleteMultipartUpload>';
 
diff --git a/tests/Storage/S3Base.php b/tests/Storage/S3Base.php
index 7bf47c53..314cfe02 100644
--- a/tests/Storage/S3Base.php
+++ b/tests/Storage/S3Base.php
@@ -274,8 +274,7 @@ public function testPartUpload()
             $cc = fopen($op, 'wb');
             fwrite($cc, $contents);
             fclose($cc);
-            $etag = $this->object->upload($op, $dest, $chunk, $chunks, $metadata);
-            $parts[] = ['partNumber' => $chunk, 'etag' => $etag];
+            $this->object->upload($op, $dest, $chunk, $chunks, $metadata);
             $start += strlen($contents);
             $chunk++;
             fseek($handle, $start);
@@ -322,8 +321,7 @@ public function testPartUploadRetry()
             $cc = fopen($op, 'wb');
             fwrite($cc, $contents);
             fclose($cc);
-            $etag = $this->object->upload($op, $dest, $chunk, $chunks, $metadata);
-            $parts[] = ['partNumber' => $chunk, 'etag' => $etag];
+            $this->object->upload($op, $dest, $chunk, $chunks, $metadata);
             $start += strlen($contents);
             $chunk++;
             if ($chunk == 2) {
@@ -345,8 +343,7 @@ public function testPartUploadRetry()
             $cc = fopen($op, 'wb');
             fwrite($cc, $contents);
             fclose($cc);
-            $etag = $this->object->upload($op, $dest, $chunk, $chunks, $metadata);
-            $parts[] = ['partNumber' => $chunk, 'etag' => $etag];
+            $this->object->upload($op, $dest, $chunk, $chunks, $metadata);
             $start += strlen($contents);
             $chunk++;
             fseek($handle, $start);

From 9877787be3f82afb0685108c0294e081927b1446 Mon Sep 17 00:00:00 2001
From: Damodar Lohani
Date: Thu, 28 Nov 2024 00:51:32 +0000
Subject: [PATCH 4/4] cleanup

---
 src/Storage/Device/S3.php | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/Storage/Device/S3.php b/src/Storage/Device/S3.php
index 26b7b7fb..c21bb9cb 100644
--- a/src/Storage/Device/S3.php
+++ b/src/Storage/Device/S3.php
@@ -315,16 +315,15 @@ public function uploadData(string $data, string $path, string $contentType, int
             $metadata['uploadId'] = $uploadId;
         }
 
-        $etag = $this->uploadPart($data, $path, $contentType, $chunk, $uploadId);
         $metadata['parts'] ??= [];
-        $metadata['parts'][$chunk] = $etag;
-        $metadata['chunksUploaded'] ??= [];
         $metadata['chunks'] ??= 0;
+
+        $etag = $this->uploadPart($data, $path, $contentType, $chunk, $uploadId);
         // skip incrementing if the chunk was re-uploaded
-        if (! array_key_exists($chunk, $metadata['chunksUploaded'])) {
-            $metadata['chunksUploaded'][$chunk] = true;
+        if (! array_key_exists($chunk, $metadata['parts'])) {
             $metadata['chunks']++;
         }
+        $metadata['parts'][$chunk] = $etag;
         if ($metadata['chunks'] == $chunks) {
             $this->completeMultipartUpload($path, $uploadId, $metadata['parts']);
         }
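
Note on the end state of the series: after [PATCH 4/4], part accounting in S3::uploadData() is idempotent. $metadata['parts'] is keyed by part number, so a re-uploaded chunk overwrites its ETag instead of appending a duplicate entry, and $metadata['chunks'] is incremented only the first time a part number is seen, so a retried chunk can never trigger a premature completeMultipartUpload(). The Local device gets the same property by skipping the chunk-log append when the chunk file already exists. Below is a minimal caller-side sketch of the retry pattern the new tests exercise; it assumes the upload($source, $path, $chunk, $chunks, &$metadata) signature used in the S3 tests is accepted by both devices, and the paths, the $sendChunk closure, and the simulated failure are illustrative, not part of the library.

    <?php

    use Utopia\Storage\Device\Local;

    $device = new Local(__DIR__.'/storage');  // any Device should work here
    $source = __DIR__.'/large_file.mp4';      // hypothetical input file
    $dest = $device->getPath('uploaded.mp4');

    $chunkSize = 5 * 1024 * 1024; // S3 minimum part size, except for the last part
    $totalSize = \filesize($source);
    $chunks = (int) \ceil($totalSize / $chunkSize);
    $metadata = []; // the device fills uploadId/parts/chunks as the upload progresses

    // Write 1-based chunk number $chunk to a temp file and hand it to the device.
    $sendChunk = function (int $chunk) use ($device, $source, $dest, $chunks, $chunkSize, &$metadata): void {
        $handle = \fopen($source, 'rb');
        \fseek($handle, ($chunk - 1) * $chunkSize);
        $part = \tempnam(\sys_get_temp_dir(), 'part');
        \file_put_contents($part, \fread($handle, $chunkSize));
        \fclose($handle);
        $device->upload($part, $dest, $chunk, $chunks, $metadata);
        @\unlink($part); // Local::upload() moves the temp file itself, so ignore failures
    };

    $sendChunk(1); // first attempt; suppose the client never sees the response
    $sendChunk(1); // retry of the same chunk: counted once, chunk file/ETag overwritten

    for ($chunk = 2; $chunk <= $chunks; $chunk++) {
        $sendChunk($chunk); // the final chunk completes the upload
    }

The design choice shared by both devices is to make only the bookkeeping conditional (the chunk-log line, the chunks counter) while letting the write itself happen again, so replaying a part is always safe.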