From 7e97d53c8728fe69f1dbc3b57b945e759bd7d08f Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 20 Nov 2024 22:14:49 +0100 Subject: [PATCH 1/9] Add first prototype CLI command --- .../bin/import/blueprint-import-wxr.json | 18 +++++++ .../data-liberation/bin/import/import-wxr.sh | 48 +++++++++++++++++++ .../playground/data-liberation/plugin.php | 46 ++++++------------ .../data-liberation/src/functions.php | 20 ++++++++ .../data-liberation/tests/import/run.sh | 2 +- 5 files changed, 102 insertions(+), 32 deletions(-) create mode 100644 packages/playground/data-liberation/bin/import/blueprint-import-wxr.json create mode 100644 packages/playground/data-liberation/bin/import/import-wxr.sh diff --git a/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json b/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json new file mode 100644 index 0000000000..55ab107921 --- /dev/null +++ b/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json @@ -0,0 +1,18 @@ +{ + "$schema": "../../../blueprints/public/blueprint-schema.json", + "constants": { + "WP_DEBUG": true, + "WP_DEBUG_LOG": true + }, + "login": true, + "steps": [ + { + "step": "activatePlugin", + "pluginPath": "data-liberation/plugin.php" + }, + { + "step": "runPHP", + "code": "files as $file ) {\nif ( $file->isFile() && pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) === 'xml' ) {\ndata_liberation_import( $file->getPathname() );\n}\n}\n};" + } + ] +} diff --git a/packages/playground/data-liberation/bin/import/import-wxr.sh b/packages/playground/data-liberation/bin/import/import-wxr.sh new file mode 100644 index 0000000000..fece425b68 --- /dev/null +++ b/packages/playground/data-liberation/bin/import/import-wxr.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# +# A script that accepts a WXR XML file and imports it into a WordPress site +# +# Usage: +# ./import-wxr.sh +# + +# Display help message +show_help() { + echo "Usage: $0 [-h|--help] " + echo "Options:" + echo " -h, --help Show this help message" +} + +# Check if no arguments were provided. If so, display help message +if [ $# -eq 0 ]; then + show_help + exit 1 +fi + +# Parse command line arguments. If an invalid argument is provided, display help message +while [[ "$1" =~ ^- && ! "$1" == "--" ]]; do case $1 in + -h | --help ) + show_help + exit 0 + ;; +esac; shift; done +if [[ "$1" == '--' ]]; then shift; fi + +# Check if filename is provided. If not, display error message. +if [ -z "$1" ]; then + echo "Error: No folder provided" + show_help + exit 1 +fi + +# Check if the file exists +if [ -d "$1" ]; then + bun ../../../cli/src/cli.ts \ + server \ + --mount=../../:/wordpress/wp-content/plugins/data-liberation \ + --mount=$1:/wordpress/wp-content/uploads/import-wxr \ + --blueprint=./blueprint-import-wxr.json +else + echo "Error: File '$1' does not exist" + exit 1 +fi diff --git a/packages/playground/data-liberation/plugin.php b/packages/playground/data-liberation/plugin.php index 3a0f15f7a4..5f383b4a69 100644 --- a/packages/playground/data-liberation/plugin.php +++ b/packages/playground/data-liberation/plugin.php @@ -25,39 +25,23 @@ return []; }); -/** - * Development debug code to run the import manually. - * @TODO: Remove this in favor of a CLI command. - */ add_action('init', function() { - return; - $wxr_path = __DIR__ . '/tests/fixtures/wxr-simple.xml'; - $importer = WP_Stream_Importer::create_for_wxr_file( - $wxr_path - ); - while($importer->next_step()) { - // ... + if ( defined( 'WP_CLI' ) && WP_CLI ) { + /** + * Import a WXR file. + * + * + * : The WXR file to import. + */ + $command = function ( $args, $assoc_args ) { + $file = $args[0]; + data_liberation_import( $file ); + }; + + // Register the WP-CLI import command. + // Example usage: wp data-liberation /path/to/file.xml + WP_CLI::add_command( 'data-liberation', $command ); } - return; - $importer->next_step(); - $paused_importer_state = $importer->get_reentrancy_cursor(); - - echo "\n\n"; - echo "moving to importer2\n"; - echo "\n\n"; - - $importer2 = WP_Stream_Importer::create_for_wxr_file( - $wxr_path, - array(), - $paused_importer_state - ); - $importer2->next_step(); - $importer2->next_step(); - $importer2->next_step(); - // $importer2->next_step(); - // var_dump($importer2); - - die("YAY"); }); // Register admin menu diff --git a/packages/playground/data-liberation/src/functions.php b/packages/playground/data-liberation/src/functions.php index 0c7cfec22f..1bbd4d6aca 100644 --- a/packages/playground/data-liberation/src/functions.php +++ b/packages/playground/data-liberation/src/functions.php @@ -191,3 +191,23 @@ function wp_visit_file_tree( $dir ) { new SplFileInfo( $dir ) ); } + +/** + * Import a WXR file. Used in the CLI. + * + * @param string $file The path to the WXR file. + * @return void + */ +function data_liberation_import( $file ) { + $entity_iterator_factory = function () use ( $file ) { + $wxr = new WP_WXR_Reader(); + $wxr->connect_upstream( new WP_File_Reader( $file ) ); + + return $wxr; + }; + + $importer = WP_Stream_Importer::create( $entity_iterator_factory ); + + $importer->frontload_assets(); + $importer->import_entities(); +} diff --git a/packages/playground/data-liberation/tests/import/run.sh b/packages/playground/data-liberation/tests/import/run.sh index 46bf1f196a..92190c011c 100644 --- a/packages/playground/data-liberation/tests/import/run.sh +++ b/packages/playground/data-liberation/tests/import/run.sh @@ -4,4 +4,4 @@ bun ../../../cli/src/cli.ts \ server \ --mount=../../:/wordpress/wp-content/plugins/data-liberation \ --mount=../../../../docs:/wordpress/wp-content/docs \ - --blueprint=/Users/cloudnik/www/Automattic/core/plugins/playground/packages/playground/data-liberation/tests/import/blueprint-import.json + --blueprint=./blueprint-import.json From 92c830e0881a29ebd4229b814f61b11f4106d7c4 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 20 Nov 2024 22:45:12 +0100 Subject: [PATCH 2/9] Fix: typo --- .../playground/data-liberation/bin/import/import-wxr.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/playground/data-liberation/bin/import/import-wxr.sh b/packages/playground/data-liberation/bin/import/import-wxr.sh index fece425b68..49cedceebd 100644 --- a/packages/playground/data-liberation/bin/import/import-wxr.sh +++ b/packages/playground/data-liberation/bin/import/import-wxr.sh @@ -1,14 +1,14 @@ #!/bin/bash # -# A script that accepts a WXR XML file and imports it into a WordPress site +# A script that accepts a folder and imports all WXR files into a WordPress site # # Usage: -# ./import-wxr.sh +# ./import-wxr.sh # # Display help message show_help() { - echo "Usage: $0 [-h|--help] " + echo "Usage: $0 [-h|--help] " echo "Options:" echo " -h, --help Show this help message" } @@ -43,6 +43,6 @@ if [ -d "$1" ]; then --mount=$1:/wordpress/wp-content/uploads/import-wxr \ --blueprint=./blueprint-import-wxr.json else - echo "Error: File '$1' does not exist" + echo "Error: Folder '$1' does not exist" exit 1 fi From fab4f2f0bccb39979e87be87c29fc0d3b0b8438e Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 22 Nov 2024 13:25:03 +0100 Subject: [PATCH 3/9] Add re-entrancy semantics support --- .../data-liberation/src/functions.php | 29 ++++++++++++------- .../src/import/WP_Stream_Importer.php | 9 ++++++ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/packages/playground/data-liberation/src/functions.php b/packages/playground/data-liberation/src/functions.php index 1bbd4d6aca..adf6e97a4c 100644 --- a/packages/playground/data-liberation/src/functions.php +++ b/packages/playground/data-liberation/src/functions.php @@ -193,21 +193,28 @@ function wp_visit_file_tree( $dir ) { } /** - * Import a WXR file. Used in the CLI. + * Import a WXR file. Used by the CLI. * - * @param string $file The path to the WXR file. + * @param string $path The path to the WXR file. * @return void */ -function data_liberation_import( $file ) { - $entity_iterator_factory = function () use ( $file ) { - $wxr = new WP_WXR_Reader(); - $wxr->connect_upstream( new WP_File_Reader( $file ) ); +function data_liberation_import( $path ) { + $importer = WP_Stream_Importer::create_for_wxr_file( $path ); + $is_wp_cli = defined( 'WP_CLI' ) && WP_CLI; - return $wxr; - }; + if ( $is_wp_cli ) { + WP_CLI::line( "Importing from {$path}" ); + } - $importer = WP_Stream_Importer::create( $entity_iterator_factory ); + while ( $importer->next_step() ) { + // Output the current stage if running in WP-CLI. + if ( $is_wp_cli ) { + $current_stage = $importer->get_current_stage(); + WP_CLI::line( "Import: stage {$current_stage}" ); + } + } - $importer->frontload_assets(); - $importer->import_entities(); + if ( $is_wp_cli ) { + WP_CLI::success( 'Import ended' ); + } } diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index cc4ea76c30..fab2ab1b4e 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -185,6 +185,15 @@ public function next_step() { } } + /** + * Get the current stage. + * + * @return string + */ + public function get_current_stage() { + return $this->stage; + } + /** * Advance the cursor to the oldest finished download. For example: * From a919beddbc495a19fcafb26ae3743829a7dfd80a Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 22 Nov 2024 15:54:48 +0100 Subject: [PATCH 4/9] Fix: missing require_once --- packages/playground/data-liberation/bootstrap.php | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index b99b02fd31..eb6d45c096 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -52,6 +52,7 @@ require_once __DIR__ . '/src/import/WP_File_Visitor_Event.php'; require_once __DIR__ . '/src/import/WP_Imported_Entity.php'; require_once __DIR__ . '/src/import/WP_Attachment_Downloader.php'; +require_once __DIR__ . '/src/import/WP_Attachment_Downloader_Event.php'; require_once __DIR__ . '/src/import/WP_Stream_Importer.php'; require_once __DIR__ . '/src/import/WP_Markdown_Importer.php'; From d6b24a48f628c2a9ac2bd4d05d9239cc680ac0a5 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 22 Nov 2024 15:55:05 +0100 Subject: [PATCH 5/9] Fix: wrong method name --- .../data-liberation/src/import/WP_Stream_Importer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index fab2ab1b4e..a1d625386d 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -409,7 +409,7 @@ private function enqueue_attachment_download( string $raw_url, $context_path = n $enqueued = $this->downloader->enqueue_if_not_exists( $url, $output_path ); if ( $enqueued ) { - $resource_id = $this->downloader->get_last_enqueued_resource_id(); + $resource_id = $this->downloader->get_enqueued_resource_id(); $entity_cursor = $this->entity_iterator->get_reentrancy_cursor(); $this->active_downloads[ $entity_cursor ][ $resource_id ] = true; } From d6c9019ad77fd7549fec28689eb836cd4755e90c Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 22 Nov 2024 15:55:16 +0100 Subject: [PATCH 6/9] Fix: endless loop --- .../src/import/WP_Attachment_Downloader.php | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Attachment_Downloader.php b/packages/playground/data-liberation/src/import/WP_Attachment_Downloader.php index a6be3e74f0..131c568a90 100644 --- a/packages/playground/data-liberation/src/import/WP_Attachment_Downloader.php +++ b/packages/playground/data-liberation/src/import/WP_Attachment_Downloader.php @@ -26,7 +26,7 @@ public function has_pending_requests() { public function enqueue_if_not_exists( $url, $output_path ) { $this->enqueued_resource_id = null; - $output_path = $this->output_root . '/' . ltrim( $output_path, '/' ); + $output_path = ltrim( $output_path, '/' ); if ( file_exists( $output_path ) ) { // @TODO: Reconsider the return value. The enqueuing operation failed, // but overall already having a file seems like a success. @@ -99,13 +99,14 @@ public function poll() { if ( ! $this->client->await_next_event() ) { return false; } - $event = $this->client->get_event(); - $request = $this->client->get_request(); - // The request object we get from the client may be a redirect. - // Let's keep referring to the original request. - $original_request_id = $request->original_request()->id; - while ( true ) { + do { + $event = $this->client->get_event(); + $request = $this->client->get_request(); + // The request object we get from the client may be a redirect. + // Let's keep referring to the original request. + $original_request_id = $this->client->get_request()->original_request()->id; + switch ( $event ) { case Client::EVENT_GOT_HEADERS: if ( ! $request->is_redirected() ) { @@ -129,7 +130,7 @@ public function poll() { fclose( $this->fps[ $original_request_id ] ); } if ( isset( $this->output_paths[ $original_request_id ] ) ) { - $partial_file = $this->output_root . '/' . $this->output_paths[ $original_request_id ] . '.partial'; + $partial_file = $this->output_paths[ $original_request_id ] . '.partial'; if ( file_exists( $partial_file ) ) { unlink( $partial_file ); } @@ -162,7 +163,7 @@ public function poll() { } break; } - } + } while ( $this->client->await_next_event() ); return true; } From 83d86263163c85306653018769328184e44dd009 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 22 Nov 2024 22:48:01 +0100 Subject: [PATCH 7/9] Addd check for WP_Stream_Importer::create_for_wxr_file error --- packages/playground/data-liberation/src/functions.php | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/playground/data-liberation/src/functions.php b/packages/playground/data-liberation/src/functions.php index adf6e97a4c..de37da5bac 100644 --- a/packages/playground/data-liberation/src/functions.php +++ b/packages/playground/data-liberation/src/functions.php @@ -198,8 +198,13 @@ function wp_visit_file_tree( $dir ) { * @param string $path The path to the WXR file. * @return void */ -function data_liberation_import( $path ) { - $importer = WP_Stream_Importer::create_for_wxr_file( $path ); +function data_liberation_import( $path ): bool { + $importer = WP_Stream_Importer::create_for_wxr_file( $path ); + + if ( ! $importer ) { + return false; + } + $is_wp_cli = defined( 'WP_CLI' ) && WP_CLI; if ( $is_wp_cli ) { @@ -217,4 +222,6 @@ function data_liberation_import( $path ) { if ( $is_wp_cli ) { WP_CLI::success( 'Import ended' ); } + + return true; } From 6e3e05ea170c4f31bf0d3637b60f3ea20058ce92 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 22 Nov 2024 23:39:47 +0100 Subject: [PATCH 8/9] Add PHPUnit to Playground --- .../playground/data-liberation/phpunit.xml | 1 + .../playground/data-liberation/project.json | 10 +++++++ .../tests/WPStreamImporterTests.php | 29 +++++++++++++++++++ .../tests/import/blueprint-import.json | 9 +++--- 4 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 packages/playground/data-liberation/tests/WPStreamImporterTests.php diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml index 6581bdaa36..800b55f189 100644 --- a/packages/playground/data-liberation/phpunit.xml +++ b/packages/playground/data-liberation/phpunit.xml @@ -10,6 +10,7 @@ tests/URLParserWHATWGComplianceTests.php tests/WPXMLProcessorTests.php tests/UrldecodeNTests.php + tests/WPStreamImporterTests.php diff --git a/packages/playground/data-liberation/project.json b/packages/playground/data-liberation/project.json index 815d255522..44c3e7f5a5 100644 --- a/packages/playground/data-liberation/project.json +++ b/packages/playground/data-liberation/project.json @@ -50,6 +50,16 @@ ], "parallel": false } + }, + "test:wp-phpunit": { + "executor": "nx:run-commands", + "options": { + "cwd": "packages/playground/data-liberation", + "commands": [ + "bun ../cli/src/cli.ts run-blueprint --quiet --mount=./:/wordpress/wp-content/plugins/data-liberation --blueprint=./tests/import/blueprint-import.json" + ], + "parallel": false + } } } } diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php new file mode 100644 index 0000000000..d0d32b17e4 --- /dev/null +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -0,0 +1,29 @@ +markTestSkipped( 'Test only runs in Playground' ); + } + } + + public function test_import_wxr_is_missing() { + $import = data_liberation_import( __DIR__ . '/wxr/not-a-valid-file.xml' ); + + $this->assertFalse( $import ); + } + + public function test_import_simple_wxr() { + $import = data_liberation_import( __DIR__ . '/wxr/small-export.xml' ); + + $this->assertTrue( $import ); + } +} diff --git a/packages/playground/data-liberation/tests/import/blueprint-import.json b/packages/playground/data-liberation/tests/import/blueprint-import.json index d34478b3aa..10d5960c06 100644 --- a/packages/playground/data-liberation/tests/import/blueprint-import.json +++ b/packages/playground/data-liberation/tests/import/blueprint-import.json @@ -1,15 +1,14 @@ { "$schema": "../../../blueprints/public/blueprint-schema.json", - "constants": { - "WP_DEBUG": true, - "WP_DEBUG_DISPLAY": true, - "WP_DEBUG_LOG": true - }, "login": true, "steps": [ { "step": "activatePlugin", "pluginPath": "data-liberation/plugin.php" + }, + { + "step": "runPHP", + "code": "run($arguments);\nif ( $res !== 0 ) {\n// throw new Exception( 'PHPUnit failed' );\ntrigger_error('ciao', E_USER_ERROR);\n}\n} catch (Throwable $e) {\necho \"Error running PHPUnit: \" . $e->getMessage();\nthrow $e;\n};" } ] } From 388965b9fb65028e2f8ee5d53a6445f16f35e86c Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Sat, 23 Nov 2024 00:10:35 +0100 Subject: [PATCH 9/9] Fix: wrong check --- .../data-liberation/tests/WPStreamImporterTests.php | 6 ------ .../data-liberation/tests/import/blueprint-import.json | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php index d0d32b17e4..f99113ec07 100644 --- a/packages/playground/data-liberation/tests/WPStreamImporterTests.php +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -15,12 +15,6 @@ protected function setUp(): void { } } - public function test_import_wxr_is_missing() { - $import = data_liberation_import( __DIR__ . '/wxr/not-a-valid-file.xml' ); - - $this->assertFalse( $import ); - } - public function test_import_simple_wxr() { $import = data_liberation_import( __DIR__ . '/wxr/small-export.xml' ); diff --git a/packages/playground/data-liberation/tests/import/blueprint-import.json b/packages/playground/data-liberation/tests/import/blueprint-import.json index 10d5960c06..5e383a2eb2 100644 --- a/packages/playground/data-liberation/tests/import/blueprint-import.json +++ b/packages/playground/data-liberation/tests/import/blueprint-import.json @@ -8,7 +8,7 @@ }, { "step": "runPHP", - "code": "run($arguments);\nif ( $res !== 0 ) {\n// throw new Exception( 'PHPUnit failed' );\ntrigger_error('ciao', E_USER_ERROR);\n}\n} catch (Throwable $e) {\necho \"Error running PHPUnit: \" . $e->getMessage();\nthrow $e;\n};" + "code": "run($arguments);\nif ( $res !== 0 ) {\ntrigger_error('PHPUnit failed', E_USER_ERROR);\n}\n} catch (Throwable $e) {\ntrigger_error('PHPUnit failed: ' . $e->getMessage(), E_USER_ERROR);\n};" } ] }