Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Data Liberation] Add WXR import CLI script #2012

Merged
merged 9 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"$schema": "../../../blueprints/public/blueprint-schema.json",
"constants": {
"WP_DEBUG": true,
"WP_DEBUG_LOG": true
},
"login": true,
"steps": [
{
"step": "activatePlugin",
"pluginPath": "data-liberation/plugin.php"
},
{
"step": "runPHP",
"code": "<?php require_once 'wordpress/wp-load.php';\n$upload_dir = wp_upload_dir();\nforeach ( wp_visit_file_tree( $upload_dir['basedir'] . '/import-wxr' ) as $event ) {\nforeach ( $event->files as $file ) {\nif ( $file->isFile() && pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) === 'xml' ) {\ndata_liberation_import( $file->getPathname() );\n}\n}\n};"
}
]
}
48 changes: 48 additions & 0 deletions packages/playground/data-liberation/bin/import/import-wxr.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
#
# A script that accepts a folder and imports all WXR files into a WordPress site.
#
# The folder is mounted into the Playground site at
# /wordpress/wp-content/uploads/import-wxr and the Playground CLI server is
# started with the blueprint ./blueprint-import-wxr.json.
#
# NOTE: the CLI entry point, the plugin mount and the blueprint are referenced
# with relative paths, so they resolve against the current working directory.
# Presumably the script is meant to be run from its own directory — confirm.
#
# Usage:
#   ./import-wxr.sh [-h|--help] <folder-name>
#

# Display help message
show_help() {
    echo "Usage: $0 [-h|--help] <folder-name>"
    echo "Options:"
    echo "  -h, --help    Show this help message"
}

# Check if no arguments were provided. If so, display help message
if [ $# -eq 0 ]; then
    show_help
    exit 1
fi

# Parse command line arguments. Options are consumed until the first
# non-option argument or a literal "--". An unknown option is an error:
# report it, display the help message and stop instead of silently ignoring it.
while [[ "$1" =~ ^- && ! "$1" == "--" ]]; do
    case $1 in
        -h | --help )
            show_help
            exit 0
            ;;
        * )
            echo "Error: Unknown option '$1'"
            show_help
            exit 1
            ;;
    esac
    shift
done
if [[ "$1" == '--' ]]; then shift; fi

# Check if a folder is provided. If not, display error message.
if [ -z "$1" ]; then
    echo "Error: No folder provided"
    show_help
    exit 1
fi

# Check if the folder exists
if [ -d "$1" ]; then
    # "$1" is quoted so that folder names containing spaces or glob
    # characters are passed to the CLI as a single --mount argument.
    bun ../../../cli/src/cli.ts \
        server \
        --mount=../../:/wordpress/wp-content/plugins/data-liberation \
        --mount="$1":/wordpress/wp-content/uploads/import-wxr \
        --blueprint=./blueprint-import-wxr.json
else
    echo "Error: Folder '$1' does not exist"
    exit 1
fi
1 change: 1 addition & 0 deletions packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
require_once __DIR__ . '/src/import/WP_File_Visitor_Event.php';
require_once __DIR__ . '/src/import/WP_Imported_Entity.php';
require_once __DIR__ . '/src/import/WP_Attachment_Downloader.php';
require_once __DIR__ . '/src/import/WP_Attachment_Downloader_Event.php';
require_once __DIR__ . '/src/import/WP_Stream_Importer.php';
require_once __DIR__ . '/src/import/WP_Markdown_Importer.php';

Expand Down
1 change: 1 addition & 0 deletions packages/playground/data-liberation/phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
<file>tests/URLParserWHATWGComplianceTests.php</file>
<file>tests/WPXMLProcessorTests.php</file>
<file>tests/UrldecodeNTests.php</file>
<file>tests/WPStreamImporterTests.php</file>
</testsuite>
</testsuites>
</phpunit>
46 changes: 15 additions & 31 deletions packages/playground/data-liberation/plugin.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,39 +25,23 @@
return [];
});

/**
* Development debug code to run the import manually.
* @TODO: Remove this in favor of a CLI command.
*/
add_action('init', function() {
return;
$wxr_path = __DIR__ . '/tests/fixtures/wxr-simple.xml';
$importer = WP_Stream_Importer::create_for_wxr_file(
$wxr_path
);
while($importer->next_step()) {
// ...
if ( defined( 'WP_CLI' ) && WP_CLI ) {
/**
* Import a WXR file.
*
* <file>
* : The WXR file to import.
*/
$command = function ( $args, $assoc_args ) {
$file = $args[0];
data_liberation_import( $file );
};

// Register the WP-CLI import command.
// Example usage: wp data-liberation /path/to/file.xml
WP_CLI::add_command( 'data-liberation', $command );
}
return;
$importer->next_step();
$paused_importer_state = $importer->get_reentrancy_cursor();

echo "\n\n";
echo "moving to importer2\n";
echo "\n\n";

$importer2 = WP_Stream_Importer::create_for_wxr_file(
$wxr_path,
array(),
$paused_importer_state
);
$importer2->next_step();
$importer2->next_step();
$importer2->next_step();
// $importer2->next_step();
// var_dump($importer2);

die("YAY");
});

// Register admin menu
Expand Down
10 changes: 10 additions & 0 deletions packages/playground/data-liberation/project.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@
],
"parallel": false
}
},
"test:wp-phpunit": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/playground/data-liberation",
"commands": [
"bun ../cli/src/cli.ts run-blueprint --quiet --mount=./:/wordpress/wp-content/plugins/data-liberation --blueprint=./tests/import/blueprint-import.json"
],
"parallel": false
}
}
}
}
34 changes: 34 additions & 0 deletions packages/playground/data-liberation/src/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,37 @@ function wp_visit_file_tree( $dir ) {
new SplFileInfo( $dir )
);
}

/**
* Import a WXR file. Used by the CLI.
*
* @param string $path The path to the WXR file.
* @return bool True once the import loop has finished, false if the importer could not be created.
*/
function data_liberation_import( $path ): bool {
$importer = WP_Stream_Importer::create_for_wxr_file( $path );

if ( ! $importer ) {
return false;
}

$is_wp_cli = defined( 'WP_CLI' ) && WP_CLI;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this point, a dedicated WP_CLI command might make sense. It would only be a thin wrapper. The website and the unit tests would use the same underlying import library with their own dedicated logging facilities.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I made it this way so that users who want to use only the plugin do not need to have WP-CLI available at all times.


if ( $is_wp_cli ) {
WP_CLI::line( "Importing from {$path}" );
}

while ( $importer->next_step() ) {
// Output the current stage if running in WP-CLI.
if ( $is_wp_cli ) {
$current_stage = $importer->get_current_stage();
WP_CLI::line( "Import: stage {$current_stage}" );
}
}

if ( $is_wp_cli ) {
WP_CLI::success( 'Import ended' );
}

return true;
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public function has_pending_requests() {
public function enqueue_if_not_exists( $url, $output_path ) {
$this->enqueued_resource_id = null;

$output_path = $this->output_root . '/' . ltrim( $output_path, '/' );
$output_path = ltrim( $output_path, '/' );
if ( file_exists( $output_path ) ) {
// @TODO: Reconsider the return value. The enqueuing operation failed,
// but overall already having a file seems like a success.
Expand Down Expand Up @@ -99,13 +99,14 @@ public function poll() {
if ( ! $this->client->await_next_event() ) {
return false;
}
$event = $this->client->get_event();
$request = $this->client->get_request();
// The request object we get from the client may be a redirect.
// Let's keep referring to the original request.
$original_request_id = $request->original_request()->id;

while ( true ) {
do {
$event = $this->client->get_event();
$request = $this->client->get_request();
// The request object we get from the client may be a redirect.
// Let's keep referring to the original request.
$original_request_id = $this->client->get_request()->original_request()->id;

switch ( $event ) {
case Client::EVENT_GOT_HEADERS:
if ( ! $request->is_redirected() ) {
Expand All @@ -129,7 +130,7 @@ public function poll() {
fclose( $this->fps[ $original_request_id ] );
}
if ( isset( $this->output_paths[ $original_request_id ] ) ) {
$partial_file = $this->output_root . '/' . $this->output_paths[ $original_request_id ] . '.partial';
$partial_file = $this->output_paths[ $original_request_id ] . '.partial';
if ( file_exists( $partial_file ) ) {
unlink( $partial_file );
}
Expand Down Expand Up @@ -162,7 +163,7 @@ public function poll() {
}
break;
}
}
} while ( $this->client->await_next_event() );

return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,15 @@ public function next_step() {
}
}

/**
 * Get the stage the importer is currently in.
 *
 * Read-only accessor: returns the value of $this->stage unchanged.
 * NOTE(review): the set of possible stage values is defined outside this
 * excerpt — confirm that the property always holds a string.
 *
 * @return string The current stage.
 */
public function get_current_stage() {
return $this->stage;
}

/**
* Advance the cursor to the oldest finished download. For example:
*
Expand Down Expand Up @@ -400,7 +409,7 @@ private function enqueue_attachment_download( string $raw_url, $context_path = n

$enqueued = $this->downloader->enqueue_if_not_exists( $url, $output_path );
if ( $enqueued ) {
$resource_id = $this->downloader->get_last_enqueued_resource_id();
$resource_id = $this->downloader->get_enqueued_resource_id();
$entity_cursor = $this->entity_iterator->get_reentrancy_cursor();
$this->active_downloads[ $entity_cursor ][ $resource_id ] = true;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

use PHPUnit\Framework\TestCase;

/**
 * Smoke test for the WXR import entry point, data_liberation_import().
 *
 * The suite only runs when the server software reports itself as PHP.wasm;
 * everywhere else each test is marked as skipped.
 */
class WPStreamImporterTests extends TestCase {

	/**
	 * Skip the test unless $_SERVER['SERVER_SOFTWARE'] is exactly 'PHP.wasm'.
	 */
	protected function setUp(): void {
		parent::setUp();

		// A missing key and a null value are treated the same way: not Playground.
		$server_software = $_SERVER['SERVER_SOFTWARE'] ?? null;
		if ( 'PHP.wasm' === $server_software ) {
			return;
		}

		$this->markTestSkipped( 'Test only runs in Playground' );
	}

	/**
	 * Importing the small WXR fixture must report success.
	 */
	public function test_import_simple_wxr() {
		$fixture_path = __DIR__ . '/wxr/small-export.xml';
		$was_imported = data_liberation_import( $fixture_path );

		$this->assertTrue( $was_imported );
	}
}
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
{
"$schema": "../../../blueprints/public/blueprint-schema.json",
"constants": {
"WP_DEBUG": true,
"WP_DEBUG_DISPLAY": true,
"WP_DEBUG_LOG": true
},
"login": true,
"steps": [
{
"step": "activatePlugin",
"pluginPath": "data-liberation/plugin.php"
},
{
"step": "runPHP",
"code": "<?php require_once 'wordpress/wp-load.php'; $base = '/wordpress/wp-content/plugins/data-liberation/';\nrequire $base . 'vendor/autoload.php';\ntry {\n$arguments = [\n'--stderr',\n'--configuration', $base . 'phpunit.xml'\n];\n$res = (new PHPUnit\\TextUI\\Application())->run($arguments);\nif ( $res !== 0 ) {\ntrigger_error('PHPUnit failed', E_USER_ERROR);\n}\n} catch (Throwable $e) {\ntrigger_error('PHPUnit failed: ' . $e->getMessage(), E_USER_ERROR);\n};"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool idea! This will suffice for starters, but here's something if you'd like to take it to the next level. What would it take to go from this to something more like a typical CLI command, e.g. cli --blueprint=... --mount=... run vendor/bin/phpunit --configuration phpunit.xml --stderr?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great idea. I didn't touch the CLI in this first phase.

}
]
}
2 changes: 1 addition & 1 deletion packages/playground/data-liberation/tests/import/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ bun ../../../cli/src/cli.ts \
server \
--mount=../../:/wordpress/wp-content/plugins/data-liberation \
--mount=../../../../docs:/wordpress/wp-content/docs \
--blueprint=/Users/cloudnik/www/Automattic/core/plugins/playground/packages/playground/data-liberation/tests/import/blueprint-import.json
--blueprint=./blueprint-import.json