Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Data Liberation] First topological sorter draft #2030

Draft
wants to merge 50 commits into
base: trunk
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
b0cbc9c
First topological sorter draft
zaerl Nov 26, 2024
fce76df
Move topological sort to separate function
zaerl Nov 26, 2024
1e3ec8a
Fix: missing importer initialization
zaerl Nov 26, 2024
987d0fa
Add categories to the sorter
zaerl Nov 26, 2024
361a40e
Add new in-place sort
zaerl Nov 27, 2024
e1baa75
Add memory-free functions
zaerl Nov 27, 2024
275c857
Replace bin script with wp-cli command
zaerl Nov 27, 2024
a25ed70
Add special cases
zaerl Nov 27, 2024
0630714
Change the sorting algorithm to qsort
zaerl Nov 28, 2024
9ba0c52
Add a TODO
zaerl Nov 28, 2024
c4295b4
Update names
zaerl Nov 29, 2024
a75ad15
Fix: change variable name
zaerl Nov 29, 2024
f3324cf
Add support for categories
zaerl Nov 29, 2024
c3afab7
Fix: remove double slashes
zaerl Dec 4, 2024
6645eb7
Add test check
zaerl Dec 4, 2024
d197de6
Add new hooks
zaerl Dec 4, 2024
e95618e
Add new topo sorting query
zaerl Dec 4, 2024
94d791c
Remove unused check
zaerl Dec 4, 2024
2acfba6
Temporary disable test
zaerl Dec 4, 2024
1e25c75
Remove debug code
zaerl Dec 4, 2024
dfc747d
Remove rebase artifacts
zaerl Dec 4, 2024
9198c57
Change to new function signature
zaerl Dec 6, 2024
49c8bcd
Add support for count
zaerl Dec 6, 2024
4b0a2ab
Add session to CLI
zaerl Dec 6, 2024
7ba5337
Add start session
zaerl Dec 6, 2024
6b7e315
Add support for sessions
zaerl Dec 9, 2024
76d883c
Add categories check
zaerl Dec 9, 2024
7927933
Fix: wrong name
zaerl Dec 9, 2024
4d612a6
Partial tests rework
zaerl Dec 9, 2024
4465eab
Add comments test
zaerl Dec 10, 2024
3ae9af2
New sorter indexing
zaerl Dec 11, 2024
5306517
Fix: missing key
zaerl Dec 11, 2024
cc151a5
Remove useless code
zaerl Dec 11, 2024
1ed8107
Remove SQLite case
zaerl Dec 11, 2024
b6a94b4
Move plugin methods outside class
zaerl Dec 11, 2024
4e2cc74
Create Playground base test class
zaerl Dec 11, 2024
d70861d
Fix: wrong keys
zaerl Dec 11, 2024
688c80d
Add core postmeta_no_cdata test
zaerl Dec 11, 2024
ea39a71
Add core importer tests
zaerl Dec 11, 2024
8db7508
Add new core importer tests
zaerl Dec 11, 2024
4932c14
Update WXR to last core importer
zaerl Dec 11, 2024
a49ebff
Add support for PHPUnit filters
zaerl Dec 11, 2024
10ecb41
Remove old test
zaerl Dec 11, 2024
e745fe9
Fix: remove debug code
zaerl Dec 11, 2024
e91b526
Fix: wrong check
zaerl Dec 11, 2024
9ca8a1d
Add new unit tests and remove old one
zaerl Dec 11, 2024
e33380c
Add support for term meta
zaerl Dec 12, 2024
24ae402
Add comment
zaerl Dec 12, 2024
416b294
Rename "elements" to "entities" to match name convention
zaerl Dec 12, 2024
b631a8a
Remove filters and actions and move mapping to WP_Entity_Importer
zaerl Dec 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
"pluginPath": "data-liberation/plugin.php"
},
{
"step": "runPHP",
"code": "<?php require_once 'wordpress/wp-load.php';\n$upload_dir = wp_upload_dir();\nforeach ( wp_visit_file_tree( $upload_dir['basedir'] . '/import-wxr' ) as $event ) {\nforeach ( $event->files as $file ) {\nif ( $file->isFile() && pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) === 'xml' ) {\ndata_liberation_import( $file->getPathname() );\n}\n}\n};"
"step": "wp-cli",
"command": "wp data-liberation import /wordpress/wp-content/uploads/import-wxr"
}
]
}
2 changes: 2 additions & 0 deletions packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
require_once __DIR__ . '/src/import/WP_Markdown_Importer.php';
require_once __DIR__ . '/src/import/WP_Entity_Iterator_Chain.php';
require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php';
require_once __DIR__ . '/src/import/WP_Logger.php';
require_once __DIR__ . '/src/import/WP_Topological_Sorter.php';

require_once __DIR__ . '/src/utf8_decoder.php';

Expand Down
1 change: 1 addition & 0 deletions packages/playground/data-liberation/phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
<file>tests/WPXMLProcessorTests.php</file>
<file>tests/UrldecodeNTests.php</file>
<file>tests/WPStreamImporterTests.php</file>
<!--<file>tests/WPTopologicalSorterTests.php</file>-->
</testsuite>
</testsuites>
</phpunit>
78 changes: 46 additions & 32 deletions packages/playground/data-liberation/plugin.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,40 +39,54 @@ function () {
}
);

add_action(
'init',
function () {
if ( defined( 'WP_CLI' ) && WP_CLI ) {
/**
* Import a WXR file.
*
* <file>
* : The WXR file to import.
*/
$command = function ( $args, $assoc_args ) {
$file = $args[0];
data_liberation_import( $file );
};

// Register the WP-CLI import command.
// Example usage: wp data-liberation /path/to/file.xml
WP_CLI::add_command( 'data-liberation', $command );
}
function data_liberation_init() {
if ( defined( 'WP_CLI' ) && WP_CLI ) {
require_once __DIR__ . '/src/cli/WP_Import_Command.php';

register_post_status(
'error',
array(
'label' => _x( 'Error', 'post' ), // Label name
'public' => false,
'exclude_from_search' => false,
'show_in_admin_all_list' => false,
'show_in_admin_status_list' => false,
// translators: %s is the number of errors
'label_count' => _n_noop( 'Error <span class="count">(%s)</span>', 'Error <span class="count">(%s)</span>' ),
)
);
// Register the WP-CLI import command.
WP_CLI::add_command( 'data-liberation', WP_Import_Command::class );
}
);

register_post_status(
'error',
array(
'label' => _x( 'Error', 'post' ), // Label name
'public' => false,
'exclude_from_search' => false,
'show_in_admin_all_list' => false,
'show_in_admin_status_list' => false,
// translators: %s is the number of errors
'label_count' => _n_noop( 'Error <span class="count">(%s)</span>', 'Error <span class="count">(%s)</span>' ),
)
);
}

add_action( 'init', 'data_liberation_init' );

/**
 * Plugin activation callback.
 *
 * Delegates to WP_Topological_Sorter::activate(). It is registered below
 * through register_activation_hook() for this plugin file, and the WP-CLI
 * import command also calls it directly before starting an import.
 *
 * @return void
 */
function data_liberation_activate() {
// Activate the topological sorter. Create tables and options.
WP_Topological_Sorter::activate();
}

// Run when the plugin is activated.
register_activation_hook( __FILE__, 'data_liberation_activate' );

/**
 * Plugin deactivation callback.
 *
 * Delegates to WP_Topological_Sorter::deactivate(). It is registered below
 * through register_deactivation_hook() for this plugin file. Import sessions
 * are not cleaned up here yet (see the TODO in the body).
 *
 * @return void
 */
function data_liberation_deactivate() {
// Deactivate the topological sorter. Flush away all data.
WP_Topological_Sorter::deactivate();

// @TODO: Cancel any active import sessions and cleanup other data.
}

// Run when the plugin is deactivated.
register_deactivation_hook( __FILE__, 'data_liberation_deactivate' );

/**
 * Callback for the `plugins_loaded` action.
 *
 * Delegates to WP_Topological_Sorter::load(); what that method sets up is
 * defined in the sorter class, not here.
 *
 * @return void
 */
function data_liberation_load() {
WP_Topological_Sorter::load();
}

// Run when the plugin is loaded.
add_action( 'plugins_loaded', 'data_liberation_load' );

// Register admin menu
add_action(
Expand Down
246 changes: 246 additions & 0 deletions packages/playground/data-liberation/src/cli/WP_Import_Command.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
<?php

require_once __DIR__ . '/WP_Import_Logger.php';

/**
 * Implements the `wp data-liberation` command.
 *
 * ## EXAMPLES
 *
 *     # Import a WXR file.
 *     wp data-liberation import /path/to/file.xml
 *
 *     # Import all files inside a folder.
 *     wp data-liberation import /path/to/folder
 *
 *     # Import a WXR file from a URL.
 *     wp data-liberation import http://example.com/file.xml
 *
 *     Success: Imported data.
 */
class WP_Import_Command {
	/**
	 * @var bool $dry_run Whether to perform a dry run.
	 */
	private $dry_run = false;

	/**
	 * @var WP_Stream_Importer $importer The importer instance.
	 */
	private $importer = null;

	/**
	 * @var string $wxr_path The path (or URL) of the WXR file being imported.
	 */
	private $wxr_path = '';

	/**
	 * @var int $count The number of items to import in one go.
	 */
	private $count;

	/**
	 * @var WP_Import_Session|null $import_session The import session. Stays
	 *                                              unset during a dry run.
	 */
	private $import_session;

	/**
	 * Import a WXR file.
	 *
	 * ## OPTIONS
	 *
	 * <path>
	 * : The path to the WXR file. Either a file, a directory or a URL.
	 *
	 * [--count=<count>]
	 * : The number of items to import in one go. Default is 10,000.
	 *
	 * [--dry-run]
	 * : Perform a dry run if set.
	 *
	 * ## EXAMPLES
	 *
	 *     wp data-liberation import /path/to/file.xml
	 *
	 * @param array $args       Positional arguments; `$args[0]` is the path.
	 * @param array $assoc_args Associative arguments (`count`, `dry-run`).
	 * @return void
	 */
	public function import( $args, $assoc_args ) {
		$path          = $args[0];
		$this->dry_run = WP_CLI\Utils\get_flag_value( $assoc_args, 'dry-run', false );
		$this->count   = isset( $assoc_args['count'] ) ? (int) $assoc_args['count'] : 10000;
		$options       = array(
			'logger' => new WP_Import_Logger(),
		);

		if ( extension_loaded( 'pcntl' ) ) {
			// Set the signal handler.
			$this->register_handlers();
		}

		// Be sure Data Liberation is activated.
		data_liberation_activate();

		if ( filter_var( $path, FILTER_VALIDATE_URL ) ) {
			// Import URL.
			$this->import_wxr_url( $path, $options );
		} elseif ( is_dir( $path ) ) {
			$count = 0;
			// Get all the WXR files in the directory.
			foreach ( wp_visit_file_tree( $path ) as $event ) {
				foreach ( $event->files as $file ) {
					if ( $file->isFile() && 'xml' === pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) ) {
						++$count;

						// Import the WXR file.
						$this->import_wxr_file( $file->getPathname(), $options );
					}
				}
			}

			if ( ! $count ) {
				WP_CLI::error( WP_CLI::colorize( "No WXR files found in the %R{$path}%n directory" ) );
			}
		} else {
			if ( ! is_file( $path ) ) {
				WP_CLI::error( WP_CLI::colorize( "File not found: %R{$path}%n" ) );
			}

			// Import the WXR file.
			$this->import_wxr_file( $path, $options );
		}
	}

	/**
	 * Resume the active import session or create a new one.
	 *
	 * During a dry run no session is looked up or created and
	 * `$this->import_session` is left untouched.
	 *
	 * @param array $args Arguments handed to WP_Import_Session::create() when
	 *                    no active session exists.
	 * @return void
	 */
	private function start_session( $args ) {
		if ( $this->dry_run ) {
			WP_CLI::line( 'Dry run enabled. No session created.' );

			return;
		}

		$active_session = WP_Import_Session::get_active();

		if ( $active_session ) {
			$this->import_session = $active_session;

			$id = $this->import_session->get_id();
			WP_CLI::line( WP_CLI::colorize( "Current session: %g{$id}%n" ) );
		} else {
			$this->import_session = WP_Import_Session::create( $args );

			$id = $this->import_session->get_id();
			WP_CLI::line( WP_CLI::colorize( "New session: %g{$id}%n" ) );
		}
	}

	/**
	 * Add the current session ID to the importer options, when there is one.
	 *
	 * A dry run never creates a session, so dereferencing the session
	 * unconditionally would be a fatal "call to a member function on null".
	 *
	 * @param array $options Importer options.
	 * @return array The options, with `session_id` set if a session exists.
	 */
	private function add_session_id( $options ) {
		if ( $this->import_session ) {
			$options['session_id'] = $this->import_session->get_id();
		}

		return $options;
	}

	/**
	 * Import a WXR file.
	 *
	 * @param string $file_path The path to the WXR file.
	 * @param array  $options   Options forwarded to the importer.
	 * @return void
	 */
	private function import_wxr_file( $file_path, $options = array() ) {
		$this->wxr_path = $file_path;

		$this->start_session(
			array(
				'data_source' => 'wxr_file',
				'file_name'   => $file_path,
			)
		);

		// Pass the session ID (absent during a dry run).
		$options = $this->add_session_id( $options );

		$this->importer = WP_Stream_Importer::create_for_wxr_file( $file_path, $options );
		$this->import_wxr();
	}

	/**
	 * Import a WXR file from a URL.
	 *
	 * @param string $url     The URL to the WXR file.
	 * @param array  $options Options forwarded to the importer.
	 * @return void
	 */
	private function import_wxr_url( $url, $options = array() ) {
		$this->wxr_path = $url;

		$this->start_session(
			array(
				'data_source' => 'wxr_url',
				'file_name'   => $url,
			)
		);

		// Pass the session ID (absent during a dry run).
		$options = $this->add_session_id( $options );

		$this->importer = WP_Stream_Importer::create_for_wxr_url( $url, $options );
		$this->import_wxr();
	}

	/**
	 * Run the import with the importer prepared by the caller.
	 *
	 * For a real run, every stage is processed by calling next_step() with the
	 * configured count until it returns a falsy value, then moving on with
	 * advance_to_next_stage(). A dry run only prints a notice.
	 *
	 * @return void
	 */
	private function import_wxr() {
		if ( ! $this->importer ) {
			WP_CLI::error( 'Could not create importer' );
		}

		// A dry run deliberately has no session, so only require one for real imports.
		if ( ! $this->dry_run && ! $this->import_session ) {
			WP_CLI::error( 'Could not create session' );
		}

		WP_CLI::line( "Importing {$this->wxr_path}" );

		if ( $this->dry_run ) {
			// @TODO: do something with the dry run.
			WP_CLI::line( 'Dry run enabled.' );
		} else {
			do {
				$current_stage = $this->importer->get_stage();
				WP_CLI::line( WP_CLI::colorize( "Stage %g{$current_stage}%n" ) );
				$step_count = 0;

				while ( $this->importer->next_step( $this->count ) ) {
					++$step_count;
					WP_CLI::line( WP_CLI::colorize( "Step %g{$step_count}%n" ) );
				}
			} while ( $this->importer->advance_to_next_stage() );
		}

		WP_CLI::success( 'Import finished' );
	}

	/**
	 * Callback function registered to `pcntl_signal` to handle signals.
	 *
	 * Prints which signal arrived and terminates the process with status 0.
	 * Signals other than SIGINT and SIGTERM are ignored.
	 *
	 * @param int $signal The signal number.
	 * @return void
	 */
	protected function signal_handler( $signal ) {
		switch ( $signal ) {
			case SIGINT:
				WP_CLI::line( 'Received SIGINT signal' );
				exit( 0 );

			case SIGTERM:
				WP_CLI::line( 'Received SIGTERM signal' );
				exit( 0 );
		}
	}

	/**
	 * Register signal handlers for the command.
	 *
	 * Handlers are only installed when asynchronous signal delivery can be
	 * switched on. Installing a handler replaces the default action of the
	 * signal, so a handler that is never dispatched would leave Ctrl+C and
	 * `kill` without any effect.
	 *
	 * @return void
	 */
	private function register_handlers() {
		if ( ! function_exists( 'pcntl_async_signals' ) || ! function_exists( 'pcntl_signal' ) ) {
			// Keep the default signal behaviour rather than swallowing signals.
			return;
		}

		// Deliver pending signals without ticks or manual pcntl_signal_dispatch() calls.
		pcntl_async_signals( true );

		// A bound closure keeps the protected handler callable no matter which
		// scope is executing when the signal is dispatched.
		// NOTE(review): signal_handler() is presumably non-public so that WP-CLI
		// does not expose it as a subcommand - confirm before changing visibility.
		$handler = function ( $signal ) {
			$this->signal_handler( $signal );
		};

		// Handle the Ctrl + C signal to terminate the program.
		pcntl_signal( SIGINT, $handler );

		// Handle the `kill` command to terminate the program.
		pcntl_signal( SIGTERM, $handler );
	}
}
Loading