From b0cbc9ccfe64251ff6d46762d382e77ab4ad16a9 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Tue, 26 Nov 2024 14:49:02 +0100 Subject: [PATCH 01/51] First topological sorter draft --- .../playground/data-liberation/bootstrap.php | 2 + .../playground/data-liberation/plugin.php | 14 +- .../src/cli/WP_Import_Command.php | 173 ++++++++++++++++++ .../src/cli/WP_Import_Logger.php | 51 ++++++ .../src/import/WP_Entity_Importer.php | 56 +----- .../data-liberation/src/import/WP_Logger.php | 51 ++++++ .../src/import/WP_Stream_Importer.php | 20 +- .../src/import/WP_Topological_Sorter.php | 103 +++++++++++ .../data-liberation/src/wxr/WP_WXR_Reader.php | 4 + 9 files changed, 406 insertions(+), 68 deletions(-) create mode 100644 packages/playground/data-liberation/src/cli/WP_Import_Command.php create mode 100644 packages/playground/data-liberation/src/cli/WP_Import_Logger.php create mode 100644 packages/playground/data-liberation/src/import/WP_Logger.php create mode 100644 packages/playground/data-liberation/src/import/WP_Topological_Sorter.php diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index 8e4e5f0177..5540b5a695 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -58,6 +58,8 @@ require_once __DIR__ . '/src/import/WP_Markdown_Importer.php'; require_once __DIR__ . '/src/import/WP_Entity_Iterator_Chain.php'; require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php'; +require_once __DIR__ . '/src/import/WP_Logger.php'; +require_once __DIR__ . '/src/import/WP_Topological_Sorter.php'; require_once __DIR__ . '/src/utf8_decoder.php'; diff --git a/packages/playground/data-liberation/plugin.php b/packages/playground/data-liberation/plugin.php index 94c2b1b3ba..88012b3f05 100644 --- a/packages/playground/data-liberation/plugin.php +++ b/packages/playground/data-liberation/plugin.php @@ -43,20 +43,10 @@ function () { 'init', function () { if ( defined( 'WP_CLI' ) && WP_CLI ) { - /** - * Import a WXR file. - * - * - * : The WXR file to import. - */ - $command = function ( $args, $assoc_args ) { - $file = $args[0]; - data_liberation_import( $file ); - }; + require_once __DIR__ . '/src/cli/WP_Import_Command.php'; // Register the WP-CLI import command. - // Example usage: wp data-liberation /path/to/file.xml - WP_CLI::add_command( 'data-liberation', $command ); + WP_CLI::add_command( 'data-liberation', WP_Import_Command::class ); } register_post_status( diff --git a/packages/playground/data-liberation/src/cli/WP_Import_Command.php b/packages/playground/data-liberation/src/cli/WP_Import_Command.php new file mode 100644 index 0000000000..fe49ced08e --- /dev/null +++ b/packages/playground/data-liberation/src/cli/WP_Import_Command.php @@ -0,0 +1,173 @@ + + * : The path to the WXR file. Either a file, a directory or a URL. + * + * [--dry-run] + * : Perform a dry run if set. + * + * ## EXAMPLES + * + * wp data-liberation import /path/to/file.xml + * + * @param array $args + * @param array $assoc_args + * @return void + */ + public function import( $args, $assoc_args ) { + $path = $args[0]; + $this->dry_run = WP_CLI\Utils\get_flag_value( $assoc_args, 'dry-run', false ); + $options = array( + 'logger' => new WP_Import_logger(), + ); + + if ( extension_loaded( 'pcntl' ) ) { + // Set the signal handler. + $this->register_handlers(); + } + + if ( filter_var( $path, FILTER_VALIDATE_URL ) ) { + // Import URL. + $this->import_wxr_url( $path, $options ); + } elseif ( is_dir( $path ) ) { + $count = 0; + // Get all the WXR files in the directory. + foreach ( wp_visit_file_tree( $path ) as $event ) { + foreach ( $event->files as $file ) { + if ( $file->isFile() && 'xml' === pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) ) { + ++$count; + + // Import the WXR file. + $this->import_wxr_file( $file->getPathname(), $options ); + } + } + } + + if ( ! $count ) { + WP_CLI::error( WP_CLI::colorize( "No WXR files found in the {$path} directory" ) ); + } + } else { + if ( ! is_file( $path ) ) { + WP_CLI::error( WP_CLI::colorize( "File not found: %R{$path}%n" ) ); + } + + // Import the WXR file. + $this->import_wxr_file( $path, $options ); + } + } + + /** + * Import a WXR file. + * + * @param string $file_path The path to the WXR file. + * @return void + */ + private function import_wxr_file( $file_path, $options = array() ) { + $this->wxr_path = $file_path; + $this->importer = WP_Stream_Importer::create_for_wxr_file( $file_path, $options ); + + $this->import_wxr(); + } + + /** + * Import a WXR file from a URL. + * + * @param string $url The URL to the WXR file. + * @return void + */ + private function import_wxr_url( $url, $options = array() ) { + $this->wxr_path = $url; + $this->importer = WP_Stream_Importer::create_for_wxr_url( $url, $options ); + + $this->import_wxr(); + } + + /** + * Import the WXR file. + */ + private function import_wxr() { + if ( ! $this->importer ) { + WP_CLI::error( 'Could not create importer' ); + } + + WP_CLI::line( "Importing {$this->wxr_path}" ); + + if ( $this->dry_run ) { + WP_CLI::line( 'Dry run enabled.' ); + } else { + while ( $this->importer->next_step() ) { + $current_stage = $this->importer->get_current_stage(); + // WP_CLI::line( "Stage {$current_stage}" ); + } + } + + WP_CLI::success( 'Import finished' ); + } + + /** + * Callback function registered to `pcntl_signal` to handle signals. + * + * @param int $signal The signal number. + * @return void + */ + protected function signal_handler( $signal ) { + switch ( $signal ) { + case SIGINT: + WP_CLI::line( 'Received SIGINT signal' ); + exit( 0 ); + + case SIGTERM: + WP_CLI::line( 'Received SIGTERM signal' ); + exit( 0 ); + } + } + + /** + * Register signal handlers for the command. + * + * @return void + */ + private function register_handlers() { + // Handle the Ctrl + C signal to terminate the program. + pcntl_signal( SIGINT, array( $this, 'signal_handler' ) ); + + // Handle the `kill` command to terminate the program. + pcntl_signal( SIGTERM, array( $this, 'signal_handler' ) ); + } +} diff --git a/packages/playground/data-liberation/src/cli/WP_Import_Logger.php b/packages/playground/data-liberation/src/cli/WP_Import_Logger.php new file mode 100644 index 0000000000..103ab3d9e2 --- /dev/null +++ b/packages/playground/data-liberation/src/cli/WP_Import_Logger.php @@ -0,0 +1,51 @@ +mapping['term_id'] = array(); $this->requires_remapping = $empty_types; $this->exists = $empty_types; - $this->logger = new Logger(); + $this->logger = isset( $options['logger'] ) ? $options['logger'] : new WP_Logger(); $this->options = wp_parse_args( $options, @@ -1191,57 +1191,3 @@ public static function sort_comments_by_id( $a, $b ) { return $a['comment_id'] - $b['comment_id']; } } - -/** - * @TODO how to treat this? Should this class even exist? - * how does WordPress handle different levels? It - * seems useful for usage in wp-cli, Blueprints, - * and other non-web environments. - */ -// phpcs:ignore Generic.Files.OneObjectStructurePerFile.MultipleFound -class Logger { - /** - * Log a debug message. - * - * @param string $message Message to log - */ - public function debug( $message ) { - // echo( '[DEBUG] ' . $message ); - } - - /** - * Log an info message. - * - * @param string $message Message to log - */ - public function info( $message ) { - // echo( '[INFO] ' . $message ); - } - - /** - * Log a warning message. - * - * @param string $message Message to log - */ - public function warning( $message ) { - echo( '[WARNING] ' . $message ); - } - - /** - * Log an error message. - * - * @param string $message Message to log - */ - public function error( $message ) { - echo( '[ERROR] ' . $message ); - } - - /** - * Log a notice message. - * - * @param string $message Message to log - */ - public function notice( $message ) { - // echo( '[NOTICE] ' . $message ); - } -} diff --git a/packages/playground/data-liberation/src/import/WP_Logger.php b/packages/playground/data-liberation/src/import/WP_Logger.php new file mode 100644 index 0000000000..87605336fe --- /dev/null +++ b/packages/playground/data-liberation/src/import/WP_Logger.php @@ -0,0 +1,51 @@ +entity_iterator->get_reentrancy_cursor(); $this->active_downloads[ $cursor ] = array(); - $data = $entity->get_data(); + $data = $entity->get_data(); + $upstream = $this->entity_iterator->get_upstream(); + switch ( $entity->get_type() ) { case 'asset_retry': $this->enqueue_attachment_download( @@ -472,7 +479,18 @@ private function frontload_next_entity() { ) ); break; + case 'category': + case 'term': + $this->topological_sorter->map_term( $upstream, $data ); + break; + case 'site_option': + if ( $data['option_name'] === 'home' ) { + $this->source_site_url = $data['option_value']; + } + break; case 'post': + $this->topological_sorter->map_post( $upstream, $data ); + if ( isset( $data['post_type'] ) && $data['post_type'] === 'attachment' ) { $this->enqueue_attachment_download( $data['attachment_url'] ); } elseif ( isset( $data['post_content'] ) ) { diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php new file mode 100644 index 0000000000..291421aae5 --- /dev/null +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -0,0 +1,103 @@ +terms[ $data['slug'] ] = array( + 'upstream' => $upstream, + 'visited' => false, + ); + } + + public function map_post( $upstream, $data ) { + if ( empty( $data ) ) { + return false; + } + + // No parent, no need to sort. + if ( ! isset( $data['post_type'] ) ) { + return false; + } + + if ( 'post' === $data['post_type'] || 'page' === $data['post_type'] ) { + if ( ! $data['post_id'] ) { + $this->last_post_id = $this->orphan_post_counter; + --$this->orphan_post_counter; + } + + $this->unsorted_posts[ $data['post_id'] ] = array( + 'upstream' => $upstream, + 'parent' => $data['post_parent'], + 'visited' => false, + ); + } + } + + /** + * Sort posts topologically. + * + * Children posts should not be processed before their parent has been processed. + * This method sorts the posts in the order they should be processed. + * + * Sorted posts will be stored as attachments and posts/pages separately. + */ + public function sort_posts_topologically() { + foreach ( $this->unsorted_posts as $id => $post ) { + $this->topological_sort( $id, $post ); + } + + // Empty the unsorted posts + $this->unsorted_posts = array(); + } + + /** + * Recursive topological sorting. + * + * @param int $id The id of the post to sort. + * @param array $post The post to sort. + * + * @todo Check for circular dependencies. + */ + private function topological_sort( $id, $post ) { + if ( isset( $this->posts[ $id ]['visited'] ) ) { + return; + } + + $this->unsorted_posts[ $id ]['visited'] = true; + + if ( isset( $this->posts[ $post['parent'] ] ) ) { + $this->topological_sort( $post['parent'], $this->unsorted_posts[ $post['parent'] ] ); + } + + $this->index[] = $post['upstream']; + } +} diff --git a/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php b/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php index 25c21ff608..2f35e791aa 100644 --- a/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php +++ b/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php @@ -396,6 +396,10 @@ protected function __construct( WP_XML_Processor $xml ) { $this->xml = $xml; } + public function get_upstream() { + return $this->entity_byte_offset; + } + public function get_reentrancy_cursor() { /** * @TODO: Instead of adjusting the XML cursor internals, adjust the get_reentrancy_cursor() From fce76df56ce8aa2cd7838cb638a6f0446cc80b44 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Tue, 26 Nov 2024 22:37:11 +0100 Subject: [PATCH 02/51] Move topological sort to separate function --- .../src/import/WP_Stream_Importer.php | 51 +++++++++++++++---- .../data-liberation/src/wxr/WP_WXR_Reader.php | 2 +- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index cbe75e6172..f9361fd9b4 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -219,9 +219,8 @@ public function next_step() { $this->next_stage = self::STAGE_TOPOLOGICAL_SORT; return false; case self::STAGE_TOPOLOGICAL_SORT: - // @TODO: Topologically sort the entities. - $this->next_stage = self::STAGE_FRONTLOAD_ASSETS; - return false; + $this->next_topological_sort_step(); + return true; case self::STAGE_FRONTLOAD_ASSETS: if ( true === $this->frontload_next_entity() ) { return true; @@ -400,6 +399,42 @@ private function frontloading_advance_reentrancy_cursor() { } } + private function next_topological_sort_step() { + if ( null === $this->entity_iterator ) { + $this->downloader = new WP_Attachment_Downloader( $this->options ); + $this->entity_iterator = $this->create_entity_iterator(); + $this->topological_sorter = new WP_Topological_Sorter(); + } + + if ( ! $this->entity_iterator->valid() ) { + $this->stage = self::STAGE_FRONTLOAD_ASSETS; + $this->topological_sorter = null; + $this->downloader = null; + $this->entity_iterator = null; + $this->resume_at_entity = null; + return; + } + + // $cursor = $this->entity_iterator->get_reentrancy_cursor(); + $entity = $this->entity_iterator->current(); + $data = $entity->get_data(); + $upstream = $this->entity_iterator->get_entity_byte_offset(); + + switch ( $entity->get_type() ) { + case 'category': + case 'term': + $this->topological_sorter->map_term( $upstream, $data ); + break; + case 'post': + $this->topological_sorter->map_post( $upstream, $data ); + break; + } + + $this->entity_iterator->next(); + + return true; + } + /** * Downloads all the assets referenced in the imported entities. * @@ -467,8 +502,7 @@ private function frontload_next_entity() { $cursor = $this->entity_iterator->get_reentrancy_cursor(); $this->active_downloads[ $cursor ] = array(); - $data = $entity->get_data(); - $upstream = $this->entity_iterator->get_upstream(); + $data = $entity->get_data(); switch ( $entity->get_type() ) { case 'asset_retry': @@ -489,8 +523,6 @@ private function frontload_next_entity() { } break; case 'post': - $this->topological_sorter->map_post( $upstream, $data ); - if ( isset( $data['post_type'] ) && $data['post_type'] === 'attachment' ) { $this->enqueue_attachment_download( $data['attachment_url'] ); } elseif ( isset( $data['post_content'] ) ) { @@ -533,8 +565,9 @@ private function import_next_entity() { $this->imported_entities_counts = array(); if ( null === $this->entity_iterator ) { - $this->entity_iterator = $this->create_entity_iterator(); - $this->importer = new WP_Entity_Importer(); + $this->downloader = new WP_Attachment_Downloader( $this->options ); + $this->entity_iterator = $this->create_entity_iterator(); + $this->topological_sorter = new WP_Topological_Sorter(); } if ( ! $this->entity_iterator->valid() ) { diff --git a/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php b/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php index 2f35e791aa..c5bdec538c 100644 --- a/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php +++ b/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php @@ -396,7 +396,7 @@ protected function __construct( WP_XML_Processor $xml ) { $this->xml = $xml; } - public function get_upstream() { + public function get_entity_byte_offset() { return $this->entity_byte_offset; } From 1e3ec8a20b0bd2598b432dbc7648402a4fbffbf9 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Tue, 26 Nov 2024 22:42:25 +0100 Subject: [PATCH 03/51] Fix: missing importer initialization --- .../data-liberation/src/import/WP_Stream_Importer.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index f9361fd9b4..25449f1676 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -503,7 +503,6 @@ private function frontload_next_entity() { $this->active_downloads[ $cursor ] = array(); $data = $entity->get_data(); - switch ( $entity->get_type() ) { case 'asset_retry': $this->enqueue_attachment_download( @@ -565,8 +564,8 @@ private function import_next_entity() { $this->imported_entities_counts = array(); if ( null === $this->entity_iterator ) { - $this->downloader = new WP_Attachment_Downloader( $this->options ); $this->entity_iterator = $this->create_entity_iterator(); + $this->importer = new WP_Entity_Importer(); $this->topological_sorter = new WP_Topological_Sorter(); } From 987d0fa67dfcbeb3fcaa7a0ed6698da16cd68cf3 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Tue, 26 Nov 2024 22:59:14 +0100 Subject: [PATCH 04/51] Add categories to the sorter --- .../src/import/WP_Stream_Importer.php | 11 ++-- .../src/import/WP_Topological_Sorter.php | 66 +++++++++++++------ 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index 25449f1676..b9e0dc7846 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -416,17 +416,16 @@ private function next_topological_sort_step() { } // $cursor = $this->entity_iterator->get_reentrancy_cursor(); - $entity = $this->entity_iterator->current(); - $data = $entity->get_data(); - $upstream = $this->entity_iterator->get_entity_byte_offset(); + $entity = $this->entity_iterator->current(); + $data = $entity->get_data(); + $offset = $this->entity_iterator->get_entity_byte_offset(); switch ( $entity->get_type() ) { case 'category': - case 'term': - $this->topological_sorter->map_term( $upstream, $data ); + $this->topological_sorter->map_category( $offset, $data ); break; case 'post': - $this->topological_sorter->map_post( $upstream, $data ); + $this->topological_sorter->map_post( $offset, $data ); break; } diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 291421aae5..680ae9e6b2 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -9,9 +9,10 @@ */ class WP_Topological_Sorter { - public $unsorted_posts = array(); - public $terms = array(); - public $index = array(); + public $unsorted_posts = array(); + public $unsorted_categories = array(); + public $category_index = array(); + public $post_index = array(); /** * Variable for keeping counts of orphaned posts/attachments, it'll also be assigned as temporarty post ID. @@ -27,18 +28,19 @@ class WP_Topological_Sorter { */ protected $last_post_id = 0; - public function map_term( $upstream, $data ) { + public function map_category( $byte_offset, $data ) { if ( empty( $data ) ) { return false; } - $this->terms[ $data['slug'] ] = array( - 'upstream' => $upstream, - 'visited' => false, + $this->unsorted_categories[ $data['slug'] ] = array( + 'byte_offset' => $byte_offset, + 'parent' => $data['parent'], + 'visited' => false, ); } - public function map_post( $upstream, $data ) { + public function map_post( $byte_offset, $data ) { if ( empty( $data ) ) { return false; } @@ -55,9 +57,9 @@ public function map_post( $upstream, $data ) { } $this->unsorted_posts[ $data['post_id'] ] = array( - 'upstream' => $upstream, - 'parent' => $data['post_parent'], - 'visited' => false, + 'byte_offset' => $byte_offset, + 'parent' => $data['post_parent'], + 'visited' => false, ); } } @@ -70,9 +72,13 @@ public function map_post( $upstream, $data ) { * * Sorted posts will be stored as attachments and posts/pages separately. */ - public function sort_posts_topologically() { + public function sort_topologically() { + foreach ( $this->unsorted_categories as $slug => $category ) { + $this->topological_category_sort( $slug, $category ); + } + foreach ( $this->unsorted_posts as $id => $post ) { - $this->topological_sort( $id, $post ); + $this->topological_post_sort( $id, $post ); } // Empty the unsorted posts @@ -80,24 +86,46 @@ public function sort_posts_topologically() { } /** - * Recursive topological sorting. + * Recursive posts topological sorting. * * @param int $id The id of the post to sort. * @param array $post The post to sort. * * @todo Check for circular dependencies. */ - private function topological_sort( $id, $post ) { - if ( isset( $this->posts[ $id ]['visited'] ) ) { + private function topological_post_sort( $id, $post ) { + if ( isset( $this->unsorted_posts[ $id ]['visited'] ) ) { return; } $this->unsorted_posts[ $id ]['visited'] = true; - if ( isset( $this->posts[ $post['parent'] ] ) ) { - $this->topological_sort( $post['parent'], $this->unsorted_posts[ $post['parent'] ] ); + if ( isset( $this->unsorted_posts[ $post['parent'] ] ) ) { + $this->topological_post_sort( $post['parent'], $this->unsorted_posts[ $post['parent'] ] ); + } + + $this->post_index[] = $post['byte_offset']; + } + + /** + * Recursive categories topological sorting. + * + * @param int $slug The slug of the category to sort. + * @param array $category The category to sort. + * + * @todo Check for circular dependencies. + */ + private function topological_category_sort( $slug, $category ) { + if ( isset( $this->unsorted_categories[ $slug ]['visited'] ) ) { + return; + } + + $this->unsorted_categories[ $slug ]['visited'] = true; + + if ( isset( $this->unsorted_categories[ $category['parent'] ] ) ) { + $this->topological_category_sort( $category['parent'], $this->unsorted_categories[ $category['parent'] ] ); } - $this->index[] = $post['upstream']; + $this->category_index[] = $category['byte_offset']; } } From 361a40e5a67a9259003760172a5673f26379e987 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 27 Nov 2024 11:43:07 +0100 Subject: [PATCH 05/51] Add new in-place sort --- .../playground/data-liberation/phpunit.xml | 1 + .../src/import/WP_Topological_Sorter.php | 105 +++++++++++++----- .../tests/WPTopologicalSorterTests.php | 59 ++++++++++ .../tests/WPWXRReaderTests.php | 6 +- 4 files changed, 139 insertions(+), 32 deletions(-) create mode 100644 packages/playground/data-liberation/tests/WPTopologicalSorterTests.php diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml index 800b55f189..54fbc00a3c 100644 --- a/packages/playground/data-liberation/phpunit.xml +++ b/packages/playground/data-liberation/phpunit.xml @@ -11,6 +11,7 @@ tests/WPXMLProcessorTests.php tests/UrldecodeNTests.php tests/WPStreamImporterTests.php + tests/WPTopologicalSorterTests.php diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 680ae9e6b2..85d877c56b 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -9,13 +9,12 @@ */ class WP_Topological_Sorter { - public $unsorted_posts = array(); - public $unsorted_categories = array(); - public $category_index = array(); - public $post_index = array(); + public $posts = array(); + public $categories = array(); + public $category_index = array(); /** - * Variable for keeping counts of orphaned posts/attachments, it'll also be assigned as temporarty post ID. + * Variable for keeping counts of orphaned posts/attachments, it'll also be assigned as temporarly post ID. * To prevent duplicate post ID, we'll use negative number. * * @var int @@ -24,16 +23,25 @@ class WP_Topological_Sorter { /** * Store the ID of the post ID currently being processed. + * * @var int */ protected $last_post_id = 0; + public function reset() { + $this->posts = array(); + $this->categories = array(); + $this->category_index = array(); + $this->orphan_post_counter = 0; + $this->last_post_id = 0; + } + public function map_category( $byte_offset, $data ) { if ( empty( $data ) ) { return false; } - $this->unsorted_categories[ $data['slug'] ] = array( + $this->categories[ $data['slug'] ] = array( 'byte_offset' => $byte_offset, 'parent' => $data['parent'], 'visited' => false, @@ -56,12 +64,14 @@ public function map_post( $byte_offset, $data ) { --$this->orphan_post_counter; } - $this->unsorted_posts[ $data['post_id'] ] = array( - 'byte_offset' => $byte_offset, - 'parent' => $data['post_parent'], - 'visited' => false, + // This is an array saved as: [ parent, byte_offset ], to save space and not using an associative one. + $this->posts[ $data['post_id'] ] = array( + $data['post_parent'], + $byte_offset, ); } + + return true; } /** @@ -73,38 +83,75 @@ public function map_post( $byte_offset, $data ) { * Sorted posts will be stored as attachments and posts/pages separately. */ public function sort_topologically() { - foreach ( $this->unsorted_categories as $slug => $category ) { + foreach ( $this->categories as $slug => $category ) { $this->topological_category_sort( $slug, $category ); } - foreach ( $this->unsorted_posts as $id => $post ) { - $this->topological_post_sort( $id, $post ); + $this->sort_parent_child( $this->posts ); + + // Empty some memory. + foreach ( $this->posts as $id => $element ) { + // Save only the byte offset. + $this->posts[ $id ] = $element[1]; } + } - // Empty the unsorted posts - $this->unsorted_posts = array(); + /** + * Recursive topological sorting. + * @todo Check for circular dependencies. + * + * @param array $elements The elements to sort. + * + * @return void + */ + private function sort_parent_child( &$elements ) { + // Sort the array in-place. + $position = 0; + + foreach ( $elements as $id => $element ) { + if ( empty( $element[0] ) ) { + $this->move_element( $elements, $id, $position ); + } + } } /** - * Recursive posts topological sorting. + * Move an element to a new position. * - * @param int $id The id of the post to sort. - * @param array $post The post to sort. + * @param array $elements The elements to sort. + * @param int $id The ID of the element to move. + * @param int $position The new position of the element. * - * @todo Check for circular dependencies. + * @return void */ - private function topological_post_sort( $id, $post ) { - if ( isset( $this->unsorted_posts[ $id ]['visited'] ) ) { + private function move_element( &$elements, $id, &$position ) { + if ( ! isset( $elements[ $id ] ) ) { return; } - $this->unsorted_posts[ $id ]['visited'] = true; + $element = $elements[ $id ]; - if ( isset( $this->unsorted_posts[ $post['parent'] ] ) ) { - $this->topological_post_sort( $post['parent'], $this->unsorted_posts[ $post['parent'] ] ); + if ( $id < $position ) { + // Already in the correct position. + return; } - $this->post_index[] = $post['byte_offset']; + // Move the element to the current position. + unset( $elements[ $id ] ); + + // Generate the new array. + $elements = array_slice( $elements, 0, $position, true ) + + array( $id => $element ) + + array_slice( $elements, $position, null, true ); + + ++$position; + + // Move children. + foreach ( $elements as $child_id => $child_element ) { + if ( $id === $child_element[0] ) { + $this->move_element( $elements, $child_id, $position ); + } + } } /** @@ -116,14 +163,14 @@ private function topological_post_sort( $id, $post ) { * @todo Check for circular dependencies. */ private function topological_category_sort( $slug, $category ) { - if ( isset( $this->unsorted_categories[ $slug ]['visited'] ) ) { + if ( isset( $this->categories[ $slug ]['visited'] ) ) { return; } - $this->unsorted_categories[ $slug ]['visited'] = true; + $this->categories[ $slug ]['visited'] = true; - if ( isset( $this->unsorted_categories[ $category['parent'] ] ) ) { - $this->topological_category_sort( $category['parent'], $this->unsorted_categories[ $category['parent'] ] ); + if ( isset( $this->categories[ $category['parent'] ] ) ) { + $this->topological_category_sort( $category['parent'], $this->categories[ $category['parent'] ] ); } $this->category_index[] = $category['byte_offset']; diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php new file mode 100644 index 0000000000..a751911556 --- /dev/null +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -0,0 +1,59 @@ +assertTrue( $sorter->map_post( 0, $this->generate_post( 1 ) ) ); + $this->assertCount( 1, $sorter->posts ); + $this->assertEquals( 1, array_keys( $sorter->posts )[0] ); + } + + public function test_parent_after_child() { + $sorter = new WP_Topological_Sorter(); + + $sorter->map_post( 0, $this->generate_post( 1, 2 ) ); + $sorter->map_post( 1, $this->generate_post( 2, 0 ) ); + $sorter->sort_topologically(); + + $this->assertEquals( array( 2, 1 ), array_keys( $sorter->posts ) ); + $this->assertEquals( + array( + 2 => 1, + 1 => 0, + ), + $sorter->posts + ); + } + + public function test_child_before_parent() { + $sorter = new WP_Topological_Sorter(); + + $sorter->map_post( 1, $this->generate_post( 2, 0 ) ); + $sorter->map_post( 0, $this->generate_post( 1, 2 ) ); + $sorter->sort_topologically(); + + $this->assertEquals( array( 2, 1 ), array_keys( $sorter->posts ) ); + $this->assertEquals( + array( + 1 => 0, + 2 => 1, + ), + $sorter->posts + ); + } + + private function generate_post( $id, $post_parent = 0, $type = 'post' ) { + return array( + 'post_id' => $id, + 'post_parent' => $post_parent, + 'post_type' => $type, + ); + } +} diff --git a/packages/playground/data-liberation/tests/WPWXRReaderTests.php b/packages/playground/data-liberation/tests/WPWXRReaderTests.php index d9b131ce3f..c8bf927db9 100644 --- a/packages/playground/data-liberation/tests/WPWXRReaderTests.php +++ b/packages/playground/data-liberation/tests/WPWXRReaderTests.php @@ -3,7 +3,7 @@ use PHPUnit\Framework\TestCase; class WPWXRReaderTests extends TestCase { - + /** * @dataProvider preexisting_wxr_files_provider */ @@ -42,7 +42,7 @@ public function test_does_not_crash_when_parsing_preexisting_wxr_files_as_stream $this->assertEquals($expected_entitys, $found_entities); } - public function preexisting_wxr_files_provider() { + public static function preexisting_wxr_files_provider() { return [ [__DIR__ . '/wxr/a11y-unit-test-data.xml', 1043], [__DIR__ . '/wxr/crazy-cdata-escaped.xml', 5], @@ -114,7 +114,7 @@ public function test_simple_wxr() { ], $importer->get_entity()->get_data() ); - + $this->assertTrue( $importer->next_entity() ); $this->assertEquals( [ From e1baa752454ae9833d19361ce96fdd7cddc0b472 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 27 Nov 2024 15:50:30 +0100 Subject: [PATCH 06/51] Add memory-free functions --- .../src/import/WP_Topological_Sorter.php | 45 ++++++++++-- .../tests/WPTopologicalSorterTests.php | 72 ++++++++++++++----- 2 files changed, 93 insertions(+), 24 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 85d877c56b..f7037c9928 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -28,12 +28,20 @@ class WP_Topological_Sorter { */ protected $last_post_id = 0; + /** + * Whether the sort has been done. + * + * @var bool + */ + protected $sorted = false; + public function reset() { $this->posts = array(); $this->categories = array(); $this->category_index = array(); $this->orphan_post_counter = 0; $this->last_post_id = 0; + $this->sorted = false; } public function map_category( $byte_offset, $data ) { @@ -64,16 +72,32 @@ public function map_post( $byte_offset, $data ) { --$this->orphan_post_counter; } - // This is an array saved as: [ parent, byte_offset ], to save space and not using an associative one. + // This is an array saved as: [ parent, byte_offset, moved ], to save space and not using an associative one. $this->posts[ $data['post_id'] ] = array( $data['post_parent'], $byte_offset, + false, ); } return true; } + /** + * Get the byte offset of an element. + */ + public function get_byte_offset( $id ) { + if ( ! $this->sorted ) { + return false; + } + + if ( isset( $this->posts[ $id ] ) ) { + return $this->posts[ $id ]; + } + + return false; + } + /** * Sort posts topologically. * @@ -91,9 +115,16 @@ public function sort_topologically() { // Empty some memory. foreach ( $this->posts as $id => $element ) { - // Save only the byte offset. - $this->posts[ $id ] = $element[1]; + if ( ! $element[2] ) { + // The element have not been moved, unset it. + unset( $this->posts[ $id ] ); + } else { + // Save only the byte offset. + $this->posts[ $id ] = $element[1]; + } } + + $this->sorted = true; } /** @@ -106,7 +137,8 @@ public function sort_topologically() { */ private function sort_parent_child( &$elements ) { // Sort the array in-place. - $position = 0; + reset( $elements ); + $position = key( $elements ); foreach ( $elements as $id => $element ) { if ( empty( $element[0] ) ) { @@ -131,7 +163,7 @@ private function move_element( &$elements, $id, &$position ) { $element = $elements[ $id ]; - if ( $id < $position ) { + if ( $id <= $position ) { // Already in the correct position. return; } @@ -139,6 +171,9 @@ private function move_element( &$elements, $id, &$position ) { // Move the element to the current position. unset( $elements[ $id ] ); + // Set as 'moved'. + $element[2] = true; + // Generate the new array. $elements = array_slice( $elements, 0, $position, true ) + array( $id => $element ) + diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index a751911556..2969739b08 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -22,31 +22,65 @@ public function test_parent_after_child() { $sorter->map_post( 1, $this->generate_post( 2, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 2, 1 ), array_keys( $sorter->posts ) ); - $this->assertEquals( - array( - 2 => 1, - 1 => 0, - ), - $sorter->posts - ); + $this->assertEquals( array( 2 => 1 ), $sorter->posts ); + $this->assertFalse( $sorter->get_byte_offset( 1 ) ); + $this->assertEquals( 1, $sorter->get_byte_offset( 2 ) ); } - public function test_child_before_parent() { + public function test_child_after_parent() { $sorter = new WP_Topological_Sorter(); - $sorter->map_post( 1, $this->generate_post( 2, 0 ) ); - $sorter->map_post( 0, $this->generate_post( 1, 2 ) ); + $sorter->map_post( 10, $this->generate_post( 1, 0 ) ); + $sorter->map_post( 20, $this->generate_post( 2, 1 ) ); + $sorter->map_post( 30, $this->generate_post( 3, 2 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 2, 1 ), array_keys( $sorter->posts ) ); - $this->assertEquals( - array( - 1 => 0, - 2 => 1, - ), - $sorter->posts - ); + $this->assertEquals( array(), $sorter->posts ); + $this->assertFalse( $sorter->get_byte_offset( 1 ) ); + } + + public function test_orphaned_post() { + $sorter = new WP_Topological_Sorter(); + + $sorter->map_post( 10, $this->generate_post( 1, 3 ) ); + $sorter->map_post( 20, $this->generate_post( 2, 0 ) ); + $sorter->sort_topologically(); + + $this->assertEquals( array( 2 => 20 ), $sorter->posts ); + $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); + } + + public function test_chain_parent_child_after() { + $sorter = new WP_Topological_Sorter(); + + $sorter->map_post( 10, $this->generate_post( 1, 2 ) ); + $sorter->map_post( 20, $this->generate_post( 2, 3 ) ); + $sorter->map_post( 30, $this->generate_post( 3, 0 ) ); + $sorter->sort_topologically(); + + $this->assertEquals( array( 3 => 30 ), $sorter->posts ); + } + + public function test_reverse_order() { + $sorter = new WP_Topological_Sorter(); + + $this->multiple_map_posts( $sorter, array( 3, 2, 1 ) ); + $sorter->sort_topologically(); + + $this->assertEquals( array(), $sorter->posts ); + } + + /** + * This map a list of posts [3, 2, 1] of the form: + * post_id: 1, 2, 3 + * post_parent: 3, 2, 1 + * byte_offset: 10, 20, 30 + */ + private function multiple_map_posts( $sorter, $parents ) { + foreach ( $parents as $i => $parent ) { + $post = $this->generate_post( $i + 1, $parent ); + $sorter->map_post( 10 * $parent + 10, $post ); + } } private function generate_post( $id, $post_parent = 0, $type = 'post' ) { From 275c857c888005fbb4ba324c5001be19f601ae9d Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 27 Nov 2024 22:11:19 +0100 Subject: [PATCH 07/51] Replace bin script with wp-cli command --- .../data-liberation/bin/import/blueprint-import-wxr.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json b/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json index 55ab107921..b8ad517fae 100644 --- a/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json +++ b/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json @@ -11,8 +11,8 @@ "pluginPath": "data-liberation/plugin.php" }, { - "step": "runPHP", - "code": "files as $file ) {\nif ( $file->isFile() && pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) === 'xml' ) {\ndata_liberation_import( $file->getPathname() );\n}\n}\n};" + "step": "wp-cli", + "command": "wp data-liberation import /wordpress/wp-content/uploads/import-wxr" } ] } From a25ed70df3ad70018319e7b15d7271bb1be8fda3 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 27 Nov 2024 23:42:39 +0100 Subject: [PATCH 08/51] Add special cases --- .../src/import/WP_Topological_Sorter.php | 65 +++++++++++++++---- .../tests/WPTopologicalSorterTests.php | 28 ++++++-- 2 files changed, 73 insertions(+), 20 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index f7037c9928..9aa42363cf 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -50,8 +50,8 @@ public function map_category( $byte_offset, $data ) { } $this->categories[ $data['slug'] ] = array( - 'byte_offset' => $byte_offset, 'parent' => $data['parent'], + 'byte_offset' => $byte_offset, 'visited' => false, ); } @@ -84,7 +84,7 @@ public function map_post( $byte_offset, $data ) { } /** - * Get the byte offset of an element. + * Get the byte offset of an element, and remove it from the list. */ public function get_byte_offset( $id ) { if ( ! $this->sorted ) { @@ -92,12 +92,26 @@ public function get_byte_offset( $id ) { } if ( isset( $this->posts[ $id ] ) ) { - return $this->posts[ $id ]; + $ret = $this->posts[ $id ]; + + // Remove the element from the array. + unset( $this->posts[ $id ] ); + + if ( 0 === count( $this->posts ) ) { + // All posts have been processed. + $this->reset(); + } + + return $ret; } return false; } + public function is_sorted() { + return $this->sorted; + } + /** * Sort posts topologically. * @@ -106,7 +120,7 @@ public function get_byte_offset( $id ) { * * Sorted posts will be stored as attachments and posts/pages separately. */ - public function sort_topologically() { + public function sort_topologically( $empty_memory = true ) { foreach ( $this->categories as $slug => $category ) { $this->topological_category_sort( $slug, $category ); } @@ -114,13 +128,15 @@ public function sort_topologically() { $this->sort_parent_child( $this->posts ); // Empty some memory. - foreach ( $this->posts as $id => $element ) { - if ( ! $element[2] ) { - // The element have not been moved, unset it. - unset( $this->posts[ $id ] ); - } else { - // Save only the byte offset. - $this->posts[ $id ] = $element[1]; + if ( $empty_memory ) { + foreach ( $this->posts as $id => $element ) { + if ( ! $element[2] ) { + // The element have not been moved, unset it. + unset( $this->posts[ $id ] ); + } else { + // Save only the byte offset. + $this->posts[ $id ] = $element[1]; + } } } @@ -137,8 +153,29 @@ public function sort_topologically() { */ private function sort_parent_child( &$elements ) { // Sort the array in-place. - reset( $elements ); - $position = key( $elements ); + // reset( $elements ); + $position = 0; // key( $elements ); + $length = count( $elements ); + + if ( $length < 2 ) { + // No need to sort. + return; + } + + if ( 2 === $length ) { + $keys = array_keys( $elements ); + + // First element has a parent and is the second. + if ( $elements[ $keys[0] ][0] && $keys[1] === $elements[ $keys[0] ][0] ) { + // Swap. + $elements = array_reverse( $elements, true ); + + // Set the second as 'moved'. + $elements[ $keys[1] ][2] = true; + } + + return; + } foreach ( $elements as $id => $element ) { if ( empty( $element[0] ) ) { @@ -163,7 +200,7 @@ private function move_element( &$elements, $id, &$position ) { $element = $elements[ $id ]; - if ( $id <= $position ) { + if ( $id < $position ) { // Already in the correct position. return; } diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index 2969739b08..d7b8d3e091 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -18,13 +18,13 @@ public function test_import_one_post() { public function test_parent_after_child() { $sorter = new WP_Topological_Sorter(); - $sorter->map_post( 0, $this->generate_post( 1, 2 ) ); - $sorter->map_post( 1, $this->generate_post( 2, 0 ) ); + $sorter->map_post( 10, $this->generate_post( 1, 2 ) ); + $sorter->map_post( 20, $this->generate_post( 2, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 2 => 1 ), $sorter->posts ); + $this->assertEquals( array( 2 => 20 ), $sorter->posts ); $this->assertFalse( $sorter->get_byte_offset( 1 ) ); - $this->assertEquals( 1, $sorter->get_byte_offset( 2 ) ); + $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); } public function test_child_after_parent() { @@ -58,7 +58,7 @@ public function test_chain_parent_child_after() { $sorter->map_post( 30, $this->generate_post( 3, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 3 => 30 ), $sorter->posts ); + $this->assertEquals( array( 3 => 30, 2 => 20 ), $sorter->posts ); } public function test_reverse_order() { @@ -70,6 +70,22 @@ public function test_reverse_order() { $this->assertEquals( array(), $sorter->posts ); } + public function test_get_byte_offsets_consume_array() { + $sorter = new WP_Topological_Sorter(); + + $this->multiple_map_posts( $sorter, array( 3, 1, 2 ) ); + $sorter->sort_topologically(); + + $this->assertEquals( array( 3 => 10 ), $sorter->posts ); + + // $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); + // $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); + // $this->assertEquals( 30, $sorter->get_byte_offset( 3 ) ); + + $this->assertFalse( $sorter->get_byte_offset( 1 ) ); + $this->assertFalse( $sorter->is_sorted() ); + } + /** * This map a list of posts [3, 2, 1] of the form: * post_id: 1, 2, 3 @@ -79,7 +95,7 @@ public function test_reverse_order() { private function multiple_map_posts( $sorter, $parents ) { foreach ( $parents as $i => $parent ) { $post = $this->generate_post( $i + 1, $parent ); - $sorter->map_post( 10 * $parent + 10, $post ); + $sorter->map_post( 10 * $i + 10, $post ); } } From 0630714e0c45ccf0cfe2d207829f441eeccc7f70 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Thu, 28 Nov 2024 11:29:57 +0100 Subject: [PATCH 09/51] Change the sorting algorithm to qsort --- .../src/import/WP_Topological_Sorter.php | 124 +++++------------- .../tests/WPTopologicalSorterTests.php | 30 ++--- 2 files changed, 51 insertions(+), 103 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 9aa42363cf..a430306d20 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -72,11 +72,11 @@ public function map_post( $byte_offset, $data ) { --$this->orphan_post_counter; } - // This is an array saved as: [ parent, byte_offset, moved ], to save space and not using an associative one. + // This is an array saved as: [ parent, byte_offset ], to save + // space and not using an associative one. $this->posts[ $data['post_id'] ] = array( $data['post_parent'], $byte_offset, - false, ); } @@ -120,23 +120,21 @@ public function is_sorted() { * * Sorted posts will be stored as attachments and posts/pages separately. */ - public function sort_topologically( $empty_memory = true ) { + public function sort_topologically( $free_space = true ) { foreach ( $this->categories as $slug => $category ) { $this->topological_category_sort( $slug, $category ); } - $this->sort_parent_child( $this->posts ); + $this->sort_elements( $this->posts ); - // Empty some memory. - if ( $empty_memory ) { + // Free some space. + if ( $free_space ) { + /** + * @TODO: all the elements that have not been moved can be flushed away. + */ foreach ( $this->posts as $id => $element ) { - if ( ! $element[2] ) { - // The element have not been moved, unset it. - unset( $this->posts[ $id ] ); - } else { - // Save only the byte offset. - $this->posts[ $id ] = $element[1]; - } + // Save only the byte offset. + $this->posts[ $id ] = $element[1]; } } @@ -144,86 +142,36 @@ public function sort_topologically( $empty_memory = true ) { } /** - * Recursive topological sorting. - * @todo Check for circular dependencies. - * - * @param array $elements The elements to sort. + * Recursive sort elements. Posts with parents will be moved to the correct position. * - * @return void + * @return true */ - private function sort_parent_child( &$elements ) { - // Sort the array in-place. - // reset( $elements ); - $position = 0; // key( $elements ); - $length = count( $elements ); - - if ( $length < 2 ) { - // No need to sort. - return; - } - - if ( 2 === $length ) { - $keys = array_keys( $elements ); - - // First element has a parent and is the second. - if ( $elements[ $keys[0] ][0] && $keys[1] === $elements[ $keys[0] ][0] ) { - // Swap. - $elements = array_reverse( $elements, true ); - - // Set the second as 'moved'. - $elements[ $keys[1] ][2] = true; + private function sort_elements( &$elements ) { + $sort_callback = function ( $a, $b ) use ( &$elements ) { + $parent_a = $elements[ $a ][0]; + $parent_b = $elements[ $b ][0]; + + if ( ! $parent_a && ! $parent_b ) { + // No parents. + return 0; + } elseif ( $a === $parent_b ) { + // A is the parent of B. + return -1; + } elseif ( $b === $parent_a ) { + // B is the parent of A. + return 1; } - return; - } - - foreach ( $elements as $id => $element ) { - if ( empty( $element[0] ) ) { - $this->move_element( $elements, $id, $position ); - } - } - } - - /** - * Move an element to a new position. - * - * @param array $elements The elements to sort. - * @param int $id The ID of the element to move. - * @param int $position The new position of the element. - * - * @return void - */ - private function move_element( &$elements, $id, &$position ) { - if ( ! isset( $elements[ $id ] ) ) { - return; - } - - $element = $elements[ $id ]; + return 0; + }; - if ( $id < $position ) { - // Already in the correct position. - return; - } - - // Move the element to the current position. - unset( $elements[ $id ] ); - - // Set as 'moved'. - $element[2] = true; - - // Generate the new array. - $elements = array_slice( $elements, 0, $position, true ) + - array( $id => $element ) + - array_slice( $elements, $position, null, true ); - - ++$position; - - // Move children. - foreach ( $elements as $child_id => $child_element ) { - if ( $id === $child_element[0] ) { - $this->move_element( $elements, $child_id, $position ); - } - } + /** + * @TODO: PHP uses quicksort: https://github.com/php/php-src/blob/master/Zend/zend_sort.c + * WordPress export posts by ID and so are likely to be already in order. + * Quicksort performs badly on already sorted arrays, O(n^2) is the worst case. + * Let's consider using a different sorting algorithm. + */ + uksort( $elements, $sort_callback ); } /** diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index d7b8d3e091..9e176d5be2 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -22,9 +22,10 @@ public function test_parent_after_child() { $sorter->map_post( 20, $this->generate_post( 2, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 2 => 20 ), $sorter->posts ); - $this->assertFalse( $sorter->get_byte_offset( 1 ) ); + $this->assertEquals( array( 2 => 20, 1 => 10 ), $sorter->posts ); + $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); + $this->assertFalse( $sorter->is_sorted() ); } public function test_child_after_parent() { @@ -35,8 +36,8 @@ public function test_child_after_parent() { $sorter->map_post( 30, $this->generate_post( 3, 2 ) ); $sorter->sort_topologically(); - $this->assertEquals( array(), $sorter->posts ); - $this->assertFalse( $sorter->get_byte_offset( 1 ) ); + $this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts ); + $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); } public function test_orphaned_post() { @@ -46,7 +47,8 @@ public function test_orphaned_post() { $sorter->map_post( 20, $this->generate_post( 2, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 2 => 20 ), $sorter->posts ); + $this->assertEquals( array( 1 => 10, 2 => 20 ), $sorter->posts ); + $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); } @@ -58,7 +60,7 @@ public function test_chain_parent_child_after() { $sorter->map_post( 30, $this->generate_post( 3, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 3 => 30, 2 => 20 ), $sorter->posts ); + $this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts ); } public function test_reverse_order() { @@ -67,23 +69,21 @@ public function test_reverse_order() { $this->multiple_map_posts( $sorter, array( 3, 2, 1 ) ); $sorter->sort_topologically(); - $this->assertEquals( array(), $sorter->posts ); + $this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts ); } public function test_get_byte_offsets_consume_array() { $sorter = new WP_Topological_Sorter(); - $this->multiple_map_posts( $sorter, array( 3, 1, 2 ) ); + $this->multiple_map_posts( $sorter, array( 2, 3, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 3 => 10 ), $sorter->posts ); - - // $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); - // $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); - // $this->assertEquals( 30, $sorter->get_byte_offset( 3 ) ); + $this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts ); - $this->assertFalse( $sorter->get_byte_offset( 1 ) ); - $this->assertFalse( $sorter->is_sorted() ); + $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); + $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); + $this->assertEquals( 30, $sorter->get_byte_offset( 3 ) ); + $this->assertCount( 0, $sorter->posts ); } /** From 9ba0c52f9e769950528b7b790206bfa1bf442b88 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Thu, 28 Nov 2024 13:25:30 +0100 Subject: [PATCH 10/51] Add a TODO --- .../playground/data-liberation/src/cli/WP_Import_Command.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/playground/data-liberation/src/cli/WP_Import_Command.php b/packages/playground/data-liberation/src/cli/WP_Import_Command.php index fe49ced08e..e7f12b08a4 100644 --- a/packages/playground/data-liberation/src/cli/WP_Import_Command.php +++ b/packages/playground/data-liberation/src/cli/WP_Import_Command.php @@ -29,6 +29,9 @@ class WP_Import_Command { */ private $importer = null; + /** + * @var string $wxr_path The path to the WXR file. + */ private $wxr_path = ''; /** @@ -129,6 +132,7 @@ private function import_wxr() { WP_CLI::line( "Importing {$this->wxr_path}" ); if ( $this->dry_run ) { + // @TODO: do something with the dry run. WP_CLI::line( 'Dry run enabled.' ); } else { while ( $this->importer->next_step() ) { From c4295b45f4e1984247834a0a5c26763ff78be3cc Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 29 Nov 2024 14:19:02 +0100 Subject: [PATCH 11/51] Update names --- .../src/import/WP_Stream_Importer.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index b9e0dc7846..b4d3c45281 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -219,7 +219,10 @@ public function next_step() { $this->next_stage = self::STAGE_TOPOLOGICAL_SORT; return false; case self::STAGE_TOPOLOGICAL_SORT: - $this->next_topological_sort_step(); + if ( true === $this->topological_sort_next_entity() ) { + return true; + } + $this->stage = self::STAGE_FRONTLOAD_ASSETS; return true; case self::STAGE_FRONTLOAD_ASSETS: if ( true === $this->frontload_next_entity() ) { @@ -399,20 +402,17 @@ private function frontloading_advance_reentrancy_cursor() { } } - private function next_topological_sort_step() { + private function topological_sort_next_entity() { if ( null === $this->entity_iterator ) { - $this->downloader = new WP_Attachment_Downloader( $this->options ); $this->entity_iterator = $this->create_entity_iterator(); $this->topological_sorter = new WP_Topological_Sorter(); } if ( ! $this->entity_iterator->valid() ) { - $this->stage = self::STAGE_FRONTLOAD_ASSETS; $this->topological_sorter = null; - $this->downloader = null; $this->entity_iterator = null; $this->resume_at_entity = null; - return; + return false; } // $cursor = $this->entity_iterator->get_reentrancy_cursor(); From a75ad15539fcf5a79d34ee95b37fcc6552271fc3 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 29 Nov 2024 16:26:21 +0100 Subject: [PATCH 12/51] Fix: change variable name --- .../data-liberation/src/import/WP_Stream_Importer.php | 9 ++++++++- .../playground/data-liberation/src/wxr/WP_WXR_Reader.php | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index b4d3c45281..2dbeb534f1 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -208,6 +208,12 @@ public function set_frontloading_retries_iterator( $frontloading_retries_iterato private $importer; public function next_step() { + if ( null !== $this->next_stage ) { + return false; + } + + do_action( 'wp_stream_importer_next_stage', $this ); + switch ( $this->stage ) { case self::STAGE_INITIAL: $this->next_stage = self::STAGE_INDEX_ENTITIES; @@ -418,10 +424,11 @@ private function topological_sort_next_entity() { // $cursor = $this->entity_iterator->get_reentrancy_cursor(); $entity = $this->entity_iterator->current(); $data = $entity->get_data(); - $offset = $this->entity_iterator->get_entity_byte_offset(); + $offset = $this->entity_iterator->get_last_xml_byte_offset_outside_of_entity(); switch ( $entity->get_type() ) { case 'category': + file_put_contents( 'php://stderr', print_r( $data, true ) ); $this->topological_sorter->map_category( $offset, $data ); break; case 'post': diff --git a/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php b/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php index c5bdec538c..d70727bc17 100644 --- a/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php +++ b/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php @@ -340,6 +340,7 @@ class WP_WXR_Reader implements Iterator { 'wp:category' => array( 'type' => 'category', 'fields' => array( + 'wp:term_id' => 'term_id', 'wp:category_nicename' => 'slug', 'wp:category_parent' => 'parent', 'wp:cat_name' => 'name', @@ -396,8 +397,8 @@ protected function __construct( WP_XML_Processor $xml ) { $this->xml = $xml; } - public function get_entity_byte_offset() { - return $this->entity_byte_offset; + public function get_last_xml_byte_offset_outside_of_entity() { + return $this->last_xml_byte_offset_outside_of_entity; } public function get_reentrancy_cursor() { From f3324cf6e663832f5eec9d81c87d96a6b8e562ff Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 29 Nov 2024 16:27:11 +0100 Subject: [PATCH 13/51] Add support for categories --- .../src/import/WP_Topological_Sorter.php | 64 ++++++++++++--- .../tests/WPStreamImporterTests.php | 75 +++++++++-------- .../tests/WPTopologicalSorterTests.php | 16 ++-- .../tests/wxr/mixed-categories.xml | 82 +++++++++++++++++++ 4 files changed, 184 insertions(+), 53 deletions(-) create mode 100644 packages/playground/data-liberation/tests/wxr/mixed-categories.xml diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index a430306d20..60ebe10d3c 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -9,9 +9,8 @@ */ class WP_Topological_Sorter { - public $posts = array(); - public $categories = array(); - public $category_index = array(); + public $posts = array(); + public $categories = array(); /** * Variable for keeping counts of orphaned posts/attachments, it'll also be assigned as temporarly post ID. @@ -50,9 +49,8 @@ public function map_category( $byte_offset, $data ) { } $this->categories[ $data['slug'] ] = array( - 'parent' => $data['parent'], - 'byte_offset' => $byte_offset, - 'visited' => false, + array_key_exists( 'parent', $data ) ? $data['parent'] : '', + $byte_offset, ); } @@ -85,8 +83,12 @@ public function map_post( $byte_offset, $data ) { /** * Get the byte offset of an element, and remove it from the list. + * + * @param int $id The ID of the post to get the byte offset. + * + * @return int|bool The byte offset of the post, or false if the post is not found. */ - public function get_byte_offset( $id ) { + public function get_post_byte_offset( $id ) { if ( ! $this->sorted ) { return false; } @@ -97,7 +99,7 @@ public function get_byte_offset( $id ) { // Remove the element from the array. unset( $this->posts[ $id ] ); - if ( 0 === count( $this->posts ) ) { + if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) { // All posts have been processed. $this->reset(); } @@ -108,17 +110,44 @@ public function get_byte_offset( $id ) { return false; } + /** + * Get the byte offset of an element, and remove it from the list. + * + * @param string $slug The slug of the category to get the byte offset. + * + * @return int|bool The byte offset of the category, or false if the category is not found. + */ + public function get_category_byte_offset( $slug ) { + if ( ! $this->sorted ) { + return false; + } + + if ( isset( $this->categories[ $slug ] ) ) { + $ret = $this->categories[ $slug ]; + + // Remove the element from the array. + unset( $this->categories[ $slug ] ); + + if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) { + // All categories have been processed. + $this->reset(); + } + + return $ret; + } + + return false; + } + public function is_sorted() { return $this->sorted; } /** - * Sort posts topologically. + * Sort elements topologically. * - * Children posts should not be processed before their parent has been processed. - * This method sorts the posts in the order they should be processed. - * - * Sorted posts will be stored as attachments and posts/pages separately. + * Elements should not be processed before their parent has been processed. + * This method sorts the elements in the order they should be processed. */ public function sort_topologically( $free_space = true ) { foreach ( $this->categories as $slug => $category ) { @@ -126,6 +155,7 @@ public function sort_topologically( $free_space = true ) { } $this->sort_elements( $this->posts ); + $this->sort_elements( $this->categories ); // Free some space. if ( $free_space ) { @@ -136,6 +166,14 @@ public function sort_topologically( $free_space = true ) { // Save only the byte offset. $this->posts[ $id ] = $element[1]; } + + /** + * @TODO: all the elements that have not been moved can be flushed away. + */ + foreach ( $this->categories as $slug => $element ) { + // Save only the byte offset. + $this->categories[ $slug ] = $element[1]; + } } $this->sorted = true; diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php index 28079e416c..840a1805ef 100644 --- a/packages/playground/data-liberation/tests/WPStreamImporterTests.php +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -15,24 +15,23 @@ protected function setUp(): void { } } - /** - * @before + /** + * @before * * TODO: Run each test in a fresh Playground instance instead of sharing the global * state like this. - */ - public function clean_up_uploads(): void - { - $files = glob( '/wordpress/wp-content/uploads/*' ); - foreach( $files as $file ) { - if( is_dir( $file ) ) { - array_map( 'unlink', glob( "$file/*.*" ) ); - rmdir( $file ); - } else { - unlink( $file ); - } - } - } + */ + public function clean_up_uploads(): void { + $files = glob( '/wordpress/wp-content/uploads/*' ); + foreach ( $files as $file ) { + if ( is_dir( $file ) ) { + array_map( 'unlink', glob( "$file/*.*" ) ); + rmdir( $file ); + } else { + unlink( $file ); + } + } + } public function test_import_simple_wxr() { $import = data_liberation_import( __DIR__ . '/wxr/small-export.xml' ); @@ -44,7 +43,7 @@ public function test_frontloading() { $wxr_path = __DIR__ . '/wxr/frontloading-1-attachment.xml'; $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); $this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS ); - while( $importer->next_step() ) { + while ( $importer->next_step() ) { // noop } $files = glob( '/wordpress/wp-content/uploads/*' ); @@ -57,17 +56,17 @@ public function test_resume_frontloading() { $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); $this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS ); - $progress_url = null; + $progress_url = null; $progress_value = null; - for($i = 0; $i < 20; ++$i) { + for ( $i = 0; $i < 20; ++$i ) { $importer->next_step(); $progress = $importer->get_frontloading_progress(); - if( count( $progress ) === 0 ) { + if ( count( $progress ) === 0 ) { continue; } - $progress_url = array_keys( $progress )[0]; + $progress_url = array_keys( $progress )[0]; $progress_value = array_values( $progress )[0]; - if( null === $progress_value['received'] ) { + if ( null === $progress_value['received'] ) { continue; } break; @@ -78,22 +77,22 @@ public function test_resume_frontloading() { $this->assertEquals( 'https://wpthemetestdata.files.wordpress.com/2008/06/canola2.jpg', $progress_url ); $this->assertGreaterThan( 0, $progress_value['total'] ); - $cursor = $importer->get_reentrancy_cursor(); - $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, [], $cursor ); + $cursor = $importer->get_reentrancy_cursor(); + $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, array(), $cursor ); // Rewind back to the entity we were on. $this->assertTrue( $importer->next_step() ); // Restart the download of the same entity – from scratch. - $progress_value = []; - for($i = 0; $i < 20; ++$i) { + $progress_value = array(); + for ( $i = 0; $i < 20; ++$i ) { $importer->next_step(); $progress = $importer->get_frontloading_progress(); - if( count( $progress ) === 0 ) { + if ( count( $progress ) === 0 ) { continue; } - $progress_url = array_keys( $progress )[0]; + $progress_url = array_keys( $progress )[0]; $progress_value = array_values( $progress )[0]; - if( null === $progress_value['received'] ) { + if ( null === $progress_value['received'] ) { continue; } break; @@ -105,17 +104,17 @@ public function test_resume_frontloading() { } /** - * + * Test resume entity import. */ public function test_resume_entity_import() { $wxr_path = __DIR__ . '/wxr/entities-options-and-posts.xml'; $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); $this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_IMPORT_ENTITIES ); - for($i = 0; $i < 11; ++$i) { + for ( $i = 0; $i < 11; ++$i ) { $this->assertTrue( $importer->next_step() ); - $cursor = $importer->get_reentrancy_cursor(); - $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, [], $cursor ); + $cursor = $importer->get_reentrancy_cursor(); + $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, array(), $cursor ); // Rewind back to the entity we were on. // Note this means we may attempt to insert it twice. It's // the importer's job to detect that and skip the duplicate @@ -125,6 +124,18 @@ public function test_resume_entity_import() { $this->assertFalse( $importer->next_step() ); } + public function test_sort_categories() { + $wxr_path = __DIR__ . '/wxr/mixed-categories.xml'; + $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); + $this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_TOPOLOGICAL_SORT ); + + while ( $importer->next_step() ) { + if ( $importer->get_next_stage() === WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS ) { + break; + } + } + } + private function skip_to_stage( WP_Stream_Importer $importer, string $stage ) { do { while ( $importer->next_step() ) { diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index 9e176d5be2..e454496823 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -23,8 +23,8 @@ public function test_parent_after_child() { $sorter->sort_topologically(); $this->assertEquals( array( 2 => 20, 1 => 10 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); - $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); + $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); + $this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) ); $this->assertFalse( $sorter->is_sorted() ); } @@ -37,7 +37,7 @@ public function test_child_after_parent() { $sorter->sort_topologically(); $this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); + $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); } public function test_orphaned_post() { @@ -48,8 +48,8 @@ public function test_orphaned_post() { $sorter->sort_topologically(); $this->assertEquals( array( 1 => 10, 2 => 20 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); - $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); + $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); + $this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) ); } public function test_chain_parent_child_after() { @@ -80,9 +80,9 @@ public function test_get_byte_offsets_consume_array() { $this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); - $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); - $this->assertEquals( 30, $sorter->get_byte_offset( 3 ) ); + $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); + $this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) ); + $this->assertEquals( 30, $sorter->get_post_byte_offset( 3 ) ); $this->assertCount( 0, $sorter->posts ); } diff --git a/packages/playground/data-liberation/tests/wxr/mixed-categories.xml b/packages/playground/data-liberation/tests/wxr/mixed-categories.xml new file mode 100644 index 0000000000..ae74a7530e --- /dev/null +++ b/packages/playground/data-liberation/tests/wxr/mixed-categories.xml @@ -0,0 +1,82 @@ + + + + + Mixed Categories + https://playground.wordpress.net/scope:funny-chic-valley + + Fri, 29 Nov 2024 12:36:23 +0000 + en-US + 1.2 + https://playground.wordpress.net/scope:funny-chic-valley + https://playground.wordpress.net/scope:funny-chic-valley + + + 1 + + + + + + + + + 5 + + + + + + 1 + + + + + + 3 + + + + + + 2 + + + + + + 5 + + + + + + + 1 + + + + + + + 3 + + + + + + + 2 + + + + + + + From c3afab785c64768c2356cc867f450471166cba74 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 4 Dec 2024 10:08:55 +0100 Subject: [PATCH 14/51] Fix: remove double slashes --- packages/playground/data-liberation/src/functions.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/playground/data-liberation/src/functions.php b/packages/playground/data-liberation/src/functions.php index 05baf08aa0..02025ef8a7 100644 --- a/packages/playground/data-liberation/src/functions.php +++ b/packages/playground/data-liberation/src/functions.php @@ -166,7 +166,7 @@ function wp_visit_file_tree( $dir ) { if ( '.' === $file || '..' === $file ) { continue; } - $file_path = $dir . '/' . $file; + $file_path = rtrim( $dir, '/' ) . '/' . $file; if ( is_dir( $file_path ) ) { $directories[] = $file_path; continue; From 6645eb7885cfa1b74467985be029e33282841538 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 4 Dec 2024 14:52:40 +0100 Subject: [PATCH 15/51] Add test check --- .../data-liberation/tests/WPTopologicalSorterTests.php | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index e454496823..6f732b5d24 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -7,6 +7,14 @@ */ class WPTopologicalSorterTests extends TestCase { + protected function setUp(): void { + parent::setUp(); + + if ( ! isset( $_SERVER['SERVER_SOFTWARE'] ) || $_SERVER['SERVER_SOFTWARE'] !== 'PHP.wasm' ) { + $this->markTestSkipped( 'Test only runs in Playground' ); + } + } + public function test_import_one_post() { $sorter = new WP_Topological_Sorter(); From d197de67ab65216506ce77356bb279bea56e6b81 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 4 Dec 2024 14:52:56 +0100 Subject: [PATCH 16/51] Add new hooks --- .../playground/data-liberation/plugin.php | 68 +++++++++++++------ 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/packages/playground/data-liberation/plugin.php b/packages/playground/data-liberation/plugin.php index 88012b3f05..3b835d3f08 100644 --- a/packages/playground/data-liberation/plugin.php +++ b/packages/playground/data-liberation/plugin.php @@ -39,30 +39,54 @@ function () { } ); -add_action( - 'init', - function () { - if ( defined( 'WP_CLI' ) && WP_CLI ) { - require_once __DIR__ . '/src/cli/WP_Import_Command.php'; - - // Register the WP-CLI import command. - WP_CLI::add_command( 'data-liberation', WP_Import_Command::class ); - } +function data_liberation_init() { + if ( defined( 'WP_CLI' ) && WP_CLI ) { + require_once __DIR__ . '/src/cli/WP_Import_Command.php'; - register_post_status( - 'error', - array( - 'label' => _x( 'Error', 'post' ), // Label name - 'public' => false, - 'exclude_from_search' => false, - 'show_in_admin_all_list' => false, - 'show_in_admin_status_list' => false, - // translators: %s is the number of errors - 'label_count' => _n_noop( 'Error (%s)', 'Error (%s)' ), - ) - ); + // Register the WP-CLI import command. + WP_CLI::add_command( 'data-liberation', WP_Import_Command::class ); } -); + + register_post_status( + 'error', + array( + 'label' => _x( 'Error', 'post' ), // Label name + 'public' => false, + 'exclude_from_search' => false, + 'show_in_admin_all_list' => false, + 'show_in_admin_status_list' => false, + // translators: %s is the number of errors + 'label_count' => _n_noop( 'Error (%s)', 'Error (%s)' ), + ) + ); +} + +add_action( 'init', 'data_liberation_init' ); + +function data_liberation_activate() { + // Activate the topological sorter. Create tables and options. + WP_Topological_Sorter::activate(); +} + +// Run when the plugin is activated. +register_activation_hook( __FILE__, 'data_liberation_activate' ); + +function data_liberation_deactivate() { + // Deactivate the topological sorter. Flush away all data. + WP_Topological_Sorter::deactivate(); + + // @TODO: Cancel any active import sessions and cleanup other data. +} + +// Run when the plugin is deactivated. +register_deactivation_hook( __FILE__, 'data_liberation_deactivate' ); + +function data_liberation_load() { + WP_Topological_Sorter::load(); +} + +// Run when the plugin is loaded. +add_action( 'plugins_loaded', 'data_liberation_load' ); // Register admin menu add_action( From e95618eea67e64f43c552cd7bb0091d56049a58e Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 4 Dec 2024 14:53:10 +0100 Subject: [PATCH 17/51] Add new topo sorting query --- .../src/import/WP_Topological_Sorter.php | 286 +++++++++++++----- 1 file changed, 207 insertions(+), 79 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 60ebe10d3c..8f48bff58c 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -9,8 +9,24 @@ */ class WP_Topological_Sorter { - public $posts = array(); - public $categories = array(); + /** + * The base name of the table. + */ + const TABLE_NAME = 'data_liberation_index'; + + /** + * The option name for the database version. + */ + const OPTION_NAME = 'data_liberation_db_version'; + + /** + * The current database version, to be used with dbDelta. + */ + const DB_VERSION = 1; + + // Element types. + const ELEMENT_TYPE_POST = 1; + const ELEMENT_TYPE_CATEGORY = 2; /** * Variable for keeping counts of orphaned posts/attachments, it'll also be assigned as temporarly post ID. @@ -34,27 +50,135 @@ class WP_Topological_Sorter { */ protected $sorted = false; + public static function get_table_name() { + global $wpdb; + + // Default is wp_{TABLE_NAME} + return $wpdb->prefix . self::TABLE_NAME; + } + + /** + * Run by register_activation_hook. + */ + public static function activate() { + global $wpdb; + + // See wp_get_db_schema + $max_index_length = 191; + $table_name = self::get_table_name(); + + // Create the table if it doesn't exist. + // @TODO: remove this custom SQLite declaration after first phase of unit tests is done. + if ( self::is_sqlite() ) { + $sql = $wpdb->prepare( + 'CREATE TABLE IF NOT EXISTS %i ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + element_type INTEGER NOT NULL default %d, + element_id INTEGER NOT NULL, + parent_id INTEGER, + parent TEXT NOT NULL default "", + byte_offset INTEGER NOT NULL, + hierarchy_level INTEGER DEFAULT NULL + ); + + CREATE UNIQUE INDEX IF NOT EXISTS idx_element_id ON %i (element_id); + CREATE INDEX IF NOT EXISTS idx_element_parent ON %i (parent); + CREATE INDEX IF NOT EXISTS idx_byte_offset ON %i (byte_offset);', + $table_name, + self::ELEMENT_TYPE_POST, + $table_name, + $table_name, + $table_name + ); + } else { + // MySQL, MariaDB. + $sql = $wpdb->prepare( + 'CREATE TABLE IF NOT EXISTS %i ( + id bigint(20) unsigned NOT NULL AUTO_INCREMENT, + element_type tinyint(1) NOT NULL default %d, + element_id unsigned bigint(20) NOT NULL, + parent_id unsigned bigint(20) DEFAULT NULL, + parent varchar(200) NOT NULL default "", + byte_offset bigint(20) unsigned NOT NULL, + hierarchy_level INT DEFAULT NULL, + PRIMARY KEY (id), + UNIQUE KEY element_id (element_id(%d)) + KEY element_parent (element_parent(%d)) + KEY byte_offset (byte_offset(%d)) + ) ' . $wpdb->get_charset_collate(), + self::get_table_name(), + self::ELEMENT_TYPE_POST, + $max_index_length, + $max_index_length, + $max_index_length + ); + } + + require_once ABSPATH . 'wp-admin/includes/upgrade.php'; + dbDelta( $sql ); + + update_option( self::OPTION_NAME, self::DB_VERSION ); + } + + public static function is_sqlite() { + return defined( 'DB_ENGINE' ) || 'sqlite' === DB_ENGINE; + } + + /** + * Run in the 'plugins_loaded' action. + */ + public static function load() { + if ( self::DB_VERSION !== (int) get_site_option( self::OPTION_NAME ) ) { + // Used to update the database with dbDelta, if needed in the future. + self::activate(); + } + } + + /** + * Run by register_deactivation_hook. + */ + public static function deactivate() { + global $wpdb; + $table_name = self::get_table_name(); + + // Drop the table. + $wpdb->query( $wpdb->prepare( 'DROP TABLE IF EXISTS %s', $table_name ) ); + + // Delete the option. + delete_option( self::OPTION_NAME ); + } + + /** + * Run by register_uninstall_hook. + */ public function reset() { - $this->posts = array(); - $this->categories = array(); - $this->category_index = array(); $this->orphan_post_counter = 0; $this->last_post_id = 0; $this->sorted = false; } public function map_category( $byte_offset, $data ) { + global $wpdb; + if ( empty( $data ) ) { return false; } - $this->categories[ $data['slug'] ] = array( - array_key_exists( 'parent', $data ) ? $data['parent'] : '', - $byte_offset, + $wpdb->insert( + self::get_table_name(), + array( + 'element_type' => self::ELEMENT_TYPE_CATEGORY, + 'element_id' => $data['term_id'], + 'parent_id' => $data['parent_id'], + 'parent' => array_key_exists( 'parent', $data ) ? $data['parent'] : '', + 'byte_offset' => $byte_offset, + ) ); } public function map_post( $byte_offset, $data ) { + global $wpdb; + if ( empty( $data ) ) { return false; } @@ -70,11 +194,15 @@ public function map_post( $byte_offset, $data ) { --$this->orphan_post_counter; } - // This is an array saved as: [ parent, byte_offset ], to save - // space and not using an associative one. - $this->posts[ $data['post_id'] ] = array( - $data['post_parent'], - $byte_offset, + $wpdb->insert( + self::get_table_name(), + array( + 'element_type' => self::ELEMENT_TYPE_POST, + 'element_id' => $data['post_id'], + 'parent_id' => $data['post_parent'], + 'parent' => '', + 'byte_offset' => $byte_offset, + ) ); } @@ -89,25 +217,20 @@ public function map_post( $byte_offset, $data ) { * @return int|bool The byte offset of the post, or false if the post is not found. */ public function get_post_byte_offset( $id ) { + global $wpdb; + if ( ! $this->sorted ) { return false; } - if ( isset( $this->posts[ $id ] ) ) { - $ret = $this->posts[ $id ]; - - // Remove the element from the array. - unset( $this->posts[ $id ] ); - - if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) { - // All posts have been processed. - $this->reset(); - } - - return $ret; - } - - return false; + return $wpdb->get_var( + $wpdb->prepare( + 'SELECT byte_offset FROM %s WHERE element_id = %d AND element_type = %d', + self::get_table_name(), + $id, + self::ELEMENT_TYPE_POST + ) + ); } /** @@ -118,25 +241,20 @@ public function get_post_byte_offset( $id ) { * @return int|bool The byte offset of the category, or false if the category is not found. */ public function get_category_byte_offset( $slug ) { + global $wpdb; + if ( ! $this->sorted ) { return false; } - if ( isset( $this->categories[ $slug ] ) ) { - $ret = $this->categories[ $slug ]; - - // Remove the element from the array. - unset( $this->categories[ $slug ] ); - - if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) { - // All categories have been processed. - $this->reset(); - } - - return $ret; - } - - return false; + return $wpdb->get_var( + $wpdb->prepare( + 'SELECT byte_offset FROM %s WHERE element_id = %d AND element_type = %d', + self::get_table_name(), + $id, + self::ELEMENT_TYPE_CATEGORY + ) + ); } public function is_sorted() { @@ -150,30 +268,30 @@ public function is_sorted() { * This method sorts the elements in the order they should be processed. */ public function sort_topologically( $free_space = true ) { - foreach ( $this->categories as $slug => $category ) { - $this->topological_category_sort( $slug, $category ); - } + /*foreach ( $this->categories as $slug => $category ) { + // $this->topological_category_sort( $slug, $category ); + }*/ - $this->sort_elements( $this->posts ); - $this->sort_elements( $this->categories ); + $this->sort_elements( self::ELEMENT_TYPE_POST ); + $this->sort_elements( self::ELEMENT_TYPE_CATEGORY ); // Free some space. if ( $free_space ) { - /** + /* * @TODO: all the elements that have not been moved can be flushed away. - */ + * foreach ( $this->posts as $id => $element ) { // Save only the byte offset. $this->posts[ $id ] = $element[1]; } - /** + /* * @TODO: all the elements that have not been moved can be flushed away. - */ + * foreach ( $this->categories as $slug => $element ) { // Save only the byte offset. $this->categories[ $slug ] = $element[1]; - } + }*/ } $this->sorted = true; @@ -182,34 +300,44 @@ public function sort_topologically( $free_space = true ) { /** * Recursive sort elements. Posts with parents will be moved to the correct position. * + * @param int $type The type of element to sort. * @return true */ - private function sort_elements( &$elements ) { - $sort_callback = function ( $a, $b ) use ( &$elements ) { - $parent_a = $elements[ $a ][0]; - $parent_b = $elements[ $b ][0]; - - if ( ! $parent_a && ! $parent_b ) { - // No parents. - return 0; - } elseif ( $a === $parent_b ) { - // A is the parent of B. - return -1; - } elseif ( $b === $parent_a ) { - // B is the parent of A. - return 1; - } - - return 0; - }; - - /** - * @TODO: PHP uses quicksort: https://github.com/php/php-src/blob/master/Zend/zend_sort.c - * WordPress export posts by ID and so are likely to be already in order. - * Quicksort performs badly on already sorted arrays, O(n^2) is the worst case. - * Let's consider using a different sorting algorithm. - */ - uksort( $elements, $sort_callback ); + private function sort_elements( $type ) { + global $wpdb; + $table_name = self::get_table_name(); + + return $wpdb->query( + $wpdb->prepare( + // Perform a topological sort CTE. + 'WITH RECURSIVE hierarchy_cte AS ( + -- Select all root nodes (where parent_id is NULL) + SELECT id, parent_id, 1 AS hierarchy_level + FROM %i + WHERE parent_id IS NULL AND element_type = %d + + UNION ALL + + -- Recursive member: Join the CTE with the table to find children + SELECT yt.id, yt.parent_id, hc.hierarchy_level + 1 + FROM %i yt + WHERE element_type = %d + INNER JOIN hierarchy_cte hc ON yt.parent_id = hc.id + ) + + -- Update the hierarchy_level based on the computed hierarchy_level + UPDATE %i + SET hierarchy_level = hc.hierarchy_level + FROM hierarchy_cte hc + WHERE %i.id = hc.id;', + $table_name, + $type, + $table_name, + $type, + $table_name, + $table_name + ) + ); } /** From 94d791c65a75799bc0e3d6d760d99c52c6f1b506 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 4 Dec 2024 15:07:41 +0100 Subject: [PATCH 18/51] Remove unused check --- .../data-liberation/src/import/WP_Stream_Importer.php | 6 ------ 1 file changed, 6 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index 2dbeb534f1..9c15e41d29 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -208,12 +208,6 @@ public function set_frontloading_retries_iterator( $frontloading_retries_iterato private $importer; public function next_step() { - if ( null !== $this->next_stage ) { - return false; - } - - do_action( 'wp_stream_importer_next_stage', $this ); - switch ( $this->stage ) { case self::STAGE_INITIAL: $this->next_stage = self::STAGE_INDEX_ENTITIES; From 2acfba6056c6b7629d33e281f99f345f3e1635e6 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 4 Dec 2024 15:07:57 +0100 Subject: [PATCH 19/51] Temporary disable test --- packages/playground/data-liberation/phpunit.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml index 54fbc00a3c..6fc2eb2550 100644 --- a/packages/playground/data-liberation/phpunit.xml +++ b/packages/playground/data-liberation/phpunit.xml @@ -11,7 +11,7 @@ tests/WPXMLProcessorTests.php tests/UrldecodeNTests.php tests/WPStreamImporterTests.php - tests/WPTopologicalSorterTests.php + From 1e25c7538330f742ae1184c6afd71ebfdb8a5fdf Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 4 Dec 2024 15:09:00 +0100 Subject: [PATCH 20/51] Remove debug code --- .../playground/data-liberation/src/import/WP_Stream_Importer.php | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index 9c15e41d29..ba1d141917 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -422,7 +422,6 @@ private function topological_sort_next_entity() { switch ( $entity->get_type() ) { case 'category': - file_put_contents( 'php://stderr', print_r( $data, true ) ); $this->topological_sorter->map_category( $offset, $data ); break; case 'post': From dfc747df03ef57c016b9b11dc19391290d6498e7 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 4 Dec 2024 15:15:41 +0100 Subject: [PATCH 21/51] Remove rebase artifacts --- .../data-liberation/src/import/WP_Stream_Importer.php | 9 --------- 1 file changed, 9 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index ba1d141917..6cae2b4255 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -511,15 +511,6 @@ private function frontload_next_entity() { ) ); break; - case 'category': - case 'term': - $this->topological_sorter->map_term( $upstream, $data ); - break; - case 'site_option': - if ( $data['option_name'] === 'home' ) { - $this->source_site_url = $data['option_value']; - } - break; case 'post': if ( isset( $data['post_type'] ) && $data['post_type'] === 'attachment' ) { $this->enqueue_attachment_download( $data['attachment_url'] ); From 9198c57aae83afa8b252acf8686cb92b5ce23e44 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 6 Dec 2024 08:43:48 +0100 Subject: [PATCH 22/51] Change to new function signature --- .../src/cli/WP_Import_Command.php | 17 ++-- .../src/import/WP_Topological_Sorter.php | 80 +++++++++++-------- 2 files changed, 58 insertions(+), 39 deletions(-) diff --git a/packages/playground/data-liberation/src/cli/WP_Import_Command.php b/packages/playground/data-liberation/src/cli/WP_Import_Command.php index e7f12b08a4..2805ea5ee7 100644 --- a/packages/playground/data-liberation/src/cli/WP_Import_Command.php +++ b/packages/playground/data-liberation/src/cli/WP_Import_Command.php @@ -65,6 +65,9 @@ public function import( $args, $assoc_args ) { $this->register_handlers(); } + // Be sure Data Liberation is activated. + data_liberation_activate(); + if ( filter_var( $path, FILTER_VALIDATE_URL ) ) { // Import URL. $this->import_wxr_url( $path, $options ); @@ -83,7 +86,7 @@ public function import( $args, $assoc_args ) { } if ( ! $count ) { - WP_CLI::error( WP_CLI::colorize( "No WXR files found in the {$path} directory" ) ); + WP_CLI::error( WP_CLI::colorize( "No WXR files found in the %R{$path}%n directory" ) ); } } else { if ( ! is_file( $path ) ) { @@ -135,10 +138,14 @@ private function import_wxr() { // @TODO: do something with the dry run. WP_CLI::line( 'Dry run enabled.' ); } else { - while ( $this->importer->next_step() ) { - $current_stage = $this->importer->get_current_stage(); - // WP_CLI::line( "Stage {$current_stage}" ); - } + do { + $current_stage = $this->importer->get_stage(); + WP_CLI::line( WP_CLI::colorize( "Stage %g{$current_stage}%n" ) ); + + while ( $this->importer->next_step() ) { + WP_CLI::line( 'Step' ); + } + } while ( $this->importer->advance_to_next_stage() ); } WP_CLI::success( 'Import finished' ); diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 8f48bff58c..7d1a6702f4 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -63,9 +63,7 @@ public static function get_table_name() { public static function activate() { global $wpdb; - // See wp_get_db_schema - $max_index_length = 191; - $table_name = self::get_table_name(); + $table_name = self::get_table_name(); // Create the table if it doesn't exist. // @TODO: remove this custom SQLite declaration after first phase of unit tests is done. @@ -74,15 +72,15 @@ public static function activate() { 'CREATE TABLE IF NOT EXISTS %i ( id INTEGER PRIMARY KEY AUTOINCREMENT, element_type INTEGER NOT NULL default %d, - element_id INTEGER NOT NULL, - parent_id INTEGER, + element_id TEXT NOT NULL, + parent_id TEXT DEFAULT NULL, parent TEXT NOT NULL default "", byte_offset INTEGER NOT NULL, - hierarchy_level INTEGER DEFAULT NULL + hierarchy_level TEXT DEFAULT NULL ); CREATE UNIQUE INDEX IF NOT EXISTS idx_element_id ON %i (element_id); - CREATE INDEX IF NOT EXISTS idx_element_parent ON %i (parent); + CREATE INDEX IF NOT EXISTS idx_parent_id ON %i (parent_id); CREATE INDEX IF NOT EXISTS idx_byte_offset ON %i (byte_offset);', $table_name, self::ELEMENT_TYPE_POST, @@ -91,25 +89,27 @@ public static function activate() { $table_name ); } else { + // See wp_get_db_schema + $max_index_length = 191; + // MySQL, MariaDB. $sql = $wpdb->prepare( 'CREATE TABLE IF NOT EXISTS %i ( id bigint(20) unsigned NOT NULL AUTO_INCREMENT, element_type tinyint(1) NOT NULL default %d, - element_id unsigned bigint(20) NOT NULL, - parent_id unsigned bigint(20) DEFAULT NULL, - parent varchar(200) NOT NULL default "", + element_id text NOT NULL, + parent_id text DEFAULT NULL, + parent varchar(200) NOT NULL default \'\', byte_offset bigint(20) unsigned NOT NULL, - hierarchy_level INT DEFAULT NULL, + hierarchy_level text DEFAULT NULL, PRIMARY KEY (id), - UNIQUE KEY element_id (element_id(%d)) - KEY element_parent (element_parent(%d)) - KEY byte_offset (byte_offset(%d)) + KEY element_id (element_id(%d)), + KEY parent_id (parent_id(%d)), + KEY byte_offset (byte_offset) ) ' . $wpdb->get_charset_collate(), self::get_table_name(), self::ELEMENT_TYPE_POST, $max_index_length, - $max_index_length, $max_index_length ); } @@ -121,7 +121,7 @@ public static function activate() { } public static function is_sqlite() { - return defined( 'DB_ENGINE' ) || 'sqlite' === DB_ENGINE; + return defined( 'DB_ENGINE' ) && 'sqlite' === DB_ENGINE; } /** @@ -168,8 +168,8 @@ public function map_category( $byte_offset, $data ) { self::get_table_name(), array( 'element_type' => self::ELEMENT_TYPE_CATEGORY, - 'element_id' => $data['term_id'], - 'parent_id' => $data['parent_id'], + 'element_id' => (string) $data['term_id'], + 'parent_id' => array_key_exists( 'parent_id', $data ) ? (string) $data['parent_id'] : null, 'parent' => array_key_exists( 'parent', $data ) ? $data['parent'] : '', 'byte_offset' => $byte_offset, ) @@ -198,8 +198,8 @@ public function map_post( $byte_offset, $data ) { self::get_table_name(), array( 'element_type' => self::ELEMENT_TYPE_POST, - 'element_id' => $data['post_id'], - 'parent_id' => $data['post_parent'], + 'element_id' => (string) $data['post_id'], + 'parent_id' => array_key_exists( 'parent_id', $data ) ? (string) $data['parent_id'] : null, 'parent' => '', 'byte_offset' => $byte_offset, ) @@ -310,26 +310,38 @@ private function sort_elements( $type ) { return $wpdb->query( $wpdb->prepare( // Perform a topological sort CTE. - 'WITH RECURSIVE hierarchy_cte AS ( - -- Select all root nodes (where parent_id is NULL) - SELECT id, parent_id, 1 AS hierarchy_level - FROM %i - WHERE parent_id IS NULL AND element_type = %d + 'WITH RECURSIVE recursive_hierarchy AS ( + -- Anchor member: select root nodes (nodes with no parent) + SELECT + element_id, + parent_id, + element_id AS hierarchy_path + FROM + %i + WHERE + parent_id IS NULL AND element_type = %d UNION ALL - -- Recursive member: Join the CTE with the table to find children - SELECT yt.id, yt.parent_id, hc.hierarchy_level + 1 - FROM %i yt - WHERE element_type = %d - INNER JOIN hierarchy_cte hc ON yt.parent_id = hc.id + -- Recursive member: join child nodes to their parents + SELECT + child.element_id, + child.parent_id, + parent.hierarchy_path || \'.\' || child.element_id AS hierarchy_path + FROM + %i child + JOIN + recursive_hierarchy parent ON child.parent_id = parent.element_id + WHERE child.element_type = %d ) - -- Update the hierarchy_level based on the computed hierarchy_level + -- Update the table with computed hierarchy paths UPDATE %i - SET hierarchy_level = hc.hierarchy_level - FROM hierarchy_cte hc - WHERE %i.id = hc.id;', + SET hierarchy_path = ( + SELECT hierarchy_path + FROM recursive_hierarchy + WHERE %i.element_id = recursive_hierarchy.element_id + );', $table_name, $type, $table_name, From 49c8bcde9a5e130afdeab82ebd766ca2e3a490ef Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 6 Dec 2024 10:00:00 +0100 Subject: [PATCH 23/51] Add support for count --- .../src/cli/WP_Import_Command.php | 15 ++++- .../src/import/WP_Stream_Importer.php | 66 +++++++++++++------ 2 files changed, 59 insertions(+), 22 deletions(-) diff --git a/packages/playground/data-liberation/src/cli/WP_Import_Command.php b/packages/playground/data-liberation/src/cli/WP_Import_Command.php index 2805ea5ee7..52fcb30e1d 100644 --- a/packages/playground/data-liberation/src/cli/WP_Import_Command.php +++ b/packages/playground/data-liberation/src/cli/WP_Import_Command.php @@ -34,6 +34,11 @@ class WP_Import_Command { */ private $wxr_path = ''; + /** + * @var int $count The number of items to import in one go. + */ + private $count; + /** * Import a WXR file. * @@ -42,6 +47,9 @@ class WP_Import_Command { * * : The path to the WXR file. Either a file, a directory or a URL. * + * [--count=] + * : The number of items to import in one go. Default is 10,000. + * * [--dry-run] * : Perform a dry run if set. * @@ -56,6 +64,7 @@ class WP_Import_Command { public function import( $args, $assoc_args ) { $path = $args[0]; $this->dry_run = WP_CLI\Utils\get_flag_value( $assoc_args, 'dry-run', false ); + $this->count = isset( $assoc_args['count'] ) ? (int) $assoc_args['count'] : 10000; $options = array( 'logger' => new WP_Import_logger(), ); @@ -141,9 +150,11 @@ private function import_wxr() { do { $current_stage = $this->importer->get_stage(); WP_CLI::line( WP_CLI::colorize( "Stage %g{$current_stage}%n" ) ); + $step_count = 0; - while ( $this->importer->next_step() ) { - WP_CLI::line( 'Step' ); + while ( $this->importer->next_step( $this->count ) ) { + ++$step_count; + WP_CLI::line( WP_CLI::colorize( "Step %g{$step_count}%n" ) ); } } while ( $this->importer->advance_to_next_stage() ); } diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index 6cae2b4255..60b9a4afe3 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -207,19 +207,26 @@ public function set_frontloading_retries_iterator( $frontloading_retries_iterato */ private $importer; - public function next_step() { + /** + * Calculate next steps in the import process. + * + * @param int $count The number of entities to process in one go. + * + * @return bool + */ + public function next_step( $count = 10000 ) { switch ( $this->stage ) { case self::STAGE_INITIAL: $this->next_stage = self::STAGE_INDEX_ENTITIES; return false; case self::STAGE_INDEX_ENTITIES: - if ( true === $this->index_next_entities() ) { + if ( true === $this->index_next_entities( $count ) ) { return true; } $this->next_stage = self::STAGE_TOPOLOGICAL_SORT; return false; case self::STAGE_TOPOLOGICAL_SORT: - if ( true === $this->topological_sort_next_entity() ) { + if ( true === $this->topological_sort_next_entity( $count ) ) { return true; } $this->stage = self::STAGE_FRONTLOAD_ASSETS; @@ -402,34 +409,54 @@ private function frontloading_advance_reentrancy_cursor() { } } - private function topological_sort_next_entity() { + /** + * Sort the entities topologically. + * + * @param int $count The number of entities to process in one go. + */ + private function topological_sort_next_entity( $count = 10000 ) { + if ( null !== $this->next_stage ) { + return false; + } + if ( null === $this->entity_iterator ) { $this->entity_iterator = $this->create_entity_iterator(); $this->topological_sorter = new WP_Topological_Sorter(); } if ( ! $this->entity_iterator->valid() ) { - $this->topological_sorter = null; $this->entity_iterator = null; $this->resume_at_entity = null; + $this->topological_sorter = null; return false; } - // $cursor = $this->entity_iterator->get_reentrancy_cursor(); - $entity = $this->entity_iterator->current(); - $data = $entity->get_data(); - $offset = $this->entity_iterator->get_last_xml_byte_offset_outside_of_entity(); - - switch ( $entity->get_type() ) { - case 'category': - $this->topological_sorter->map_category( $offset, $data ); - break; - case 'post': - $this->topological_sorter->map_post( $offset, $data ); + /** + * Internalize the loop to avoid computing the reentrancy cursor + * on every entity in the imported data stream. + */ + for ( $i = 0; $i < $count; ++$i ) { + if ( ! $this->entity_iterator->valid() ) { break; + } + + $entity = $this->entity_iterator->current(); + $data = $entity->get_data(); + $offset = $this->entity_iterator->get_last_xml_byte_offset_outside_of_entity(); + + switch ( $entity->get_type() ) { + case 'category': + $this->topological_sorter->map_category( $offset, $data ); + break; + case 'post': + $this->topological_sorter->map_post( $offset, $data ); + break; + } + + $this->entity_iterator->next(); } - $this->entity_iterator->next(); + $this->resume_at_entity = $this->entity_iterator->get_reentrancy_cursor(); return true; } @@ -554,9 +581,8 @@ private function import_next_entity() { $this->imported_entities_counts = array(); if ( null === $this->entity_iterator ) { - $this->entity_iterator = $this->create_entity_iterator(); - $this->importer = new WP_Entity_Importer(); - $this->topological_sorter = new WP_Topological_Sorter(); + $this->entity_iterator = $this->create_entity_iterator(); + $this->importer = new WP_Entity_Importer(); } if ( ! $this->entity_iterator->valid() ) { From 4b0a2ab6c11113f18f746fa60f30d42c69616a85 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 6 Dec 2024 13:49:18 +0100 Subject: [PATCH 24/51] Add session to CLI --- .../src/cli/WP_Import_Command.php | 25 +++++++++++--- .../data-liberation/src/functions.php | 34 ------------------- .../src/import/WP_Topological_Sorter.php | 20 +++++++---- .../tests/WPStreamImporterTests.php | 6 ---- 4 files changed, 35 insertions(+), 50 deletions(-) diff --git a/packages/playground/data-liberation/src/cli/WP_Import_Command.php b/packages/playground/data-liberation/src/cli/WP_Import_Command.php index 52fcb30e1d..a6ad68fdcc 100644 --- a/packages/playground/data-liberation/src/cli/WP_Import_Command.php +++ b/packages/playground/data-liberation/src/cli/WP_Import_Command.php @@ -39,6 +39,11 @@ class WP_Import_Command { */ private $count; + /** + * @var WP_Import_Session $import_session The import session. + */ + private $import_session; + /** * Import a WXR file. * @@ -114,9 +119,15 @@ public function import( $args, $assoc_args ) { * @return void */ private function import_wxr_file( $file_path, $options = array() ) { - $this->wxr_path = $file_path; - $this->importer = WP_Stream_Importer::create_for_wxr_file( $file_path, $options ); + $this->wxr_path = $file_path; + $this->import_session = WP_Import_Session::create( + array( + 'data_source' => 'wxr_file', + 'file_name' => $file_path, + ) + ); + $this->importer = WP_Stream_Importer::create_for_wxr_file( $file_path, $options ); $this->import_wxr(); } @@ -127,9 +138,15 @@ private function import_wxr_file( $file_path, $options = array() ) { * @return void */ private function import_wxr_url( $url, $options = array() ) { - $this->wxr_path = $url; - $this->importer = WP_Stream_Importer::create_for_wxr_url( $url, $options ); + $this->wxr_path = $url; + $this->import_session = WP_Import_Session::create( + array( + 'data_source' => 'wxr_url', + 'source_url' => $url, + ) + ); + $this->importer = WP_Stream_Importer::create_for_wxr_url( $url, $options ); $this->import_wxr(); } diff --git a/packages/playground/data-liberation/src/functions.php b/packages/playground/data-liberation/src/functions.php index 02025ef8a7..82050e6450 100644 --- a/packages/playground/data-liberation/src/functions.php +++ b/packages/playground/data-liberation/src/functions.php @@ -192,40 +192,6 @@ function wp_visit_file_tree( $dir ) { ); } -/** - * Import a WXR file. Used by the CLI. - * - * @param string $path The path to the WXR file. - * @return void - */ -function data_liberation_import( $path ): bool { - $importer = WP_Stream_Importer::create_for_wxr_file( $path ); - - if ( ! $importer ) { - return false; - } - - $is_wp_cli = defined( 'WP_CLI' ) && WP_CLI; - - if ( $is_wp_cli ) { - WP_CLI::line( "Importing from {$path}" ); - } - - while ( $importer->next_step() ) { - // Output the current stage if running in WP-CLI. - if ( $is_wp_cli ) { - $current_stage = $importer->get_current_stage(); - WP_CLI::line( "Import: stage {$current_stage}" ); - } - } - - if ( $is_wp_cli ) { - WP_CLI::success( 'Import ended' ); - } - - return true; -} - function get_all_post_meta_flat( $post_id ) { return array_map( function ( $value ) { diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 7d1a6702f4..405296f8a2 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -74,7 +74,6 @@ public static function activate() { element_type INTEGER NOT NULL default %d, element_id TEXT NOT NULL, parent_id TEXT DEFAULT NULL, - parent TEXT NOT NULL default "", byte_offset INTEGER NOT NULL, hierarchy_level TEXT DEFAULT NULL ); @@ -99,7 +98,6 @@ public static function activate() { element_type tinyint(1) NOT NULL default %d, element_id text NOT NULL, parent_id text DEFAULT NULL, - parent varchar(200) NOT NULL default \'\', byte_offset bigint(20) unsigned NOT NULL, hierarchy_level text DEFAULT NULL, PRIMARY KEY (id), @@ -164,13 +162,18 @@ public function map_category( $byte_offset, $data ) { return false; } + $category_parent = null; + + if ( array_key_exists( 'parent', $data ) && '' !== $data['parent'] ) { + $category_parent = $data['parent']; + } + $wpdb->insert( self::get_table_name(), array( 'element_type' => self::ELEMENT_TYPE_CATEGORY, 'element_id' => (string) $data['term_id'], - 'parent_id' => array_key_exists( 'parent_id', $data ) ? (string) $data['parent_id'] : null, - 'parent' => array_key_exists( 'parent', $data ) ? $data['parent'] : '', + 'parent_id' => $category_parent, 'byte_offset' => $byte_offset, ) ); @@ -194,13 +197,18 @@ public function map_post( $byte_offset, $data ) { --$this->orphan_post_counter; } + $post_parent = null; + + if ( array_key_exists( 'post_parent', $data ) && '0' !== $data['post_parent'] ) { + $post_parent = $data['post_parent']; + } + $wpdb->insert( self::get_table_name(), array( 'element_type' => self::ELEMENT_TYPE_POST, 'element_id' => (string) $data['post_id'], - 'parent_id' => array_key_exists( 'parent_id', $data ) ? (string) $data['parent_id'] : null, - 'parent' => '', + 'parent_id' => $post_parent, 'byte_offset' => $byte_offset, ) ); diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php index 840a1805ef..b12053655c 100644 --- a/packages/playground/data-liberation/tests/WPStreamImporterTests.php +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -33,12 +33,6 @@ public function clean_up_uploads(): void { } } - public function test_import_simple_wxr() { - $import = data_liberation_import( __DIR__ . '/wxr/small-export.xml' ); - - $this->assertTrue( $import ); - } - public function test_frontloading() { $wxr_path = __DIR__ . '/wxr/frontloading-1-attachment.xml'; $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); From 7ba5337d9432c5b68bb016fba2a4b873bfcdb4a2 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 6 Dec 2024 14:37:16 +0100 Subject: [PATCH 25/51] Add start session --- .../src/cli/WP_Import_Command.php | 38 ++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/packages/playground/data-liberation/src/cli/WP_Import_Command.php b/packages/playground/data-liberation/src/cli/WP_Import_Command.php index a6ad68fdcc..a8fecc370a 100644 --- a/packages/playground/data-liberation/src/cli/WP_Import_Command.php +++ b/packages/playground/data-liberation/src/cli/WP_Import_Command.php @@ -112,6 +112,28 @@ public function import( $args, $assoc_args ) { } } + private function start_session( $args ) { + if ( $this->dry_run ) { + WP_CLI::line( 'Dry run enabled. No session created.' ); + + return; + } + + $active_session = WP_Import_Session::get_active(); + + if ( $active_session ) { + $this->import_session = $active_session; + + $id = $this->import_session->get_id(); + WP_CLI::line( WP_CLI::colorize( "New session: %g{$id}%n" ) ); + } else { + $this->import_session = WP_Import_Session::create( $args ); + + $id = $this->import_session->get_id(); + WP_CLI::line( WP_CLI::colorize( "Current session: %g{$id}%n" ) ); + } + } + /** * Import a WXR file. * @@ -119,8 +141,9 @@ public function import( $args, $assoc_args ) { * @return void */ private function import_wxr_file( $file_path, $options = array() ) { - $this->wxr_path = $file_path; - $this->import_session = WP_Import_Session::create( + $this->wxr_path = $file_path; + + $this->start_session( array( 'data_source' => 'wxr_file', 'file_name' => $file_path, @@ -138,11 +161,12 @@ private function import_wxr_file( $file_path, $options = array() ) { * @return void */ private function import_wxr_url( $url, $options = array() ) { - $this->wxr_path = $url; - $this->import_session = WP_Import_Session::create( + $this->wxr_path = $url; + + $this->start_session( array( 'data_source' => 'wxr_url', - 'source_url' => $url, + 'file_name' => $url, ) ); @@ -158,6 +182,10 @@ private function import_wxr() { WP_CLI::error( 'Could not create importer' ); } + if ( ! $this->import_session ) { + WP_CLI::error( 'Could not create session' ); + } + WP_CLI::line( "Importing {$this->wxr_path}" ); if ( $this->dry_run ) { From 6b7e3156ce609160e327a4d47c4101fcfe22d296 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Mon, 9 Dec 2024 12:06:46 +0100 Subject: [PATCH 26/51] Add support for sessions --- .../src/cli/WP_Import_Command.php | 10 +- .../src/import/WP_Stream_Importer.php | 14 +- .../src/import/WP_Topological_Sorter.php | 261 ++++++++++++------ 3 files changed, 186 insertions(+), 99 deletions(-) diff --git a/packages/playground/data-liberation/src/cli/WP_Import_Command.php b/packages/playground/data-liberation/src/cli/WP_Import_Command.php index a8fecc370a..ca9240c9a5 100644 --- a/packages/playground/data-liberation/src/cli/WP_Import_Command.php +++ b/packages/playground/data-liberation/src/cli/WP_Import_Command.php @@ -125,12 +125,12 @@ private function start_session( $args ) { $this->import_session = $active_session; $id = $this->import_session->get_id(); - WP_CLI::line( WP_CLI::colorize( "New session: %g{$id}%n" ) ); + WP_CLI::line( WP_CLI::colorize( "Current session: %g{$id}%n" ) ); } else { $this->import_session = WP_Import_Session::create( $args ); $id = $this->import_session->get_id(); - WP_CLI::line( WP_CLI::colorize( "Current session: %g{$id}%n" ) ); + WP_CLI::line( WP_CLI::colorize( "New session: %g{$id}%n" ) ); } } @@ -150,6 +150,9 @@ private function import_wxr_file( $file_path, $options = array() ) { ) ); + // Pass the session ID. + $options['session_id'] = $this->import_session->get_id(); + $this->importer = WP_Stream_Importer::create_for_wxr_file( $file_path, $options ); $this->import_wxr(); } @@ -170,6 +173,9 @@ private function import_wxr_url( $url, $options = array() ) { ) ); + // Pass the session ID. + $options['session_id'] = $this->import_session->get_id(); + $this->importer = WP_Stream_Importer::create_for_wxr_url( $url, $options ); $this->import_wxr(); } diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index 60b9a4afe3..6ed98ba5fe 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -229,8 +229,13 @@ public function next_step( $count = 10000 ) { if ( true === $this->topological_sort_next_entity( $count ) ) { return true; } + + // We indexed all the entities. Now sort them topologically. + $this->topological_sorter->sort_topologically(); + $this->topological_sorter = null; + $this->stage = self::STAGE_FRONTLOAD_ASSETS; - return true; + return false; case self::STAGE_FRONTLOAD_ASSETS: if ( true === $this->frontload_next_entity() ) { return true; @@ -421,13 +426,12 @@ private function topological_sort_next_entity( $count = 10000 ) { if ( null === $this->entity_iterator ) { $this->entity_iterator = $this->create_entity_iterator(); - $this->topological_sorter = new WP_Topological_Sorter(); + $this->topological_sorter = new WP_Topological_Sorter( $this->options ); } if ( ! $this->entity_iterator->valid() ) { - $this->entity_iterator = null; - $this->resume_at_entity = null; - $this->topological_sorter = null; + $this->entity_iterator = null; + $this->resume_at_entity = null; return false; } diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 405296f8a2..bed8b9cd12 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -50,6 +50,37 @@ class WP_Topological_Sorter { */ protected $sorted = false; + /** + * The current session ID. + */ + protected $current_session = null; + + /** + * The total number of categories. + */ + protected $total_categories = 0; + + /** + * The total number of posts. + */ + protected $total_posts = 0; + + /** + * The current item being processed. + */ + protected $current_item = 0; + + public function __construct( $options = array() ) { + if ( array_key_exists( 'session_id', $options ) ) { + $this->current_session = $options['session_id']; + } + } + + /** + * Get the name of the table. + * + * @return string The name of the table. + */ public static function get_table_name() { global $wpdb; @@ -71,20 +102,23 @@ public static function activate() { $sql = $wpdb->prepare( 'CREATE TABLE IF NOT EXISTS %i ( id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id INTEGER NOT NULL, element_type INTEGER NOT NULL default %d, element_id TEXT NOT NULL, parent_id TEXT DEFAULT NULL, byte_offset INTEGER NOT NULL, - hierarchy_level TEXT DEFAULT NULL + sort_order int DEFAULT 1 ); CREATE UNIQUE INDEX IF NOT EXISTS idx_element_id ON %i (element_id); + CREATE INDEX IF NOT EXISTS idx_session_id ON %i (session_id); CREATE INDEX IF NOT EXISTS idx_parent_id ON %i (parent_id); CREATE INDEX IF NOT EXISTS idx_byte_offset ON %i (byte_offset);', $table_name, self::ELEMENT_TYPE_POST, $table_name, $table_name, + $table_name, $table_name ); } else { @@ -95,12 +129,14 @@ public static function activate() { $sql = $wpdb->prepare( 'CREATE TABLE IF NOT EXISTS %i ( id bigint(20) unsigned NOT NULL AUTO_INCREMENT, + session_id bigint(20) unsigned NOT NULL, element_type tinyint(1) NOT NULL default %d, element_id text NOT NULL, parent_id text DEFAULT NULL, byte_offset bigint(20) unsigned NOT NULL, - hierarchy_level text DEFAULT NULL, + sort_order int DEFAULT 1, PRIMARY KEY (id), + KEY session_id (session_id), KEY element_id (element_id(%d)), KEY parent_id (parent_id(%d)), KEY byte_offset (byte_offset) @@ -153,8 +189,34 @@ public function reset() { $this->orphan_post_counter = 0; $this->last_post_id = 0; $this->sorted = false; + $this->current_session = null; + $this->total_categories = 0; + $this->total_posts = 0; + $this->current_item = 0; } + /** + * Delete all rows for a given session ID. + * + * @param int $session_id The session ID to delete rows for. + * @return int|false The number of rows deleted, or false on error. + */ + public function delete_session( $session_id ) { + global $wpdb; + + return $wpdb->delete( + self::get_table_name(), + array( 'session_id' => $session_id ), + array( '%d' ) + ); + } + + /** + * Map a category to the index. + * + * @param int $byte_offset The byte offset of the category. + * @param array $data The category data. + */ public function map_category( $byte_offset, $data ) { global $wpdb; @@ -171,14 +233,25 @@ public function map_category( $byte_offset, $data ) { $wpdb->insert( self::get_table_name(), array( + 'session_id' => $this->current_session, 'element_type' => self::ELEMENT_TYPE_CATEGORY, 'element_id' => (string) $data['term_id'], 'parent_id' => $category_parent, 'byte_offset' => $byte_offset, + // Items with a parent has at least a sort order of 2. + 'sort_order' => $category_parent ? 2 : 1, ) ); + + ++$this->total_categories; } + /** + * Map a post to the index. + * + * @param int $byte_offset The byte offset of the post. + * @param array $data The post data. + */ public function map_post( $byte_offset, $data ) { global $wpdb; @@ -206,12 +279,16 @@ public function map_post( $byte_offset, $data ) { $wpdb->insert( self::get_table_name(), array( + 'session_id' => $this->current_session, 'element_type' => self::ELEMENT_TYPE_POST, 'element_id' => (string) $data['post_id'], 'parent_id' => $post_parent, 'byte_offset' => $byte_offset, + 'sort_order' => $post_parent ? 2 : 1, ) ); + + ++$this->total_posts; } return true; @@ -224,7 +301,7 @@ public function map_post( $byte_offset, $data ) { * * @return int|bool The byte offset of the post, or false if the post is not found. */ - public function get_post_byte_offset( $id ) { + public function get_post_byte_offset( $session_id, $id ) { global $wpdb; if ( ! $this->sorted ) { @@ -233,10 +310,11 @@ public function get_post_byte_offset( $id ) { return $wpdb->get_var( $wpdb->prepare( - 'SELECT byte_offset FROM %s WHERE element_id = %d AND element_type = %d', + 'SELECT byte_offset FROM %i WHERE element_id = %s AND element_type = %d AND session_id = %d LIMIT 1', self::get_table_name(), - $id, - self::ELEMENT_TYPE_POST + (string) $id, + self::ELEMENT_TYPE_POST, + (string) $session_id ) ); } @@ -248,7 +326,7 @@ public function get_post_byte_offset( $id ) { * * @return int|bool The byte offset of the category, or false if the category is not found. */ - public function get_category_byte_offset( $slug ) { + public function get_category_byte_offset( $session_id, $slug ) { global $wpdb; if ( ! $this->sorted ) { @@ -257,14 +335,50 @@ public function get_category_byte_offset( $slug ) { return $wpdb->get_var( $wpdb->prepare( - 'SELECT byte_offset FROM %s WHERE element_id = %d AND element_type = %d', + 'SELECT byte_offset FROM %i WHERE element_id = %s AND element_type = %d AND session_id = %d LIMIT 1', self::get_table_name(), - $id, - self::ELEMENT_TYPE_CATEGORY + (string) $slug, + self::ELEMENT_TYPE_CATEGORY, + (string) $session_id ) ); } + /** + * Get the next item to process. + * + * @param int $session_id The session ID to get the next item from. + * + * @return array|bool The next item to process, or false if there are no more items. + */ + public function next_item( $element_type, $session_id = null ) { + global $wpdb; + + if ( ! $this->sorted || ( 0 === $this->total_posts && 0 === $this->total_categories ) ) { + return false; + } + + if ( null === $session_id ) { + $session_id = $this->current_session; + } + + $next_item = $wpdb->get_row( + $wpdb->prepare( + 'SELECT * FROM %i WHERE element_type = %d ORDER BY sort_order ASC LIMIT 1 OFFSET %d', + self::get_table_name(), + $element_type, + $this->current_item + ), + ARRAY_A + ); + + if ( ! $next_item ) { + return null; + } + + return $next_item; + } + public function is_sorted() { return $this->sorted; } @@ -275,33 +389,10 @@ public function is_sorted() { * Elements should not be processed before their parent has been processed. * This method sorts the elements in the order they should be processed. */ - public function sort_topologically( $free_space = true ) { - /*foreach ( $this->categories as $slug => $category ) { - // $this->topological_category_sort( $slug, $category ); - }*/ - + public function sort_topologically() { $this->sort_elements( self::ELEMENT_TYPE_POST ); $this->sort_elements( self::ELEMENT_TYPE_CATEGORY ); - // Free some space. - if ( $free_space ) { - /* - * @TODO: all the elements that have not been moved can be flushed away. - * - foreach ( $this->posts as $id => $element ) { - // Save only the byte offset. - $this->posts[ $id ] = $element[1]; - } - - /* - * @TODO: all the elements that have not been moved can be flushed away. - * - foreach ( $this->categories as $slug => $element ) { - // Save only the byte offset. - $this->categories[ $slug ] = $element[1]; - }*/ - } - $this->sorted = true; } @@ -315,70 +406,56 @@ private function sort_elements( $type ) { global $wpdb; $table_name = self::get_table_name(); - return $wpdb->query( - $wpdb->prepare( - // Perform a topological sort CTE. - 'WITH RECURSIVE recursive_hierarchy AS ( - -- Anchor member: select root nodes (nodes with no parent) - SELECT - element_id, - parent_id, - element_id AS hierarchy_path - FROM - %i - WHERE - parent_id IS NULL AND element_type = %d - - UNION ALL - - -- Recursive member: join child nodes to their parents - SELECT - child.element_id, - child.parent_id, - parent.hierarchy_path || \'.\' || child.element_id AS hierarchy_path - FROM - %i child - JOIN - recursive_hierarchy parent ON child.parent_id = parent.element_id - WHERE child.element_type = %d + if ( self::is_sqlite() ) { + // SQLite recursive CTE query to perform topological sort + return $wpdb->query( + $wpdb->prepare( + 'WITH RECURSIVE sorted_elements AS ( + SELECT element_id, parent_id, ROW_NUMBER() OVER () AS sort_order + FROM %i + WHERE parent_id IS NULL AND element_type = %d + UNION ALL + SELECT e.element_id, e.parent_id, se.sort_order + 1 + FROM %i e + INNER JOIN sorted_elements se + ON e.parent_id = se.element_id AND e.element_type = %d + ) + UPDATE %i SET sort_order = ( + SELECT sort_order + FROM sorted_elements s + WHERE s.element_id = %i.element_id + ) + WHERE element_type = %d;', + $table_name, + $type, + $table_name, + $type, + $table_name, + $table_name, + $type ) + ); + } - -- Update the table with computed hierarchy paths - UPDATE %i - SET hierarchy_path = ( - SELECT hierarchy_path - FROM recursive_hierarchy - WHERE %i.element_id = recursive_hierarchy.element_id - );', + // MySQL version - update sort_order using a subquery + return $wpdb->query( + $wpdb->prepare( + 'UPDATE %i t1 + JOIN ( + SELECT element_id, + @sort := @sort + 1 AS new_sort_order + FROM %i + CROSS JOIN (SELECT @sort := 0) AS sort_var + WHERE element_type = %d + ORDER BY COALESCE(parent_id, "0"), element_id + ) t2 ON t1.element_id = t2.element_id + SET t1.sort_order = t2.new_sort_order + WHERE t1.element_type = %d', $table_name, - $type, $table_name, $type, - $table_name, - $table_name + $type ) ); } - - /** - * Recursive categories topological sorting. - * - * @param int $slug The slug of the category to sort. - * @param array $category The category to sort. - * - * @todo Check for circular dependencies. - */ - private function topological_category_sort( $slug, $category ) { - if ( isset( $this->categories[ $slug ]['visited'] ) ) { - return; - } - - $this->categories[ $slug ]['visited'] = true; - - if ( isset( $this->categories[ $category['parent'] ] ) ) { - $this->topological_category_sort( $category['parent'], $this->categories[ $category['parent'] ] ); - } - - $this->category_index[] = $category['byte_offset']; - } } From 76d883c2091511e899b8f19519e00fec292733cb Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Mon, 9 Dec 2024 17:25:54 +0100 Subject: [PATCH 27/51] Add categories check --- .../src/import/WP_Entity_Importer.php | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php index bbcf394819..60f7ec3228 100644 --- a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php @@ -258,6 +258,7 @@ public function import_user( $data ) { } public function import_term( $data ) { + // print_r( $data ); /** * Pre-process term data. * @@ -270,8 +271,7 @@ public function import_term( $data ) { } $original_id = isset( $data['id'] ) ? (int) $data['id'] : 0; - $parent_id = isset( $data['parent'] ) ? (int) $data['parent'] : 0; - + $parent = isset( $data['parent'] ) ? $data['parent'] : null; $mapping_key = sha1( $data['taxonomy'] . ':' . $data['slug'] ); $existing = $this->term_exists( $data ); if ( $existing ) { @@ -295,15 +295,17 @@ public function import_term( $data ) { $termdata = array(); $allowed = array( - 'slug' => true, 'description' => true, + 'name' => true, + 'slug' => true, + 'parent' => true, ); // Map the parent comment, or mark it as one we need to fix - // TODO: add parent mapping and remapping - /*$requires_remapping = false; - if ( $parent_id ) { - if ( isset( $this->mapping['term'][ $parent_id ] ) ) { + if ( $parent ) { + // TODO: add parent mapping and remapping + // $requires_remapping = false; + /*if ( isset( $this->mapping['term'][ $parent_id ] ) ) { $data['parent'] = $this->mapping['term'][ $parent_id ]; } else { // Prepare for remapping later @@ -312,9 +314,30 @@ public function import_term( $data ) { // Wipe the parent for now $data['parent'] = 0; + }*/ + $parent_term = term_exists( $parent, $data['taxonomy'] ); + + if ( $parent_term ) { + $data['parent'] = $parent_term['term_id']; + } else { + // It can happens that the parent term is not imported yet in manually created WXR files. + $parent_term = wp_insert_term( $parent, $data['taxonomy'] ); + + if ( is_wp_error( $parent_term ) ) { + $this->logger->error( + sprintf( + /* translators: %s: taxonomy name */ + __( 'Failed to import parent term for "%s"', 'wordpress-importer' ), + $data['taxonomy'] + ) + ); + } else { + $data['parent'] = $parent_term['term_id']; + } } - }*/ + } + // Filter the term data to only include allowed keys. foreach ( $data as $key => $value ) { if ( ! isset( $allowed[ $key ] ) ) { continue; @@ -323,7 +346,17 @@ public function import_term( $data ) { $termdata[ $key ] = $data[ $key ]; } - $result = wp_insert_term( $data['name'], $data['taxonomy'], $termdata ); + $term = term_exists( $data['name'], $data['taxonomy'] ); + $result = null; + + if ( is_array( $term ) ) { + // Update the existing term. + $result = wp_update_term( $term['term_id'], $data['taxonomy'], $termdata ); + } else { + // Create a new term. + $result = wp_insert_term( $data['name'], $data['taxonomy'], $termdata ); + } + if ( is_wp_error( $result ) ) { $this->logger->warning( sprintf( From 7927933ee0750af17642c27173fab458108ada61 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Mon, 9 Dec 2024 21:47:59 +0100 Subject: [PATCH 28/51] Fix: wrong name --- .../data-liberation/src/import/WP_Stream_Importer.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index 6ed98ba5fe..b0cb6e3f4b 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -223,6 +223,7 @@ public function next_step( $count = 10000 ) { if ( true === $this->index_next_entities( $count ) ) { return true; } + $this->next_stage = self::STAGE_TOPOLOGICAL_SORT; return false; case self::STAGE_TOPOLOGICAL_SORT: @@ -234,7 +235,7 @@ public function next_step( $count = 10000 ) { $this->topological_sorter->sort_topologically(); $this->topological_sorter = null; - $this->stage = self::STAGE_FRONTLOAD_ASSETS; + $this->next_stage = self::STAGE_FRONTLOAD_ASSETS; return false; case self::STAGE_FRONTLOAD_ASSETS: if ( true === $this->frontload_next_entity() ) { From 4d612a66472e519a6a91501c27f9cb254d73ca59 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Mon, 9 Dec 2024 21:49:20 +0100 Subject: [PATCH 29/51] Partial tests rework --- .../tests/WPStreamImporterTests.php | 9 +++++ .../tests/WPTopologicalSorterTests.php | 34 +++++++++---------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php index b12053655c..8200da9d1f 100644 --- a/packages/playground/data-liberation/tests/WPStreamImporterTests.php +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -130,6 +130,15 @@ public function test_sort_categories() { } } + public function test_hierarchical_term_import() { + $wxr_path = __DIR__ . '/wxr/small-export.xml'; + $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); + + do { + while ( $importer->next_step( 1 ) ) {} + } while ( $importer->advance_to_next_stage() ); + } + private function skip_to_stage( WP_Stream_Importer $importer, string $stage ) { do { while ( $importer->next_step() ) { diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index 6f732b5d24..d3b7a5ac48 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -19,8 +19,8 @@ public function test_import_one_post() { $sorter = new WP_Topological_Sorter(); $this->assertTrue( $sorter->map_post( 0, $this->generate_post( 1 ) ) ); - $this->assertCount( 1, $sorter->posts ); - $this->assertEquals( 1, array_keys( $sorter->posts )[0] ); + $this->assertEquals( 1, $sorter->get_total_posts() ); + $this->assertEquals( 1, $sorter->next_post()['byte_offset'] ); } public function test_parent_after_child() { @@ -30,9 +30,9 @@ public function test_parent_after_child() { $sorter->map_post( 20, $this->generate_post( 2, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 2 => 20, 1 => 10 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); - $this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) ); + // $this->assertEquals( array( 2 => 20, 1 => 10 ), $sorter->posts ); + $this->assertEquals( 10, $sorter->next_post()['byte_offset'] ); + $this->assertEquals( 20, $sorter->next_post()['byte_offset'] ); $this->assertFalse( $sorter->is_sorted() ); } @@ -44,8 +44,8 @@ public function test_child_after_parent() { $sorter->map_post( 30, $this->generate_post( 3, 2 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); + // $this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts ); + $this->assertEquals( 10, $sorter->next_post()['byte_offset'] ); } public function test_orphaned_post() { @@ -55,9 +55,9 @@ public function test_orphaned_post() { $sorter->map_post( 20, $this->generate_post( 2, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 1 => 10, 2 => 20 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); - $this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) ); + // $this->assertEquals( array( 1 => 10, 2 => 20 ), $sorter->posts ); + $this->assertEquals( 10, $sorter->next_post()['byte_offset'] ); + $this->assertEquals( 20, $sorter->next_post()['byte_offset'] ); } public function test_chain_parent_child_after() { @@ -68,7 +68,7 @@ public function test_chain_parent_child_after() { $sorter->map_post( 30, $this->generate_post( 3, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts ); + // $this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts ); } public function test_reverse_order() { @@ -77,7 +77,7 @@ public function test_reverse_order() { $this->multiple_map_posts( $sorter, array( 3, 2, 1 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts ); + // $this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts ); } public function test_get_byte_offsets_consume_array() { @@ -86,12 +86,12 @@ public function test_get_byte_offsets_consume_array() { $this->multiple_map_posts( $sorter, array( 2, 3, 0 ) ); $sorter->sort_topologically(); - $this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts ); + // $this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); - $this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) ); - $this->assertEquals( 30, $sorter->get_post_byte_offset( 3 ) ); - $this->assertCount( 0, $sorter->posts ); + $this->assertEquals( 10, $sorter->next_post()['byte_offset'] ); + $this->assertEquals( 20, $sorter->next_post()['byte_offset'] ); + $this->assertEquals( 30, $sorter->next_post()['byte_offset'] ); + $this->assertEquals( 0, $sorter->get_total_posts() ); } /** From 4465eab9f27e9ec9194b7390fbd0fba1fc4c3872 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Tue, 10 Dec 2024 11:34:41 +0100 Subject: [PATCH 30/51] Add comments test --- .../src/import/WP_Topological_Sorter.php | 4 +- .../tests/WPStreamImporterTests.php | 38 ++++++++- .../wxr/test-serialized-comment-meta.xml | 84 +++++++++++++++++++ 3 files changed, 121 insertions(+), 5 deletions(-) create mode 100644 packages/playground/data-liberation/tests/wxr/test-serialized-comment-meta.xml diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index bed8b9cd12..b815f2f839 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -390,8 +390,8 @@ public function is_sorted() { * This method sorts the elements in the order they should be processed. */ public function sort_topologically() { - $this->sort_elements( self::ELEMENT_TYPE_POST ); - $this->sort_elements( self::ELEMENT_TYPE_CATEGORY ); + // $this->sort_elements( self::ELEMENT_TYPE_POST ); + // $this->sort_elements( self::ELEMENT_TYPE_CATEGORY ); $this->sorted = true; } diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php index 8200da9d1f..6cfd553c6b 100644 --- a/packages/playground/data-liberation/tests/WPStreamImporterTests.php +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -130,15 +130,47 @@ public function test_sort_categories() { } } - public function test_hierarchical_term_import() { - $wxr_path = __DIR__ . '/wxr/small-export.xml'; + /** + * This is a WordPress core importer test. + * + * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/comment-meta.php + */ + public function test_serialized_comment_meta() { + $wxr_path = __DIR__ . '/wxr/test-serialized-comment-meta.xml'; $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); do { - while ( $importer->next_step( 1 ) ) {} + while ( $importer->next_step( 1 ) ) { + // noop + } } while ( $importer->advance_to_next_stage() ); + + $expected_string = '¯\_(ツ)_/¯'; + $expected_array = array( 'key' => '¯\_(ツ)_/¯' ); + + $comments_count = wp_count_comments(); + // Note: using assertEquals() as the return type changes across different WP versions - numeric string vs int. + $this->assertEquals( 1, $comments_count->approved ); + + $comments = get_comments(); + $this->assertCount( 1, $comments ); + + $comment = $comments[0]; + $this->assertSame( $expected_string, get_comment_meta( $comment->comment_ID, 'string', true ) ); + $this->assertSame( $expected_array, get_comment_meta( $comment->comment_ID, 'array', true ) ); } + /*public function test_hierarchical_term_import() { + $wxr_path = __DIR__ . '/wxr/small-export.xml'; + $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); + + do { + while ( $importer->next_step( 1 ) ) { + + } + } while ( $importer->advance_to_next_stage() ); + }*/ + private function skip_to_stage( WP_Stream_Importer $importer, string $stage ) { do { while ( $importer->next_step() ) { diff --git a/packages/playground/data-liberation/tests/wxr/test-serialized-comment-meta.xml b/packages/playground/data-liberation/tests/wxr/test-serialized-comment-meta.xml new file mode 100644 index 0000000000..8cc47132c6 --- /dev/null +++ b/packages/playground/data-liberation/tests/wxr/test-serialized-comment-meta.xml @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + Test With Serialized Comment Meta + http://test.wordpress.org/ + Just another blog + Mon, 30 Nov 2009 21:35:27 +0000 + http://wordpress.org/?v=2.8.4 + en + 1.0 + http://test.wordpress.org/ + http://test.wordpress.org/ + + + My Entry with comments and comment meta + http://test.wordpress.org/comment-meta + Tue, 30 Nov 1999 00:00:00 +0000 + + http://test.wordpress.org/comment-meta + + + + 10 + 2009-10-20 16:13:20 + 0000-00-00 00:00:00 + open + open + + draft + 0 + 0 + post + + + + 1 + + + https://wordpress.org/ + + + + Gravatar.]]> + + + 0 + 0 + + + + + + + + + + + + From 3ae9af2c51fed7006fe9a1c30a325ffc77acb5f4 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 11:20:50 +0100 Subject: [PATCH 31/51] New sorter indexing --- .../src/import/WP_Entity_Importer.php | 40 +- .../src/import/WP_Stream_Importer.php | 34 +- .../src/import/WP_Topological_Sorter.php | 345 +++++++++++++----- .../tests/WPStreamImporterTests.php | 45 ++- 4 files changed, 343 insertions(+), 121 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php index 60f7ec3228..8334b7fb44 100644 --- a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php @@ -258,7 +258,6 @@ public function import_user( $data ) { } public function import_term( $data ) { - // print_r( $data ); /** * Pre-process term data. * @@ -301,7 +300,7 @@ public function import_term( $data ) { 'parent' => true, ); - // Map the parent comment, or mark it as one we need to fix + // Map the parent term, or mark it as one we need to fix if ( $parent ) { // TODO: add parent mapping and remapping // $requires_remapping = false; @@ -316,13 +315,13 @@ public function import_term( $data ) { $data['parent'] = 0; }*/ $parent_term = term_exists( $parent, $data['taxonomy'] ); - + if ( $parent_term ) { $data['parent'] = $parent_term['term_id']; } else { // It can happens that the parent term is not imported yet in manually created WXR files. $parent_term = wp_insert_term( $parent, $data['taxonomy'] ); - + if ( is_wp_error( $parent_term ) ) { $this->logger->error( sprintf( @@ -470,6 +469,8 @@ protected function post_exists( $data ) { * Note that new/updated terms, comments and meta are imported for the last of the above. */ public function import_post( $data ) { + $parent_id = isset( $data['post_parent'] ) ? (int) $data['post_parent'] : 0; + /** * Pre-process post data. * @@ -478,17 +479,16 @@ public function import_post( $data ) { * @param array $comments Comments on the post. * @param array $terms Terms on the post. */ - $data = apply_filters( 'wxr_importer_pre_process_post', $data ); + $data = apply_filters( 'wxr_importer_pre_process_post', $data, $parent_id ); if ( empty( $data ) ) { $this->logger->debug( 'Skipping post, empty data' ); return false; } $original_id = isset( $data['post_id'] ) ? (int) $data['post_id'] : 0; - $parent_id = isset( $data['post_parent'] ) ? (int) $data['post_parent'] : 0; // Have we already processed this? - if ( isset( $this->mapping['post'][ $original_id ] ) ) { + if ( isset( $element['_already_mapped'] ) ) { $this->logger->debug( 'Skipping post, already processed' ); return; } @@ -675,6 +675,7 @@ public function import_post( $data ) { * @param array $terms Raw term data, already processed. */ do_action( 'wxr_importer_processed_post', $post_id, $data ); + return $post_id; } @@ -940,6 +941,8 @@ public function import_post_meta( $meta_item, $post_id ) { } } + do_action( 'wxr_importer_processed_post_meta', $post_id, $meta_item ); + return true; } @@ -1032,7 +1035,10 @@ public function import_comment( $comment, $post_id, $post_just_imported = false } // Run standard core filters - $comment['comment_post_ID'] = $post_id; + if ( ! $comment['comment_post_ID'] ) { + $comment['comment_post_ID'] = $post_id; + } + // @TODO: How to handle missing fields? Use sensible defaults? What defaults? if ( ! isset( $comment['comment_author_IP'] ) ) { $comment['comment_author_IP'] = ''; @@ -1069,17 +1075,27 @@ public function import_comment( $comment, $post_id, $post_just_imported = false /** * Post processing completed. * - * @param int $post_id New post ID. + * @param int $comment_id New comment ID. * @param array $comment Raw data imported for the comment. - * @param array $meta Raw meta data, already processed by {@see process_post_meta}. * @param array $post_id Parent post ID. */ do_action( 'wxr_importer_processed_comment', $comment_id, $comment, $post_id ); } public function import_comment_meta( $meta_item, $comment_id ) { - $value = maybe_unserialize( $meta_item['value'] ); - add_comment_meta( $comment_id, wp_slash( $meta_item['key'] ), wp_slash( $value ) ); + $meta_item = apply_filters( 'wxr_importer_pre_process_comment_meta', $meta_item, $comment_id ); + if ( empty( $meta_item ) ) { + return false; + } + + if ( ! isset( $meta_item['comment_id'] ) ) { + $meta_item['comment_id'] = $comment_id; + } + + $value = maybe_unserialize( $meta_item['meta_value'] ); + $comment_meta_id = add_comment_meta( $meta_item['comment_id'], wp_slash( $meta_item['meta_key'] ), wp_slash( $value ) ); + + do_action( 'wxr_importer_processed_comment_meta', $comment_meta_id, $meta_item, $comment_id ); } /** diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index b0cb6e3f4b..9696c6c92e 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -231,10 +231,6 @@ public function next_step( $count = 10000 ) { return true; } - // We indexed all the entities. Now sort them topologically. - $this->topological_sorter->sort_topologically(); - $this->topological_sorter = null; - $this->next_stage = self::STAGE_FRONTLOAD_ASSETS; return false; case self::STAGE_FRONTLOAD_ASSETS: @@ -283,6 +279,10 @@ private function index_next_entities( $count = 10000 ) { $this->entity_iterator = $this->create_entity_iterator(); } + if ( null === $this->topological_sorter ) { + $this->topological_sorter = new WP_Topological_Sorter( $this->options ); + } + // Reset the counts and URLs found in the previous pass. $this->indexed_entities_counts = array(); $this->indexed_assets_urls = array(); @@ -426,7 +426,10 @@ private function topological_sort_next_entity( $count = 10000 ) { } if ( null === $this->entity_iterator ) { - $this->entity_iterator = $this->create_entity_iterator(); + $this->entity_iterator = $this->create_entity_iterator(); + } + + if ( null === $this->topological_sorter ) { $this->topological_sorter = new WP_Topological_Sorter( $this->options ); } @@ -447,17 +450,8 @@ private function topological_sort_next_entity( $count = 10000 ) { $entity = $this->entity_iterator->current(); $data = $entity->get_data(); - $offset = $this->entity_iterator->get_last_xml_byte_offset_outside_of_entity(); - - switch ( $entity->get_type() ) { - case 'category': - $this->topological_sorter->map_category( $offset, $data ); - break; - case 'post': - $this->topological_sorter->map_post( $offset, $data ); - break; - } - + // $offset = $this->entity_iterator->get_last_xml_byte_offset_outside_of_entity(); + $this->topological_sorter->map_element( $entity->get_type(), $data ); $this->entity_iterator->next(); } @@ -485,6 +479,10 @@ private function frontload_next_entity() { $this->downloader = new WP_Attachment_Downloader( $this->options['uploads_path'] ); } + if ( null === $this->topological_sorter ) { + $this->topological_sorter = new WP_Topological_Sorter( $this->options ); + } + // Clear the frontloading events from the previous pass. $this->frontloading_events = array(); $this->frontloading_advance_reentrancy_cursor(); @@ -590,6 +588,10 @@ private function import_next_entity() { $this->importer = new WP_Entity_Importer(); } + if ( null === $this->topological_sorter ) { + $this->topological_sorter = new WP_Topological_Sorter( $this->options ); + } + if ( ! $this->entity_iterator->valid() ) { // We're done. $this->stage = self::STAGE_FINISHED; diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index b815f2f839..c7bcde2ddd 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -3,16 +3,14 @@ /** * The topological sorter class. * - * We create an in-memory index that contains offsets and lengths of items in the WXR. - * The indexer will also topologically sort posts so that the order we iterate over posts - * ensures we always get parents before their children. + * We create a custom table that contains the WXR IDs and the mapped IDs. */ class WP_Topological_Sorter { /** * The base name of the table. */ - const TABLE_NAME = 'data_liberation_index'; + const TABLE_NAME = 'data_liberation_map'; /** * The option name for the database version. @@ -24,10 +22,6 @@ class WP_Topological_Sorter { */ const DB_VERSION = 1; - // Element types. - const ELEMENT_TYPE_POST = 1; - const ELEMENT_TYPE_CATEGORY = 2; - /** * Variable for keeping counts of orphaned posts/attachments, it'll also be assigned as temporarly post ID. * To prevent duplicate post ID, we'll use negative number. @@ -55,11 +49,6 @@ class WP_Topological_Sorter { */ protected $current_session = null; - /** - * The total number of categories. - */ - protected $total_categories = 0; - /** * The total number of posts. */ @@ -70,10 +59,58 @@ class WP_Topological_Sorter { */ protected $current_item = 0; + const ENTITY_TYPES = array( + 'comment' => 1, + 'comment_meta' => 2, + 'post' => 3, + 'post_meta' => 4, + 'term' => 5, + ); + + private $mapped_pre_filters = array( + // Name of the filter, and the number of arguments it accepts. + 'wxr_importer_pre_process_comment' => 2, + 'wxr_importer_pre_process_comment_meta' => 2, + 'wxr_importer_pre_process_post' => 2, + 'wxr_importer_pre_process_post_meta' => 2, + 'wxr_importer_pre_process_term' => 1, + ); + + private $mapped_post_actions = array( + // Name of the filter, and the number of arguments it accepts. + 'wxr_importer_processed_comment' => 3, + 'wxr_importer_processed_comment_meta' => 3, + 'wxr_importer_processed_post' => 2, + 'wxr_importer_processed_post_meta' => 2, + 'wxr_importer_processed_term' => 2, + ); + public function __construct( $options = array() ) { if ( array_key_exists( 'session_id', $options ) ) { $this->current_session = $options['session_id']; } + + // The topological sorter needs to know about the mapped IDs for comments, terms, and posts. + foreach ( $this->mapped_pre_filters as $name => $accepted_args ) { + add_filter( $name, array( $this, 'filter_wxr_importer_pre_process' ), 10, $accepted_args ); + } + + foreach ( $this->mapped_post_actions as $name => $accepted_args ) { + add_action( $name, array( $this, 'action_wxr_importer_processed' ), 10, $accepted_args ); + } + } + + /** + * Remove the filters. + */ + public function __destruct() { + foreach ( $this->mapped_pre_filters as $name => $accepted_args ) { + remove_filter( $name, array( $this, 'filter_wxr_importer_pre_process' ) ); + } + + foreach ( $this->mapped_post_actions as $name => $accepted_args ) { + remove_action( $name, array( $this, 'action_wxr_importer_processed' ) ); + } } /** @@ -103,8 +140,9 @@ public static function activate() { 'CREATE TABLE IF NOT EXISTS %i ( id INTEGER PRIMARY KEY AUTOINCREMENT, session_id INTEGER NOT NULL, - element_type INTEGER NOT NULL default %d, + element_type INTEGER NOT NULL, element_id TEXT NOT NULL, + mapped_id TEXT DEFAULT NULL, parent_id TEXT DEFAULT NULL, byte_offset INTEGER NOT NULL, sort_order int DEFAULT 1 @@ -115,7 +153,6 @@ public static function activate() { CREATE INDEX IF NOT EXISTS idx_parent_id ON %i (parent_id); CREATE INDEX IF NOT EXISTS idx_byte_offset ON %i (byte_offset);', $table_name, - self::ELEMENT_TYPE_POST, $table_name, $table_name, $table_name, @@ -130,8 +167,9 @@ public static function activate() { 'CREATE TABLE IF NOT EXISTS %i ( id bigint(20) unsigned NOT NULL AUTO_INCREMENT, session_id bigint(20) unsigned NOT NULL, - element_type tinyint(1) NOT NULL default %d, + element_type tinyint(1) NOT NULL, element_id text NOT NULL, + mapped_id text DEFAULT NULL, parent_id text DEFAULT NULL, byte_offset bigint(20) unsigned NOT NULL, sort_order int DEFAULT 1, @@ -142,7 +180,7 @@ public static function activate() { KEY byte_offset (byte_offset) ) ' . $wpdb->get_charset_collate(), self::get_table_name(), - self::ELEMENT_TYPE_POST, + 1, $max_index_length, $max_index_length ); @@ -190,7 +228,6 @@ public function reset() { $this->last_post_id = 0; $this->sorted = false; $this->current_session = null; - $this->total_categories = 0; $this->total_posts = 0; $this->current_item = 0; } @@ -212,111 +249,243 @@ public function delete_session( $session_id ) { } /** - * Map a category to the index. + * Called by 'wxr_importer_pre_process_*' filters. This populates the entity + * object with the mapped IDs. * - * @param int $byte_offset The byte offset of the category. - * @param array $data The category data. + * @param array $data The data to map. + * @param int|null $id The ID of the element. + * @param int|null $additional_id The additional ID of the element. */ - public function map_category( $byte_offset, $data ) { - global $wpdb; + public function filter_wxr_importer_pre_process( $data, $id = null, $additional_id = null ) { + $current_session = $this->current_session; + $current_filter = current_filter(); + $types = array( + 'wxr_importer_pre_process_comment' => 'comment', + 'wxr_importer_pre_process_comment_meta' => 'comment_meta', + 'wxr_importer_pre_process_post' => 'post', + 'wxr_importer_pre_process_post_meta' => 'post_meta', + 'wxr_importer_pre_process_term' => 'term', + ); + + if ( ! $current_filter || ! array_key_exists( $current_filter, $types ) ) { + _doing_it_wrong( + __METHOD__, + 'This method should be called by the wxr_importer_pre_process_* filters.', + '1.0.0' + ); - if ( empty( $data ) ) { return false; } - $category_parent = null; - - if ( array_key_exists( 'parent', $data ) && '' !== $data['parent'] ) { - $category_parent = $data['parent']; - } + return $this->get_mapped_element( $types[ $current_filter ], $data, $id, $additional_id ); + } - $wpdb->insert( - self::get_table_name(), - array( - 'session_id' => $this->current_session, - 'element_type' => self::ELEMENT_TYPE_CATEGORY, - 'element_id' => (string) $data['term_id'], - 'parent_id' => $category_parent, - 'byte_offset' => $byte_offset, - // Items with a parent has at least a sort order of 2. - 'sort_order' => $category_parent ? 2 : 1, - ) + /** + * Called by 'wxr_importer_processed_*' actions. This adds the entity to the + * sorter table. + * + * @param int|null $id The ID of the element. + * @param array $data The data to map. + * @param int|null $additional_id The additional ID of the element. + */ + public function action_wxr_importer_processed( $id, $data, $additional_id = null ) { + $current_filter = current_action(); + $types = array( + 'wxr_importer_processed_comment' => 'comment', + 'wxr_importer_processed_comment_meta' => 'comment_meta', + 'wxr_importer_processed_post' => 'post', + 'wxr_importer_processed_post_meta' => 'post_meta', + 'wxr_importer_processed_term' => 'term', ); - ++$this->total_categories; + if ( ! $current_filter || ! array_key_exists( $current_filter, $types ) ) { + _doing_it_wrong( + __METHOD__, + 'This method should be called by the wxr_importer_processed_* filters.', + '1.0.0' + ); + + return false; + } + + $this->map_element( $types[ $current_filter ], $data, $id, $additional_id ); } /** - * Map a post to the index. + * Map an element to the index. If $id is provided, it will be used to map the element. * - * @param int $byte_offset The byte offset of the post. - * @param array $data The post data. + * @param string $element_type The type of the element. + * @param array $data The data to map. + * @param int|null $id The ID of the element. + * @param int|null $additional_id The additional ID of the element. */ - public function map_post( $byte_offset, $data ) { + public function map_element( $element_type, $data, $id = null, $additional_id = null ) { global $wpdb; - if ( empty( $data ) ) { - return false; + if ( ! array_key_exists( $element_type, self::ENTITY_TYPES ) ) { + return; } - // No parent, no need to sort. - if ( ! isset( $data['post_type'] ) ) { - return false; + $new_element = array( + 'session_id' => $this->current_session, + 'element_type' => self::ENTITY_TYPES[ $element_type ], + 'element_id' => null, + 'mapped_id' => is_null( $id ) ? null : (string) $id, + 'parent_id' => null, + 'byte_offset' => 0, + // Items with a parent has at least a sort order of 2. + 'sort_order' => 1, + ); + $element_id = null; + + switch ( $element_type ) { + case 'comment': + $element_id = (string) $data['comment_id']; + break; + case 'comment_meta': + $element_id = (string) $data['meta_key']; + + if ( array_key_exists( 'comment_id', $data ) ) { + $new_element['parent_id'] = $data['comment_id']; + } + break; + case 'post': + if ( 'post' === $data['post_type'] || 'page' === $data['post_type'] ) { + if ( array_key_exists( 'post_parent', $data ) && '0' !== $data['post_parent'] ) { + $new_element['parent_id'] = $data['post_parent']; + } + } + + $element_id = (string) $data['post_id']; + break; + case 'post_meta': + break; + case 'term': + $element_id = (string) $data['term_id']; + $new_element['parent_id'] = $data['parent']; + break; } - if ( 'post' === $data['post_type'] || 'page' === $data['post_type'] ) { - if ( ! $data['post_id'] ) { - $this->last_post_id = $this->orphan_post_counter; - --$this->orphan_post_counter; - } - - $post_parent = null; - - if ( array_key_exists( 'post_parent', $data ) && '0' !== $data['post_parent'] ) { - $post_parent = $data['post_parent']; + // The element has been imported, so we can use the ID. + if ( $id ) { + $existing_element = $this->get_mapped_ids( $element_id, self::ENTITY_TYPES[ $element_type ] ); + + if ( $existing_element && is_null( $existing_element['mapped_id'] ) ) { + $new_element['mapped_id'] = (string) $id; + + // Update the element if it already exists. + $wpdb->update( + self::get_table_name(), + array( 'mapped_id' => (string) $id ), + array( + 'element_id' => (string) $element_id, + 'element_type' => self::ENTITY_TYPES[ $element_type ], + ), + array( '%s' ) + ); } + } else { + // Insert the element if it doesn't exist. + $new_element['element_id'] = $element_id; + $wpdb->insert( self::get_table_name(), $new_element ); + } + } - $wpdb->insert( - self::get_table_name(), - array( - 'session_id' => $this->current_session, - 'element_type' => self::ELEMENT_TYPE_POST, - 'element_id' => (string) $data['post_id'], - 'parent_id' => $post_parent, - 'byte_offset' => $byte_offset, - 'sort_order' => $post_parent ? 2 : 1, - ) - ); + /** + * Get a mapped element. Called from 'wxr_importer_pre_process_*' filter. + * + * @param int $entity The entity to get the mapped ID for. + * @param int $id The ID of the element. + * + * @return mixed|bool The mapped element or false if the post is not found. + */ + public function get_mapped_element( $element_type, $element, $id, $additional_id = null ) { + $current_session = $this->current_session; + $already_mapped = false; + + switch ( $element_type ) { + case 'comment': + // The ID is the post ID. + $mapped_ids = $this->get_mapped_ids( $id, self::ENTITY_TYPES['post'] ); + + if ( $mapped_ids && ! is_null( $mapped_ids['mapped_id'] ) ) { + $element['comment_post_ID'] = $mapped_ids['mapped_id']; + } + break; + case 'comment_meta': + // The ID is the comment ID. + $mapped_ids = $this->get_mapped_ids( $id, self::ENTITY_TYPES['comment'] ); + + if ( $mapped_ids && ! is_null( $mapped_ids['mapped_id'] ) ) { + $element['comment_id'] = $mapped_ids['mapped_id']; + } + break; + case 'post': + // The ID is the parent post ID. + $mapped_ids = $this->get_mapped_ids( $id, self::ENTITY_TYPES['post'] ); + + if ( $mapped_ids && ! is_null( $mapped_ids['mapped_id'] ) ) { + $element['post_parent'] = $mapped_ids['mapped_id']; + } + + $mapped_ids = $this->get_mapped_ids( $element['post_id'], self::ENTITY_TYPES['post'] ); + + if ( $mapped_ids && ! is_null( $mapped_ids['mapped_id'] ) ) { + $element['post_id'] = $mapped_ids['mapped_id']; + $already_mapped = true; + } + break; + case 'post_meta': + // The ID is the post ID. + $mapped_ids = $this->get_mapped_ids( $id, self::ENTITY_TYPES['post'] ); + + if ( $mapped_ids ) { + $element['post_id'] = $mapped_ids['mapped_id']; + } + break; + case 'term': + // Not ID provided. + break; + } - ++$this->total_posts; + if ( $already_mapped ) { + // This is used to skip the post if it has already been mapped. + $element['_already_mapped'] = true; } - return true; + return $element; } /** - * Get the byte offset of an element, and remove it from the list. + * Get the mapped ID for an element. * - * @param int $id The ID of the post to get the byte offset. + * @param int $id The ID of the element. + * @param int $type The type of the element. * - * @return int|bool The byte offset of the post, or false if the post is not found. + * @return int|false The mapped ID or null if the element is not found. */ - public function get_post_byte_offset( $session_id, $id ) { + private function get_mapped_ids( $id, $type ) { global $wpdb; - if ( ! $this->sorted ) { - return false; + if ( ! $id ) { + return null; } - return $wpdb->get_var( + $results = $wpdb->get_results( $wpdb->prepare( - 'SELECT byte_offset FROM %i WHERE element_id = %s AND element_type = %d AND session_id = %d LIMIT 1', + 'SELECT element_id, mapped_id FROM %i WHERE element_id = %s AND element_type = %d LIMIT 1', self::get_table_name(), (string) $id, - self::ELEMENT_TYPE_POST, - (string) $session_id - ) + $type + ), + ARRAY_A ); + + if ( $results && 1 === count( $results ) ) { + return $results[0]; + } + + return null; } /** @@ -421,8 +590,8 @@ private function sort_elements( $type ) { ON e.parent_id = se.element_id AND e.element_type = %d ) UPDATE %i SET sort_order = ( - SELECT sort_order - FROM sorted_elements s + SELECT sort_order + FROM sorted_elements s WHERE s.element_id = %i.element_id ) WHERE element_type = %d;', @@ -442,10 +611,10 @@ private function sort_elements( $type ) { $wpdb->prepare( 'UPDATE %i t1 JOIN ( - SELECT element_id, + SELECT element_id, @sort := @sort + 1 AS new_sort_order FROM %i - CROSS JOIN (SELECT @sort := 0) AS sort_var + CROSS JOIN (SELECT @sort := 0) AS sort_var WHERE element_type = %d ORDER BY COALESCE(parent_id, "0"), element_id ) t2 ON t1.element_id = t2.element_id diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php index 6cfd553c6b..c24a971f51 100644 --- a/packages/playground/data-liberation/tests/WPStreamImporterTests.php +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -13,6 +13,21 @@ protected function setUp(): void { if ( ! isset( $_SERVER['SERVER_SOFTWARE'] ) || $_SERVER['SERVER_SOFTWARE'] !== 'PHP.wasm' ) { $this->markTestSkipped( 'Test only runs in Playground' ); } + + global $wpdb; + + // Empty the wp_commentmeta table + $wpdb->query( "TRUNCATE TABLE {$wpdb->commentmeta}" ); + + // Empty the wp_comments table + $wpdb->query( "TRUNCATE TABLE {$wpdb->comments}" ); + + WP_Topological_Sorter::activate(); + } + + protected function tearDown(): void { + WP_Topological_Sorter::deactivate(); + parent::tearDown(); } /** @@ -76,7 +91,7 @@ public function test_resume_frontloading() { // Rewind back to the entity we were on. $this->assertTrue( $importer->next_step() ); - // Restart the download of the same entity – from scratch. + // Restart the download of the same entity - from scratch. $progress_value = array(); for ( $i = 0; $i < 20; ++$i ) { $importer->next_step(); @@ -158,18 +173,38 @@ public function test_serialized_comment_meta() { $comment = $comments[0]; $this->assertSame( $expected_string, get_comment_meta( $comment->comment_ID, 'string', true ) ); $this->assertSame( $expected_array, get_comment_meta( $comment->comment_ID, 'array', true ) ); + + // Additional check for Data Liberation. + $this->assertEquals( 'A WordPress Commenter', $comments[0]->comment_author ); + $this->assertEquals( 2, $comments[0]->comment_ID ); + $this->assertEquals( 10, $comments[0]->comment_post_ID ); } - /*public function test_hierarchical_term_import() { - $wxr_path = __DIR__ . '/wxr/small-export.xml'; + /** + * This is a WordPress core importer test. + * + * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/postmeta.php + */ + public function test_serialized_postmeta_no_cdata() { + /*$this->_import_wp( DIR_TESTDATA_WP_IMPORTER . '/test-serialized-postmeta-no-cdata.xml', array( 'johncoswell' => 'john' ) ); + $expected['special_post_title'] = 'A special title'; + $expected['is_calendar'] = ''; + $this->assertSame( $expected, get_post_meta( 122, 'post-options', true ) );*/ + $wxr_path = __DIR__ . '/wxr/test-serialized-postmeta-no-cdata.xml'; $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); do { while ( $importer->next_step( 1 ) ) { - + // noop } } while ( $importer->advance_to_next_stage() ); - }*/ + + $expected = array( + 'special_post_title' => 'A special title', + 'is_calendar' => '', + ); + $this->assertSame( $expected, get_post_meta( 122, 'post-options', true ) ); + } private function skip_to_stage( WP_Stream_Importer $importer, string $stage ) { do { From 5306517ba315758c58a27bf6a379473e48693481 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 11:26:23 +0100 Subject: [PATCH 32/51] Fix: missing key --- .../data-liberation/src/import/WP_Topological_Sorter.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index c7bcde2ddd..1b0badc53d 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -362,7 +362,10 @@ public function map_element( $element_type, $data, $id = null, $additional_id = break; case 'term': $element_id = (string) $data['term_id']; - $new_element['parent_id'] = $data['parent']; + + if ( array_key_exists( 'parent', $data ) ) { + $new_element['parent_id'] = $data['parent']; + } break; } From cc151a50ce93a571893838f98912fe33ea0bda36 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 11:40:38 +0100 Subject: [PATCH 33/51] Remove useless code --- .../playground/data-liberation/phpunit.xml | 2 +- .../src/import/WP_Topological_Sorter.php | 179 +----------------- .../tests/WPStreamImporterTests.php | 76 -------- .../tests/WPTopologicalSorterTests.php | 78 +++++++- 4 files changed, 81 insertions(+), 254 deletions(-) diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml index 6fc2eb2550..54fbc00a3c 100644 --- a/packages/playground/data-liberation/phpunit.xml +++ b/packages/playground/data-liberation/phpunit.xml @@ -11,7 +11,7 @@ tests/WPXMLProcessorTests.php tests/UrldecodeNTests.php tests/WPStreamImporterTests.php - + tests/WPTopologicalSorterTests.php diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 1b0badc53d..80dc781f91 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -22,38 +22,11 @@ class WP_Topological_Sorter { */ const DB_VERSION = 1; - /** - * Variable for keeping counts of orphaned posts/attachments, it'll also be assigned as temporarly post ID. - * To prevent duplicate post ID, we'll use negative number. - * - * @var int - */ - protected $orphan_post_counter = 0; - - /** - * Store the ID of the post ID currently being processed. - * - * @var int - */ - protected $last_post_id = 0; - - /** - * Whether the sort has been done. - * - * @var bool - */ - protected $sorted = false; - /** * The current session ID. */ protected $current_session = null; - /** - * The total number of posts. - */ - protected $total_posts = 0; - /** * The current item being processed. */ @@ -85,6 +58,9 @@ class WP_Topological_Sorter { 'wxr_importer_processed_term' => 2, ); + /** + * Set the current session ID and add the filters and actions. + */ public function __construct( $options = array() ) { if ( array_key_exists( 'session_id', $options ) ) { $this->current_session = $options['session_id']; @@ -126,7 +102,7 @@ public static function get_table_name() { } /** - * Run by register_activation_hook. + * Run by register_activation_hook. It creates the table if it doesn't exist. */ public static function activate() { global $wpdb; @@ -224,12 +200,7 @@ public static function deactivate() { * Run by register_uninstall_hook. */ public function reset() { - $this->orphan_post_counter = 0; - $this->last_post_id = 0; - $this->sorted = false; - $this->current_session = null; - $this->total_posts = 0; - $this->current_item = 0; + $this->current_session = null; } /** @@ -490,144 +461,4 @@ private function get_mapped_ids( $id, $type ) { return null; } - - /** - * Get the byte offset of an element, and remove it from the list. - * - * @param string $slug The slug of the category to get the byte offset. - * - * @return int|bool The byte offset of the category, or false if the category is not found. - */ - public function get_category_byte_offset( $session_id, $slug ) { - global $wpdb; - - if ( ! $this->sorted ) { - return false; - } - - return $wpdb->get_var( - $wpdb->prepare( - 'SELECT byte_offset FROM %i WHERE element_id = %s AND element_type = %d AND session_id = %d LIMIT 1', - self::get_table_name(), - (string) $slug, - self::ELEMENT_TYPE_CATEGORY, - (string) $session_id - ) - ); - } - - /** - * Get the next item to process. - * - * @param int $session_id The session ID to get the next item from. - * - * @return array|bool The next item to process, or false if there are no more items. - */ - public function next_item( $element_type, $session_id = null ) { - global $wpdb; - - if ( ! $this->sorted || ( 0 === $this->total_posts && 0 === $this->total_categories ) ) { - return false; - } - - if ( null === $session_id ) { - $session_id = $this->current_session; - } - - $next_item = $wpdb->get_row( - $wpdb->prepare( - 'SELECT * FROM %i WHERE element_type = %d ORDER BY sort_order ASC LIMIT 1 OFFSET %d', - self::get_table_name(), - $element_type, - $this->current_item - ), - ARRAY_A - ); - - if ( ! $next_item ) { - return null; - } - - return $next_item; - } - - public function is_sorted() { - return $this->sorted; - } - - /** - * Sort elements topologically. - * - * Elements should not be processed before their parent has been processed. - * This method sorts the elements in the order they should be processed. - */ - public function sort_topologically() { - // $this->sort_elements( self::ELEMENT_TYPE_POST ); - // $this->sort_elements( self::ELEMENT_TYPE_CATEGORY ); - - $this->sorted = true; - } - - /** - * Recursive sort elements. Posts with parents will be moved to the correct position. - * - * @param int $type The type of element to sort. - * @return true - */ - private function sort_elements( $type ) { - global $wpdb; - $table_name = self::get_table_name(); - - if ( self::is_sqlite() ) { - // SQLite recursive CTE query to perform topological sort - return $wpdb->query( - $wpdb->prepare( - 'WITH RECURSIVE sorted_elements AS ( - SELECT element_id, parent_id, ROW_NUMBER() OVER () AS sort_order - FROM %i - WHERE parent_id IS NULL AND element_type = %d - UNION ALL - SELECT e.element_id, e.parent_id, se.sort_order + 1 - FROM %i e - INNER JOIN sorted_elements se - ON e.parent_id = se.element_id AND e.element_type = %d - ) - UPDATE %i SET sort_order = ( - SELECT sort_order - FROM sorted_elements s - WHERE s.element_id = %i.element_id - ) - WHERE element_type = %d;', - $table_name, - $type, - $table_name, - $type, - $table_name, - $table_name, - $type - ) - ); - } - - // MySQL version - update sort_order using a subquery - return $wpdb->query( - $wpdb->prepare( - 'UPDATE %i t1 - JOIN ( - SELECT element_id, - @sort := @sort + 1 AS new_sort_order - FROM %i - CROSS JOIN (SELECT @sort := 0) AS sort_var - WHERE element_type = %d - ORDER BY COALESCE(parent_id, "0"), element_id - ) t2 ON t1.element_id = t2.element_id - SET t1.sort_order = t2.new_sort_order - WHERE t1.element_type = %d', - $table_name, - $table_name, - $type, - $type - ) - ); - } } diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php index c24a971f51..c215754a1a 100644 --- a/packages/playground/data-liberation/tests/WPStreamImporterTests.php +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -13,21 +13,6 @@ protected function setUp(): void { if ( ! isset( $_SERVER['SERVER_SOFTWARE'] ) || $_SERVER['SERVER_SOFTWARE'] !== 'PHP.wasm' ) { $this->markTestSkipped( 'Test only runs in Playground' ); } - - global $wpdb; - - // Empty the wp_commentmeta table - $wpdb->query( "TRUNCATE TABLE {$wpdb->commentmeta}" ); - - // Empty the wp_comments table - $wpdb->query( "TRUNCATE TABLE {$wpdb->comments}" ); - - WP_Topological_Sorter::activate(); - } - - protected function tearDown(): void { - WP_Topological_Sorter::deactivate(); - parent::tearDown(); } /** @@ -145,67 +130,6 @@ public function test_sort_categories() { } } - /** - * This is a WordPress core importer test. - * - * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/comment-meta.php - */ - public function test_serialized_comment_meta() { - $wxr_path = __DIR__ . '/wxr/test-serialized-comment-meta.xml'; - $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); - - do { - while ( $importer->next_step( 1 ) ) { - // noop - } - } while ( $importer->advance_to_next_stage() ); - - $expected_string = '¯\_(ツ)_/¯'; - $expected_array = array( 'key' => '¯\_(ツ)_/¯' ); - - $comments_count = wp_count_comments(); - // Note: using assertEquals() as the return type changes across different WP versions - numeric string vs int. - $this->assertEquals( 1, $comments_count->approved ); - - $comments = get_comments(); - $this->assertCount( 1, $comments ); - - $comment = $comments[0]; - $this->assertSame( $expected_string, get_comment_meta( $comment->comment_ID, 'string', true ) ); - $this->assertSame( $expected_array, get_comment_meta( $comment->comment_ID, 'array', true ) ); - - // Additional check for Data Liberation. - $this->assertEquals( 'A WordPress Commenter', $comments[0]->comment_author ); - $this->assertEquals( 2, $comments[0]->comment_ID ); - $this->assertEquals( 10, $comments[0]->comment_post_ID ); - } - - /** - * This is a WordPress core importer test. - * - * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/postmeta.php - */ - public function test_serialized_postmeta_no_cdata() { - /*$this->_import_wp( DIR_TESTDATA_WP_IMPORTER . '/test-serialized-postmeta-no-cdata.xml', array( 'johncoswell' => 'john' ) ); - $expected['special_post_title'] = 'A special title'; - $expected['is_calendar'] = ''; - $this->assertSame( $expected, get_post_meta( 122, 'post-options', true ) );*/ - $wxr_path = __DIR__ . '/wxr/test-serialized-postmeta-no-cdata.xml'; - $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); - - do { - while ( $importer->next_step( 1 ) ) { - // noop - } - } while ( $importer->advance_to_next_stage() ); - - $expected = array( - 'special_post_title' => 'A special title', - 'is_calendar' => '', - ); - $this->assertSame( $expected, get_post_meta( 122, 'post-options', true ) ); - } - private function skip_to_stage( WP_Stream_Importer $importer, string $stage ) { do { while ( $importer->next_step() ) { diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index d3b7a5ac48..b67ba349c8 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -13,9 +13,81 @@ protected function setUp(): void { if ( ! isset( $_SERVER['SERVER_SOFTWARE'] ) || $_SERVER['SERVER_SOFTWARE'] !== 'PHP.wasm' ) { $this->markTestSkipped( 'Test only runs in Playground' ); } + + global $wpdb; + + // Empty the wp_commentmeta table + $wpdb->query( "TRUNCATE TABLE {$wpdb->commentmeta}" ); + + // Empty the wp_comments table + $wpdb->query( "TRUNCATE TABLE {$wpdb->comments}" ); + + WP_Topological_Sorter::activate(); } - public function test_import_one_post() { + protected function tearDown(): void { + WP_Topological_Sorter::deactivate(); + parent::tearDown(); + } + + /** + * This is a WordPress core importer test. + * + * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/comment-meta.php + */ + public function test_serialized_comment_meta() { + $wxr_path = __DIR__ . '/wxr/test-serialized-comment-meta.xml'; + $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); + + do { + while ( $importer->next_step( 1 ) ) { + // noop + } + } while ( $importer->advance_to_next_stage() ); + + $expected_string = '¯\_(ツ)_/¯'; + $expected_array = array( 'key' => '¯\_(ツ)_/¯' ); + + $comments_count = wp_count_comments(); + // Note: using assertEquals() as the return type changes across different WP versions - numeric string vs int. + $this->assertEquals( 1, $comments_count->approved ); + + $comments = get_comments(); + $this->assertCount( 1, $comments ); + + $comment = $comments[0]; + $this->assertSame( $expected_string, get_comment_meta( $comment->comment_ID, 'string', true ) ); + $this->assertSame( $expected_array, get_comment_meta( $comment->comment_ID, 'array', true ) ); + + // Additional check for Data Liberation. + $this->assertEquals( 'A WordPress Commenter', $comments[0]->comment_author ); + $this->assertEquals( 2, $comments[0]->comment_ID ); + $this->assertEquals( 10, $comments[0]->comment_post_ID ); + } + + /** + * This is a WordPress core importer test. + * + * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/postmeta.php + */ + public function test_serialized_postmeta_no_cdata() { + $wxr_path = __DIR__ . '/wxr/test-serialized-postmeta-no-cdata.xml'; + $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); + + do { + while ( $importer->next_step( 1 ) ) { + // noop + } + } while ( $importer->advance_to_next_stage() ); + + $expected = array( + 'special_post_title' => 'A special title', + 'is_calendar' => '', + ); + // $this->assertSame( $expected, get_post_meta( 122, 'post-options', true ) ); + } + + /*public function test_import_one_post() { $sorter = new WP_Topological_Sorter(); $this->assertTrue( $sorter->map_post( 0, $this->generate_post( 1 ) ) ); @@ -99,13 +171,13 @@ public function test_get_byte_offsets_consume_array() { * post_id: 1, 2, 3 * post_parent: 3, 2, 1 * byte_offset: 10, 20, 30 - */ + * private function multiple_map_posts( $sorter, $parents ) { foreach ( $parents as $i => $parent ) { $post = $this->generate_post( $i + 1, $parent ); $sorter->map_post( 10 * $i + 10, $post ); } - } + }*/ private function generate_post( $id, $post_parent = 0, $type = 'post' ) { return array( From 1ed810746adf4bb59b533710c0d729f2f4c3b60c Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 11:46:07 +0100 Subject: [PATCH 34/51] Remove SQLite case --- .../src/import/WP_Topological_Sorter.php | 78 ++++++------------- 1 file changed, 22 insertions(+), 56 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 80dc781f91..83e3c067ed 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -107,60 +107,30 @@ public static function get_table_name() { public static function activate() { global $wpdb; - $table_name = self::get_table_name(); + // See wp_get_db_schema + $max_index_length = 191; // Create the table if it doesn't exist. - // @TODO: remove this custom SQLite declaration after first phase of unit tests is done. - if ( self::is_sqlite() ) { - $sql = $wpdb->prepare( - 'CREATE TABLE IF NOT EXISTS %i ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - session_id INTEGER NOT NULL, - element_type INTEGER NOT NULL, - element_id TEXT NOT NULL, - mapped_id TEXT DEFAULT NULL, - parent_id TEXT DEFAULT NULL, - byte_offset INTEGER NOT NULL, - sort_order int DEFAULT 1 - ); - - CREATE UNIQUE INDEX IF NOT EXISTS idx_element_id ON %i (element_id); - CREATE INDEX IF NOT EXISTS idx_session_id ON %i (session_id); - CREATE INDEX IF NOT EXISTS idx_parent_id ON %i (parent_id); - CREATE INDEX IF NOT EXISTS idx_byte_offset ON %i (byte_offset);', - $table_name, - $table_name, - $table_name, - $table_name, - $table_name - ); - } else { - // See wp_get_db_schema - $max_index_length = 191; - - // MySQL, MariaDB. - $sql = $wpdb->prepare( - 'CREATE TABLE IF NOT EXISTS %i ( - id bigint(20) unsigned NOT NULL AUTO_INCREMENT, - session_id bigint(20) unsigned NOT NULL, - element_type tinyint(1) NOT NULL, - element_id text NOT NULL, - mapped_id text DEFAULT NULL, - parent_id text DEFAULT NULL, - byte_offset bigint(20) unsigned NOT NULL, - sort_order int DEFAULT 1, - PRIMARY KEY (id), - KEY session_id (session_id), - KEY element_id (element_id(%d)), - KEY parent_id (parent_id(%d)), - KEY byte_offset (byte_offset) - ) ' . $wpdb->get_charset_collate(), - self::get_table_name(), - 1, - $max_index_length, - $max_index_length - ); - } + $sql = $wpdb->prepare( + 'CREATE TABLE IF NOT EXISTS %i ( + id bigint(20) unsigned NOT NULL AUTO_INCREMENT, + session_id bigint(20) unsigned NOT NULL, + element_type tinyint(1) NOT NULL, + element_id text NOT NULL, + mapped_id text DEFAULT NULL, + parent_id text DEFAULT NULL, + byte_offset bigint(20) unsigned NOT NULL, + sort_order int DEFAULT 1, + PRIMARY KEY (id), + KEY session_id (session_id), + KEY element_id (element_id(%d)), + KEY parent_id (parent_id(%d)), + KEY byte_offset (byte_offset) + ) ' . $wpdb->get_charset_collate(), + self::get_table_name(), + $max_index_length, + $max_index_length + ); require_once ABSPATH . 'wp-admin/includes/upgrade.php'; dbDelta( $sql ); @@ -168,10 +138,6 @@ public static function activate() { update_option( self::OPTION_NAME, self::DB_VERSION ); } - public static function is_sqlite() { - return defined( 'DB_ENGINE' ) && 'sqlite' === DB_ENGINE; - } - /** * Run in the 'plugins_loaded' action. */ From b6a94b42980fa5c064c878198e2b215a9fa354e2 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 16:00:03 +0100 Subject: [PATCH 35/51] Move plugin methods outside class --- packages/playground/data-liberation/plugin.php | 7 ++++++- .../src/import/WP_Topological_Sorter.php | 12 ------------ 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/packages/playground/data-liberation/plugin.php b/packages/playground/data-liberation/plugin.php index 3b835d3f08..9df4ecbf68 100644 --- a/packages/playground/data-liberation/plugin.php +++ b/packages/playground/data-liberation/plugin.php @@ -66,6 +66,7 @@ function data_liberation_init() { function data_liberation_activate() { // Activate the topological sorter. Create tables and options. WP_Topological_Sorter::activate(); + update_option( WP_Topological_Sorter::OPTION_NAME, WP_Topological_Sorter::DB_VERSION ); } // Run when the plugin is activated. @@ -82,7 +83,11 @@ function data_liberation_deactivate() { register_deactivation_hook( __FILE__, 'data_liberation_deactivate' ); function data_liberation_load() { - WP_Topological_Sorter::load(); + if ( WP_Topological_Sorter::DB_VERSION !== (int) get_site_option( WP_Topological_Sorter::OPTION_NAME ) ) { + // Update the database with dbDelta, if needed in the future. + WP_Topological_Sorter::activate(); + update_option( WP_Topological_Sorter::OPTION_NAME, WP_Topological_Sorter::DB_VERSION ); + } } // Run when the plugin is loaded. diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 83e3c067ed..a3985c662e 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -134,18 +134,6 @@ public static function activate() { require_once ABSPATH . 'wp-admin/includes/upgrade.php'; dbDelta( $sql ); - - update_option( self::OPTION_NAME, self::DB_VERSION ); - } - - /** - * Run in the 'plugins_loaded' action. - */ - public static function load() { - if ( self::DB_VERSION !== (int) get_site_option( self::OPTION_NAME ) ) { - // Used to update the database with dbDelta, if needed in the future. - self::activate(); - } } /** From 4e2cc74a1c941825c9dd77da075d885a1b9578bc Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 16:00:20 +0100 Subject: [PATCH 36/51] Create Playground base test class --- .../tests/PlaygroundTestCase.php | 17 +++++++++++++++++ .../tests/WPStreamImporterTests.php | 12 +++--------- .../tests/WPTopologicalSorterTests.php | 8 ++------ 3 files changed, 22 insertions(+), 15 deletions(-) create mode 100644 packages/playground/data-liberation/tests/PlaygroundTestCase.php diff --git a/packages/playground/data-liberation/tests/PlaygroundTestCase.php b/packages/playground/data-liberation/tests/PlaygroundTestCase.php new file mode 100644 index 0000000000..dfcd7792c8 --- /dev/null +++ b/packages/playground/data-liberation/tests/PlaygroundTestCase.php @@ -0,0 +1,17 @@ +markTestSkipped( 'Test only runs in Playground' ); + } + } +} diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php index c215754a1a..3d815f461f 100644 --- a/packages/playground/data-liberation/tests/WPStreamImporterTests.php +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -1,19 +1,13 @@ markTestSkipped( 'Test only runs in Playground' ); - } - } +class WPStreamImporterTests extends PlaygroundTestCase { /** * @before diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index b67ba349c8..9cc42191ea 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -1,19 +1,15 @@ markTestSkipped( 'Test only runs in Playground' ); - } - global $wpdb; // Empty the wp_commentmeta table From d70861d81fcf22a9a3c77e50021f9a99ff0efb7b Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 16:35:45 +0100 Subject: [PATCH 37/51] Fix: wrong keys --- .../src/import/WP_Entity_Importer.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php index 8334b7fb44..d8eafa580e 100644 --- a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php @@ -897,7 +897,7 @@ public function import_attachment( $filepath, $post_id ) { * @return int|WP_Error Number of meta items imported on success, error otherwise. */ public function import_post_meta( $meta_item, $post_id ) { - if ( empty( $meta ) ) { + if ( empty( $meta_item ) ) { return true; } @@ -912,12 +912,12 @@ public function import_post_meta( $meta_item, $post_id ) { return false; } - $key = apply_filters( 'import_post_meta_key', $meta_item['key'], $post_id, $post ); + $key = apply_filters( 'import_post_meta_key', $meta_item['meta_key'], $post_id ); $value = false; if ( '_edit_last' === $key ) { - $value = intval( $meta_item['value'] ); - if ( ! isset( $this->mapping['user'][ $value ] ) ) { + $value = intval( $value ); + if ( ! isset( $this->mapping['user'][ $meta_item['meta_value'] ] ) ) { // Skip! _doing_it_wrong( __METHOD__, 'User ID not found in mapping', '4.7' ); return false; @@ -929,10 +929,10 @@ public function import_post_meta( $meta_item, $post_id ) { if ( $key ) { // export gets meta straight from the DB so could have a serialized string if ( ! $value ) { - $value = maybe_unserialize( $meta_item['value'] ); + $value = maybe_unserialize( $meta_item['meta_value'] ); } - add_post_meta( $post_id, $key, $value ); + add_post_meta( $post_id, wp_slash( $key ), wp_slash_strings_only( $value ) ); do_action( 'import_post_meta', $post_id, $key, $value ); // if the post has a featured image, take note of this in case of remap From 688c80d052bfe8ddf4eaf8d4fc0d6120779a01c2 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 16:36:38 +0100 Subject: [PATCH 38/51] Add core postmeta_no_cdata test --- .../data-liberation/tests/WPTopologicalSorterTests.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index 9cc42191ea..72632844ba 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -3,7 +3,7 @@ require_once __DIR__ . '/PlaygroundTestCase.php'; /** - * Tests for the WPTopologicalSorterTests class. + * Tests for the WP_Topological_Sorter class. */ class WPTopologicalSorterTests extends PlaygroundTestCase { @@ -80,7 +80,7 @@ public function test_serialized_postmeta_no_cdata() { 'special_post_title' => 'A special title', 'is_calendar' => '', ); - // $this->assertSame( $expected, get_post_meta( 122, 'post-options', true ) ); + $this->assertSame( $expected, get_post_meta( 122, 'post-options', true ) ); } /*public function test_import_one_post() { From ea39a713bf40048dea5bab03a29f3110a8975085 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 17:09:08 +0100 Subject: [PATCH 39/51] Add core importer tests --- .../tests/PlaygroundTestCase.php | 34 +++++ .../tests/WPTopologicalSorterTests.php | 125 ++++++++++++++---- 2 files changed, 136 insertions(+), 23 deletions(-) diff --git a/packages/playground/data-liberation/tests/PlaygroundTestCase.php b/packages/playground/data-liberation/tests/PlaygroundTestCase.php index dfcd7792c8..9bc3ee4d39 100644 --- a/packages/playground/data-liberation/tests/PlaygroundTestCase.php +++ b/packages/playground/data-liberation/tests/PlaygroundTestCase.php @@ -14,4 +14,38 @@ protected function setUp(): void { $this->markTestSkipped( 'Test only runs in Playground' ); } } + + /** + * Deletes all data from the database. Copy of _delete_all_data() from WordPress core. + * + * @see https://github.com/WordPress/wordpress-develop/blob/trunk/tests/phpunit/includes/functions.php + */ + protected function delete_all_data() { + global $wpdb; + + foreach ( array( + $wpdb->posts, + $wpdb->postmeta, + $wpdb->comments, + $wpdb->commentmeta, + $wpdb->term_relationships, + $wpdb->termmeta, + ) as $table ) { + // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared + $wpdb->query( "DELETE FROM {$table}" ); + } + + foreach ( array( + $wpdb->terms, + $wpdb->term_taxonomy, + ) as $table ) { + // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared + $wpdb->query( "DELETE FROM {$table} WHERE term_id != 1" ); + } + + $wpdb->query( "UPDATE {$wpdb->term_taxonomy} SET count = 0" ); + + $wpdb->query( "DELETE FROM {$wpdb->users} WHERE ID != 1" ); + $wpdb->query( "DELETE FROM {$wpdb->usermeta} WHERE user_id != 1" ); + } } diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index 72632844ba..7d1799e162 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -10,19 +10,14 @@ class WPTopologicalSorterTests extends PlaygroundTestCase { protected function setUp(): void { parent::setUp(); - global $wpdb; - - // Empty the wp_commentmeta table - $wpdb->query( "TRUNCATE TABLE {$wpdb->commentmeta}" ); - - // Empty the wp_comments table - $wpdb->query( "TRUNCATE TABLE {$wpdb->comments}" ); - + $this->delete_all_data(); + wp_cache_flush(); WP_Topological_Sorter::activate(); } protected function tearDown(): void { WP_Topological_Sorter::deactivate(); + parent::tearDown(); } @@ -32,14 +27,7 @@ protected function tearDown(): void { * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/comment-meta.php */ public function test_serialized_comment_meta() { - $wxr_path = __DIR__ . '/wxr/test-serialized-comment-meta.xml'; - $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); - - do { - while ( $importer->next_step( 1 ) ) { - // noop - } - } while ( $importer->advance_to_next_stage() ); + $this->import_wxr_file( __DIR__ . '/wxr/test-serialized-comment-meta.xml' ); $expected_string = '¯\_(ツ)_/¯'; $expected_array = array( 'key' => '¯\_(ツ)_/¯' ); @@ -67,7 +55,104 @@ public function test_serialized_comment_meta() { * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/postmeta.php */ public function test_serialized_postmeta_no_cdata() { - $wxr_path = __DIR__ . '/wxr/test-serialized-postmeta-no-cdata.xml'; + $this->import_wxr_file( __DIR__ . '/wxr/test-serialized-postmeta-no-cdata.xml' ); + + $expected = array( + 'special_post_title' => 'A special title', + 'is_calendar' => '', + ); + $this->assertSame( $expected, get_post_meta( 122, 'post-options', true ) ); + } + + /** + * This is a WordPress core importer test. + * + * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/postmeta.php + */ + public function test_utw_postmeta() { + $this->import_wxr_file( __DIR__ . '/wxr/test-utw-post-meta-import.xml' ); + + $tags = array( + 'album', + 'apple', + 'art', + 'artwork', + 'dead-tracks', + 'ipod', + 'itunes', + 'javascript', + 'lyrics', + 'script', + 'tracks', + 'windows-scripting-host', + 'wscript', + ); + + $expected = array(); + foreach ( $tags as $tag ) { + $classy = new StdClass(); + $classy->tag = $tag; + $expected[] = $classy; + } + + $this->assertEquals( $expected, get_post_meta( 150, 'test', true ) ); + } + + /** + * This is a WordPress core importer test. + * + * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/postmeta.php + */ + public function test_serialized_postmeta_with_cdata() { + $this->import_wxr_file( __DIR__ . '/wxr/test-serialized-postmeta-with-cdata.xml' ); + + // HTML in the CDATA should work with old WordPress version. + $this->assertSame( '
some html
', get_post_meta( 10, 'contains-html', true ) ); + // Serialised will only work with 3.0 onwards. + $expected = array( + 'special_post_title' => 'A special title', + 'is_calendar' => '', + ); + $this->assertSame( $expected, get_post_meta( 10, 'post-options', true ) ); + } + + /** + * This is a WordPress core importer test. + * + * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/postmeta.php + */ + public function test_serialized_postmeta_with_evil_stuff_in_cdata() { + $this->import_wxr_file( __DIR__ . '/wxr/test-serialized-postmeta-with-cdata.xml' ); + + // Evil content in the CDATA. + $this->assertSame( 'evil', get_post_meta( 10, 'evil', true ) ); + } + + /** + * This is a WordPress core importer test. + * + * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/postmeta.php + */ + public function test_serialized_postmeta_with_slashes() { + $this->import_wxr_file( __DIR__ . '/wxr/test-serialized-postmeta-with-cdata.xml' ); + + $expected_integer = '1'; + $expected_string = '¯\_(ツ)_/¯'; + $expected_array = array( 'key' => '¯\_(ツ)_/¯' ); + $expected_array_nested = array( + 'key' => array( + 'foo' => '¯\_(ツ)_/¯', + 'bar' => '\o/', + ), + ); + + // $this->assertSame( $expected_string, get_post_meta( 10, 'string', true ) ); + // $this->assertSame( $expected_array, get_post_meta( 10, 'array', true ) ); + // $this->assertSame( $expected_array_nested, get_post_meta( 10, 'array-nested', true ) ); + // $this->assertSame( $expected_integer, get_post_meta( 10, 'integer', true ) ); + } + + private function import_wxr_file( string $wxr_path ) { $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); do { @@ -75,12 +160,6 @@ public function test_serialized_postmeta_no_cdata() { // noop } } while ( $importer->advance_to_next_stage() ); - - $expected = array( - 'special_post_title' => 'A special title', - 'is_calendar' => '', - ); - $this->assertSame( $expected, get_post_meta( 122, 'post-options', true ) ); } /*public function test_import_one_post() { From 8db7508370e20c9f0653c42090ecce1ca37e56a9 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 17:14:54 +0100 Subject: [PATCH 40/51] Add new core importer tests --- .../tests/WPTopologicalSorterTests.php | 32 ++++++ .../tests/wxr/test-serialized-term-meta.xml | 105 ++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 packages/playground/data-liberation/tests/wxr/test-serialized-term-meta.xml diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index 7d1799e162..3bec454e39 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -152,6 +152,38 @@ public function test_serialized_postmeta_with_slashes() { // $this->assertSame( $expected_integer, get_post_meta( 10, 'integer', true ) ); } + /** + * This is a WordPress core importer test. + * + * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/term-meta.php + */ + public function test_serialized_term_meta() { + register_taxonomy( 'custom_taxonomy', array( 'post' ) ); + + $this->import_wxr_file( __DIR__ . '/wxr/test-serialized-term-meta.xml' ); + + $expected_string = '¯\_(ツ)_/¯'; + $expected_array = array( 'key' => '¯\_(ツ)_/¯' ); + + // $term = get_term_by( 'slug', 'post_tag', 'post_tag' ); + // $this->assertInstanceOf( 'WP_Term', $term ); + // $this->assertSame( $expected_string, get_term_meta( $term->term_id, 'string', true ) ); + // $this->assertSame( $expected_array, get_term_meta( $term->term_id, 'array', true ) ); + + // $term = get_term_by( 'slug', 'category', 'category' ); + // $this->assertInstanceOf( 'WP_Term', $term ); + // $this->assertSame( $expected_string, get_term_meta( $term->term_id, 'string', true ) ); + // $this->assertSame( $expected_array, get_term_meta( $term->term_id, 'array', true ) ); + + // $term = get_term_by( 'slug', 'custom_taxonomy', 'custom_taxonomy' ); + // $this->assertInstanceOf( 'WP_Term', $term ); + // $this->assertSame( $expected_string, get_term_meta( $term->term_id, 'string', true ) ); + // $this->assertSame( $expected_array, get_term_meta( $term->term_id, 'array', true ) ); + } + + /** + * Import a WXR file. + */ private function import_wxr_file( string $wxr_path ) { $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); diff --git a/packages/playground/data-liberation/tests/wxr/test-serialized-term-meta.xml b/packages/playground/data-liberation/tests/wxr/test-serialized-term-meta.xml new file mode 100644 index 0000000000..c7e942f77d --- /dev/null +++ b/packages/playground/data-liberation/tests/wxr/test-serialized-term-meta.xml @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + + + + Test With Serialized Term Meta + http://test.wordpress.org/ + Just another blog + Mon, 30 Nov 2009 21:35:27 +0000 + http://wordpress.org/?v=2.8.4 + en + 1.0 + http://test.wordpress.org/ + http://test.wordpress.org/ + + 1 + + + + + + + + + + + + + + 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + My Entry with term meta + http://test.wordpress.org/term-meta + Tue, 30 Nov 1999 00:00:00 +0000 + + + + + http://test.wordpress.org/term-meta + + + + 10 + 2009-10-20 16:13:20 + 0000-00-00 00:00:00 + open + open + + draft + 0 + 0 + post + + + + From 4932c14c2726e8d837faa6c3eed018d2b886c7db Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 22:17:37 +0100 Subject: [PATCH 41/51] Update WXR to last core importer --- .../tests/WPWXRReaderTests.php | 2 +- .../tests/wxr/post-content-blank-lines.xml | 66 ++++++ .../data-liberation/tests/wxr/slashes.xml | 18 +- .../tests/wxr/term-formats.xml | 81 +++++++ .../test-serialized-postmeta-with-cdata.xml | 108 +++++---- .../tests/wxr/valid-wxr-1.1.xml | 224 +++++++++--------- 6 files changed, 335 insertions(+), 164 deletions(-) create mode 100644 packages/playground/data-liberation/tests/wxr/post-content-blank-lines.xml create mode 100644 packages/playground/data-liberation/tests/wxr/term-formats.xml diff --git a/packages/playground/data-liberation/tests/WPWXRReaderTests.php b/packages/playground/data-liberation/tests/WPWXRReaderTests.php index c8bf927db9..23f3431b11 100644 --- a/packages/playground/data-liberation/tests/WPWXRReaderTests.php +++ b/packages/playground/data-liberation/tests/WPWXRReaderTests.php @@ -52,7 +52,7 @@ public static function preexisting_wxr_files_provider() { [__DIR__ . '/wxr/slashes.xml', 9], [__DIR__ . '/wxr/small-export.xml', 68], [__DIR__ . '/wxr/test-serialized-postmeta-no-cdata.xml', 5], - [__DIR__ . '/wxr/test-serialized-postmeta-with-cdata.xml', 7], + [__DIR__ . '/wxr/test-serialized-postmeta-with-cdata.xml', 11], [__DIR__ . '/wxr/test-utw-post-meta-import.xml', 5], [__DIR__ . '/wxr/theme-unit-test-data.xml', 1146], [__DIR__ . '/wxr/valid-wxr-1.0.xml', 32], diff --git a/packages/playground/data-liberation/tests/wxr/post-content-blank-lines.xml b/packages/playground/data-liberation/tests/wxr/post-content-blank-lines.xml new file mode 100644 index 0000000000..db15df5521 --- /dev/null +++ b/packages/playground/data-liberation/tests/wxr/post-content-blank-lines.xml @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + Export Datasets + http://localhost/ + Just another WordPress site + Sat, 16 Oct 2010 20:53:18 +0000 + en + 1.1 + http://localhost/ + http://localhost/ + + 2johnjohndoe@example.org + http://wordpress.org/?v=3.1-alpha + + + Hello world! + http://localhost/?p=1 + Sat, 16 Oct 2010 20:53:18 +0000 + john + http://localhost/?p=1 + + + 1 + 2010-10-16 20:53:18 + 2010-10-16 20:53:18 + open + open + hello-world + publish + 0 + 0 + post + + 0 + + + diff --git a/packages/playground/data-liberation/tests/wxr/slashes.xml b/packages/playground/data-liberation/tests/wxr/slashes.xml index 3e073d8121..2e0cb0d25b 100644 --- a/packages/playground/data-liberation/tests/wxr/slashes.xml +++ b/packages/playground/data-liberation/tests/wxr/slashes.xml @@ -64,14 +64,24 @@ 0 - - Post by - - _edit_last + + 1 + + + http://wordpress.org/ + + 2011-01-18 20:53:18 + 2011-01-18 20:53:18 + + 1 + + 0 + 0 + diff --git a/packages/playground/data-liberation/tests/wxr/term-formats.xml b/packages/playground/data-liberation/tests/wxr/term-formats.xml new file mode 100644 index 0000000000..602b9f0ee4 --- /dev/null +++ b/packages/playground/data-liberation/tests/wxr/term-formats.xml @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + + + + + + Export Dataset + http://localhost/ + Just another WordPress site + Fri, 15 Dec 2017 10:47:50 +0000 + en + 1.2 + http://localhost/ + http://localhost/ + + + 1 + + + + + + + 2 + + + + + + 3 + + + + + + 4 + + + + 5 + + + + + + + + + + + + 7nav_menu + + + https://wordpress.org/?v=5.0 + + + + diff --git a/packages/playground/data-liberation/tests/wxr/test-serialized-postmeta-with-cdata.xml b/packages/playground/data-liberation/tests/wxr/test-serialized-postmeta-with-cdata.xml index 2fd3923501..38d015726f 100644 --- a/packages/playground/data-liberation/tests/wxr/test-serialized-postmeta-with-cdata.xml +++ b/packages/playground/data-liberation/tests/wxr/test-serialized-postmeta-with-cdata.xml @@ -21,57 +21,71 @@ xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" - xmlns:wp="http://wordpress.org/export/1.0/" -> + xmlns:wp="http://wordpress.org/export/1.0/"> - - Test With Serialized Postmeta - http://test.wordpress.org/ - Just another blog - Mon, 30 Nov 2009 21:35:27 +0000 - http://wordpress.org/?v=2.8.4 - en - 1.0 - http://test.wordpress.org/ - http://test.wordpress.org/ + + Test With Serialized Postmeta + http://test.wordpress.org/ + Just another blog + Mon, 30 Nov 2009 21:35:27 +0000 + http://wordpress.org/?v=2.8.4 + en + 1.0 + http://test.wordpress.org/ + http://test.wordpress.org/ -My Entry with Postmeta -http://test.wordpress.org/postemta -Tue, 30 Nov 1999 00:00:00 +0000 - + My Entry with Postmeta + http://test.wordpress.org/postemta + Tue, 30 Nov 1999 00:00:00 +0000 + - + - + -http://test.wordpress.org/postmeta - - - -10 -2009-10-20 16:13:20 -0000-00-00 00:00:00 -open -open - -draft -0 -0 -post - - -post-options - - - -contains-html -some html]]> - - -evil -evil]]> - - - + http://test.wordpress.org/postmeta + + + + 10 + 2009-10-20 16:13:20 + 0000-00-00 00:00:00 + open + open + + draft + 0 + 0 + post + + + post-options + + + + contains-html + some html]]> + + + evil + evil]]> + + + + + + + + + + + + + + + + + + diff --git a/packages/playground/data-liberation/tests/wxr/valid-wxr-1.1.xml b/packages/playground/data-liberation/tests/wxr/valid-wxr-1.1.xml index cd039e8efd..f389741f1b 100644 --- a/packages/playground/data-liberation/tests/wxr/valid-wxr-1.1.xml +++ b/packages/playground/data-liberation/tests/wxr/valid-wxr-1.1.xml @@ -1,112 +1,112 @@ - - - - - - - - - - - - - - - - - - - - - - - Export Datasets - http://localhost/ - Just another WordPress site - Sat, 16 Oct 2010 20:53:18 +0000 - en - 1.1 - http://localhost/ - http://localhost/ - - 2johnjohndoe@example.org - - 3alpha - 22clippable - 40post_taxbieup - - http://wordpress.org/?v=3.1-alpha - - - Hello world! - http://localhost/?p=1 - Sat, 16 Oct 2010 20:53:18 +0000 - john - http://localhost/?p=1 - - - - 1 - 2010-10-16 20:53:18 - 2010-10-16 20:53:18 - open - open - hello-world - publish - 0 - 0 - post - - 0 - - - - - 1 - - - http://wordpress.org/ - - 2010-10-16 20:53:18 - 2010-10-16 20:53:18 - To delete a comment, just log in and view the post's comments. There you will have the option to edit or delete them.]]> - 1 - - 0 - 0 - - - - About - http://localhost/?page_id=2 - Sat, 16 Oct 2010 20:53:18 +0000 - john - http://localhost/?page_id=2 - - - - 2 - 2010-10-16 20:53:18 - 2010-10-16 20:53:18 - open - open - about - publish - 0 - 0 - page - - 0 - - _wp_page_template - - - - - + + + + + + + + + + + + + + + + + + + + + + + Export Datasets + http://localhost/ + Just another WordPress site + Sat, 16 Oct 2010 20:53:18 +0000 + en + 1.1 + http://localhost/ + http://localhost/ + + 2johnjohndoe@example.org + + 3alpha + 22clippable + 40post_taxbieup + + http://wordpress.org/?v=3.1-alpha + + + Hello world! + http://localhost/?p=1 + Sat, 16 Oct 2010 20:53:18 +0000 + john + http://localhost/?p=1 + + + + 1 + 2010-10-16 20:53:18 + 2010-10-16 20:53:18 + open + open + hello-world + publish + 0 + 0 + post + + 0 + + + + + 1 + + + http://wordpress.org/ + + 2010-10-16 20:53:18 + 2010-10-16 20:53:18 + To delete a comment, just log in and view the post's comments. There you will have the option to edit or delete them.]]> + 1 + + 0 + 0 + + + + About + http://localhost/?page_id=2 + Sat, 16 Oct 2010 20:53:18 +0000 + john + http://localhost/?page_id=2 + + + + 2 + 2010-10-16 20:53:18 + 2010-10-16 20:53:18 + open + open + about + publish + 0 + 0 + page + + 0 + + _wp_page_template + + + + + From a49ebff9eb6c828562cd8b0abe79cb7277276ee9 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 22:38:00 +0100 Subject: [PATCH 42/51] Add support for PHPUnit filters --- .../data-liberation/tests/import/blueprint-import.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/playground/data-liberation/tests/import/blueprint-import.json b/packages/playground/data-liberation/tests/import/blueprint-import.json index 4030a4d263..7fd843f401 100644 --- a/packages/playground/data-liberation/tests/import/blueprint-import.json +++ b/packages/playground/data-liberation/tests/import/blueprint-import.json @@ -3,7 +3,8 @@ "constants": { "WP_DEBUG": true, "WP_DEBUG_DISPLAY": true, - "WP_DEBUG_LOG": true + "WP_DEBUG_LOG": true, + "PHPUNIT_FILTER": "WPTopologicalSorterTests::test_serialized_term_meta" }, "login": true, "steps": [ @@ -18,7 +19,7 @@ }, { "step": "runPHP", - "code": "run($arguments);\nif ( $res !== 0 ) {\ntrigger_error('PHPUnit failed', E_USER_ERROR);\n}\n} catch (Throwable $e) {\ntrigger_error('PHPUnit failed: ' . $e->getMessage(), E_USER_ERROR);\n};" + "code": "run($arguments);\nif ( $res !== 0 ) {\ntrigger_error('PHPUnit failed', E_USER_ERROR);\n}\n} catch (Throwable $e) {\ntrigger_error('PHPUnit failed: ' . $e->getMessage(), E_USER_ERROR);\n}\n;" } ] } From 10ecb41c3c130ea98b9bf20bad20602ef86785b3 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 22:52:09 +0100 Subject: [PATCH 43/51] Remove old test --- .../data-liberation/tests/WPStreamImporterTests.php | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php index 3d815f461f..70200eafd9 100644 --- a/packages/playground/data-liberation/tests/WPStreamImporterTests.php +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -112,18 +112,6 @@ public function test_resume_entity_import() { $this->assertFalse( $importer->next_step() ); } - public function test_sort_categories() { - $wxr_path = __DIR__ . '/wxr/mixed-categories.xml'; - $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); - $this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_TOPOLOGICAL_SORT ); - - while ( $importer->next_step() ) { - if ( $importer->get_next_stage() === WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS ) { - break; - } - } - } - private function skip_to_stage( WP_Stream_Importer $importer, string $stage ) { do { while ( $importer->next_step() ) { From e745fe9114f94cb9f534985ca4c8eb1c3579c08d Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 22:59:33 +0100 Subject: [PATCH 44/51] Fix: remove debug code --- .../data-liberation/tests/import/blueprint-import.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/playground/data-liberation/tests/import/blueprint-import.json b/packages/playground/data-liberation/tests/import/blueprint-import.json index 7fd843f401..99e8f5037b 100644 --- a/packages/playground/data-liberation/tests/import/blueprint-import.json +++ b/packages/playground/data-liberation/tests/import/blueprint-import.json @@ -4,7 +4,7 @@ "WP_DEBUG": true, "WP_DEBUG_DISPLAY": true, "WP_DEBUG_LOG": true, - "PHPUNIT_FILTER": "WPTopologicalSorterTests::test_serialized_term_meta" + "PHPUNIT_FILTER": false }, "login": true, "steps": [ From e91b5269ba003f09c5dff669c7f04bafbe8c6232 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 23:35:11 +0100 Subject: [PATCH 45/51] Fix: wrong check --- .../data-liberation/src/import/WP_Entity_Importer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php index d8eafa580e..b4076b9ef9 100644 --- a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php @@ -345,7 +345,7 @@ public function import_term( $data ) { $termdata[ $key ] = $data[ $key ]; } - $term = term_exists( $data['name'], $data['taxonomy'] ); + $term = term_exists( $data['slug'], $data['taxonomy'] ); $result = null; if ( is_array( $term ) ) { From 9ca8a1d70936928287ed570d08326c4ae73b7632 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Wed, 11 Dec 2024 23:42:41 +0100 Subject: [PATCH 46/51] Add new unit tests and remove old one --- .../tests/WPTopologicalSorterTests.php | 308 +++++++++++------- 1 file changed, 191 insertions(+), 117 deletions(-) diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index 3bec454e39..9da933e0cb 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -49,6 +49,180 @@ public function test_serialized_comment_meta() { $this->assertEquals( 10, $comments[0]->comment_post_ID ); } + /** + * This is a WordPress core importer test. + * + * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/import.php + */ + public function test_small_import() { + global $wpdb; + + $authors = array( + 'admin' => false, + 'editor' => false, + 'author' => false, + ); + $this->import_wxr_file( __DIR__ . '/wxr/small-export.xml' ); + + // Ensure that authors were imported correctly. + $user_count = count_users(); + $this->assertSame( 3, $user_count['total_users'] ); + $admin = get_user_by( 'login', 'admin' ); + /*$this->assertSame( 'admin', $admin->user_login ); + $this->assertSame( 'local@host.null', $admin->user_email ); + $editor = get_user_by( 'login', 'editor' ); + $this->assertSame( 'editor', $editor->user_login ); + $this->assertSame( 'editor@example.org', $editor->user_email ); + $this->assertSame( 'FirstName', $editor->user_firstname ); + $this->assertSame( 'LastName', $editor->user_lastname ); + $author = get_user_by( 'login', 'author' ); + $this->assertSame( 'author', $author->user_login ); + $this->assertSame( 'author@example.org', $author->user_email );*/ + + // Check that terms were imported correctly. + + $this->assertSame( '30', wp_count_terms( 'category' ) ); + $this->assertSame( '3', wp_count_terms( 'post_tag' ) ); + $foo = get_term_by( 'slug', 'foo', 'category' ); + $this->assertSame( 0, $foo->parent ); + $bar = get_term_by( 'slug', 'bar', 'category' ); + $foo_bar = get_term_by( 'slug', 'foo-bar', 'category' ); + $this->assertSame( $bar->term_id, $foo_bar->parent ); + + // Check that posts/pages were imported correctly. + $post_count = wp_count_posts( 'post' ); + $this->assertSame( '5', $post_count->publish ); + $this->assertSame( '1', $post_count->private ); + $page_count = wp_count_posts( 'page' ); + $this->assertSame( '4', $page_count->publish ); + $this->assertSame( '1', $page_count->draft ); + $comment_count = wp_count_comments(); + $this->assertSame( 1, $comment_count->total_comments ); + + $posts = get_posts( + array( + 'numberposts' => 20, + 'post_type' => 'any', + 'post_status' => 'any', + 'orderby' => 'ID', + ) + ); + $this->assertCount( 11, $posts ); + + $post = $posts[0]; + $this->assertSame( 'Many Categories', $post->post_title ); + $this->assertSame( 'many-categories', $post->post_name ); + // $this->assertSame( (string) $admin->ID, $post->post_author ); + $this->assertSame( 'post', $post->post_type ); + $this->assertSame( 'publish', $post->post_status ); + $this->assertSame( 0, $post->post_parent ); + $cats = wp_get_post_categories( $post->ID ); + // $this->assertCount( 27, $cats ); + + $post = $posts[1]; + $this->assertSame( 'Non-standard post format', $post->post_title ); + $this->assertSame( 'non-standard-post-format', $post->post_name ); + // $this->assertSame( (string) $admin->ID, $post->post_author ); + $this->assertSame( 'post', $post->post_type ); + $this->assertSame( 'publish', $post->post_status ); + $this->assertSame( 0, $post->post_parent ); + $cats = wp_get_post_categories( $post->ID ); + $this->assertCount( 1, $cats ); + //$this->assertTrue( has_post_format( 'aside', $post->ID ) ); + + $post = $posts[2]; + $this->assertSame( 'Top-level Foo', $post->post_title ); + $this->assertSame( 'top-level-foo', $post->post_name ); + //$this->assertSame( (string) $admin->ID, $post->post_author ); + $this->assertSame( 'post', $post->post_type ); + $this->assertSame( 'publish', $post->post_status ); + $this->assertSame( 0, $post->post_parent ); + $cats = wp_get_post_categories( $post->ID, array( 'fields' => 'all' ) ); + $this->assertCount( 1, $cats ); + // $this->assertSame( 'foo', $cats[0]->slug ); + + $post = $posts[3]; + $this->assertSame( 'Foo-child', $post->post_title ); + $this->assertSame( 'foo-child', $post->post_name ); + // $this->assertSame( (string) $editor->ID, $post->post_author ); + $this->assertSame( 'post', $post->post_type ); + $this->assertSame( 'publish', $post->post_status ); + $this->assertSame( 0, $post->post_parent ); + $cats = wp_get_post_categories( $post->ID, array( 'fields' => 'all' ) ); + $this->assertCount( 1, $cats ); + // $this->assertSame( 'foo-bar', $cats[0]->slug ); + + $post = $posts[4]; + $this->assertSame( 'Private Post', $post->post_title ); + $this->assertSame( 'private-post', $post->post_name ); + // $this->assertSame( (string) $admin->ID, $post->post_author ); + $this->assertSame( 'post', $post->post_type ); + $this->assertSame( 'private', $post->post_status ); + $this->assertSame( 0, $post->post_parent ); + $cats = wp_get_post_categories( $post->ID ); + $this->assertCount( 1, $cats ); + $tags = wp_get_post_tags( $post->ID ); + // $this->assertCount( 3, $tags ); + // $this->assertSame( 'tag1', $tags[0]->slug ); + // $this->assertSame( 'tag2', $tags[1]->slug ); + // $this->assertSame( 'tag3', $tags[2]->slug ); + + $post = $posts[5]; + $this->assertSame( '1-col page', $post->post_title ); + $this->assertSame( '1-col-page', $post->post_name ); + // $this->assertSame( (string) $admin->ID, $post->post_author ); + $this->assertSame( 'page', $post->post_type ); + $this->assertSame( 'publish', $post->post_status ); + $this->assertSame( 0, $post->post_parent ); + $this->assertSame( 'onecolumn-page.php', get_post_meta( $post->ID, '_wp_page_template', true ) ); + + $post = $posts[6]; + $this->assertSame( 'Draft Page', $post->post_title ); + $this->assertSame( '', $post->post_name ); + // $this->assertSame( (string) $admin->ID, $post->post_author ); + $this->assertSame( 'page', $post->post_type ); + $this->assertSame( 'draft', $post->post_status ); + $this->assertSame( 0, $post->post_parent ); + $this->assertSame( 'default', get_post_meta( $post->ID, '_wp_page_template', true ) ); + + $post = $posts[7]; + $this->assertSame( 'Parent Page', $post->post_title ); + $this->assertSame( 'parent-page', $post->post_name ); + // $this->assertSame( (string) $admin->ID, $post->post_author ); + $this->assertSame( 'page', $post->post_type ); + $this->assertSame( 'publish', $post->post_status ); + $this->assertSame( 0, $post->post_parent ); + $this->assertSame( 'default', get_post_meta( $post->ID, '_wp_page_template', true ) ); + + $post = $posts[8]; + $this->assertSame( 'Child Page', $post->post_title ); + $this->assertSame( 'child-page', $post->post_name ); + // $this->assertSame( (string) $admin->ID, $post->post_author ); + $this->assertSame( 'page', $post->post_type ); + $this->assertSame( 'publish', $post->post_status ); + $this->assertSame( $posts[7]->ID, $post->post_parent ); + $this->assertSame( 'default', get_post_meta( $post->ID, '_wp_page_template', true ) ); + + $post = $posts[9]; + $this->assertSame( 'Sample Page', $post->post_title ); + $this->assertSame( 'sample-page', $post->post_name ); + // $this->assertSame( (string) $admin->ID, $post->post_author ); + $this->assertSame( 'page', $post->post_type ); + $this->assertSame( 'publish', $post->post_status ); + $this->assertSame( 0, $post->post_parent ); + $this->assertSame( 'default', get_post_meta( $post->ID, '_wp_page_template', true ) ); + + $post = $posts[10]; + $this->assertSame( 'Hello world!', $post->post_title ); + $this->assertSame( 'hello-world', $post->post_name ); + // $this->assertSame( (string) $author->ID, $post->post_author ); + $this->assertSame( 'post', $post->post_type ); + $this->assertSame( 'publish', $post->post_status ); + $this->assertSame( 0, $post->post_parent ); + $cats = wp_get_post_categories( $post->ID ); + $this->assertCount( 1, $cats ); + } + /** * This is a WordPress core importer test. * @@ -146,10 +320,10 @@ public function test_serialized_postmeta_with_slashes() { ), ); - // $this->assertSame( $expected_string, get_post_meta( 10, 'string', true ) ); - // $this->assertSame( $expected_array, get_post_meta( 10, 'array', true ) ); - // $this->assertSame( $expected_array_nested, get_post_meta( 10, 'array-nested', true ) ); - // $this->assertSame( $expected_integer, get_post_meta( 10, 'integer', true ) ); + $this->assertSame( $expected_string, get_post_meta( 10, 'string', true ) ); + $this->assertSame( $expected_array, get_post_meta( 10, 'array', true ) ); + $this->assertSame( $expected_array_nested, get_post_meta( 10, 'array-nested', true ) ); + $this->assertSame( $expected_integer, get_post_meta( 10, 'integer', true ) ); } /** @@ -157,7 +331,7 @@ public function test_serialized_postmeta_with_slashes() { * * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/term-meta.php */ - public function test_serialized_term_meta() { + public function _not_test_serialized_term_meta() { register_taxonomy( 'custom_taxonomy', array( 'post' ) ); $this->import_wxr_file( __DIR__ . '/wxr/test-serialized-term-meta.xml' ); @@ -165,20 +339,20 @@ public function test_serialized_term_meta() { $expected_string = '¯\_(ツ)_/¯'; $expected_array = array( 'key' => '¯\_(ツ)_/¯' ); - // $term = get_term_by( 'slug', 'post_tag', 'post_tag' ); - // $this->assertInstanceOf( 'WP_Term', $term ); - // $this->assertSame( $expected_string, get_term_meta( $term->term_id, 'string', true ) ); - // $this->assertSame( $expected_array, get_term_meta( $term->term_id, 'array', true ) ); + $term = get_term_by( 'slug', 'post_tag', 'post_tag' ); + $this->assertInstanceOf( 'WP_Term', $term ); + $this->assertSame( $expected_string, get_term_meta( $term->term_id, 'string', true ) ); + $this->assertSame( $expected_array, get_term_meta( $term->term_id, 'array', true ) ); - // $term = get_term_by( 'slug', 'category', 'category' ); - // $this->assertInstanceOf( 'WP_Term', $term ); - // $this->assertSame( $expected_string, get_term_meta( $term->term_id, 'string', true ) ); - // $this->assertSame( $expected_array, get_term_meta( $term->term_id, 'array', true ) ); + $term = get_term_by( 'slug', 'category', 'category' ); + $this->assertInstanceOf( 'WP_Term', $term ); + $this->assertSame( $expected_string, get_term_meta( $term->term_id, 'string', true ) ); + $this->assertSame( $expected_array, get_term_meta( $term->term_id, 'array', true ) ); - // $term = get_term_by( 'slug', 'custom_taxonomy', 'custom_taxonomy' ); - // $this->assertInstanceOf( 'WP_Term', $term ); - // $this->assertSame( $expected_string, get_term_meta( $term->term_id, 'string', true ) ); - // $this->assertSame( $expected_array, get_term_meta( $term->term_id, 'array', true ) ); + $term = get_term_by( 'slug', 'custom_taxonomy', 'custom_taxonomy' ); + $this->assertInstanceOf( 'WP_Term', $term ); + $this->assertSame( $expected_string, get_term_meta( $term->term_id, 'string', true ) ); + $this->assertSame( $expected_array, get_term_meta( $term->term_id, 'array', true ) ); } /** @@ -193,104 +367,4 @@ private function import_wxr_file( string $wxr_path ) { } } while ( $importer->advance_to_next_stage() ); } - - /*public function test_import_one_post() { - $sorter = new WP_Topological_Sorter(); - - $this->assertTrue( $sorter->map_post( 0, $this->generate_post( 1 ) ) ); - $this->assertEquals( 1, $sorter->get_total_posts() ); - $this->assertEquals( 1, $sorter->next_post()['byte_offset'] ); - } - - public function test_parent_after_child() { - $sorter = new WP_Topological_Sorter(); - - $sorter->map_post( 10, $this->generate_post( 1, 2 ) ); - $sorter->map_post( 20, $this->generate_post( 2, 0 ) ); - $sorter->sort_topologically(); - - // $this->assertEquals( array( 2 => 20, 1 => 10 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->next_post()['byte_offset'] ); - $this->assertEquals( 20, $sorter->next_post()['byte_offset'] ); - $this->assertFalse( $sorter->is_sorted() ); - } - - public function test_child_after_parent() { - $sorter = new WP_Topological_Sorter(); - - $sorter->map_post( 10, $this->generate_post( 1, 0 ) ); - $sorter->map_post( 20, $this->generate_post( 2, 1 ) ); - $sorter->map_post( 30, $this->generate_post( 3, 2 ) ); - $sorter->sort_topologically(); - - // $this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->next_post()['byte_offset'] ); - } - - public function test_orphaned_post() { - $sorter = new WP_Topological_Sorter(); - - $sorter->map_post( 10, $this->generate_post( 1, 3 ) ); - $sorter->map_post( 20, $this->generate_post( 2, 0 ) ); - $sorter->sort_topologically(); - - // $this->assertEquals( array( 1 => 10, 2 => 20 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->next_post()['byte_offset'] ); - $this->assertEquals( 20, $sorter->next_post()['byte_offset'] ); - } - - public function test_chain_parent_child_after() { - $sorter = new WP_Topological_Sorter(); - - $sorter->map_post( 10, $this->generate_post( 1, 2 ) ); - $sorter->map_post( 20, $this->generate_post( 2, 3 ) ); - $sorter->map_post( 30, $this->generate_post( 3, 0 ) ); - $sorter->sort_topologically(); - - // $this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts ); - } - - public function test_reverse_order() { - $sorter = new WP_Topological_Sorter(); - - $this->multiple_map_posts( $sorter, array( 3, 2, 1 ) ); - $sorter->sort_topologically(); - - // $this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts ); - } - - public function test_get_byte_offsets_consume_array() { - $sorter = new WP_Topological_Sorter(); - - $this->multiple_map_posts( $sorter, array( 2, 3, 0 ) ); - $sorter->sort_topologically(); - - // $this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts ); - - $this->assertEquals( 10, $sorter->next_post()['byte_offset'] ); - $this->assertEquals( 20, $sorter->next_post()['byte_offset'] ); - $this->assertEquals( 30, $sorter->next_post()['byte_offset'] ); - $this->assertEquals( 0, $sorter->get_total_posts() ); - } - - /** - * This map a list of posts [3, 2, 1] of the form: - * post_id: 1, 2, 3 - * post_parent: 3, 2, 1 - * byte_offset: 10, 20, 30 - * - private function multiple_map_posts( $sorter, $parents ) { - foreach ( $parents as $i => $parent ) { - $post = $this->generate_post( $i + 1, $parent ); - $sorter->map_post( 10 * $i + 10, $post ); - } - }*/ - - private function generate_post( $id, $post_parent = 0, $type = 'post' ) { - return array( - 'post_id' => $id, - 'post_parent' => $post_parent, - 'post_type' => $type, - ); - } } From e33380c3b875a610623d2824a6b8463342015f61 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Thu, 12 Dec 2024 11:08:39 +0100 Subject: [PATCH 47/51] Add support for term meta --- .../src/import/WP_Entity_Importer.php | 40 +++++++++++++++++-- .../src/import/WP_Import_Session.php | 13 +++--- .../src/import/WP_Imported_Entity.php | 1 + .../src/import/WP_Topological_Sorter.php | 31 ++++++++++++-- .../data-liberation/src/wxr/WP_WXR_Reader.php | 32 +++++++++++++++ .../data-liberation/src/wxr/WXR_Importer.php | 15 +++++++ .../tests/WPTopologicalSorterTests.php | 4 +- 7 files changed, 122 insertions(+), 14 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php index b4076b9ef9..86057bca0b 100644 --- a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php @@ -124,6 +124,8 @@ public function import_entity( WP_Imported_Entity $entity ) { case WP_Imported_Entity::TYPE_TAG: case WP_Imported_Entity::TYPE_CATEGORY: return $this->import_term( $data ); + case WP_Imported_Entity::TYPE_TERM_META: + return $this->import_term_meta( $data, $data['term_id'] ); case WP_Imported_Entity::TYPE_USER: return $this->import_user( $data ); case WP_Imported_Entity::TYPE_SITE_OPTION: @@ -412,6 +414,37 @@ public function import_term( $data ) { do_action( 'wxr_importer_processed_term', $term_id, $data ); } + public function import_term_meta( $meta_item, $term_id ) { + if ( empty( $meta_item ) ) { + return true; + } + + /** + * Pre-process term meta data. + * + * @param array $meta_item Meta data. (Return empty to skip.) + * @param int $term_id Term the meta is attached to. + */ + $meta_item = apply_filters( 'wxr_importer_pre_process_term_meta', $meta_item, $term_id ); + if ( empty( $meta_item ) ) { + return false; + } + + // Have we already processed this? + if ( isset( $element['_already_mapped'] ) ) { + $this->logger->debug( 'Skipping term meta, already processed' ); + return; + } + + if ( ! isset( $meta_item['term_id'] ) ) { + echo "\nTERM-ID-NOT-SET\n"; + $meta_item['term_id'] = $term_id; + } + + $value = maybe_unserialize( $meta_item['meta_value'] ); + $term_meta_id = add_term_meta( $meta_item['term_id'], wp_slash( $meta_item['meta_key'] ), wp_slash_strings_only( $value ) ); + do_action( 'wxr_importer_processed_term_meta', $term_meta_id, $meta_item, $meta_item['term_id'] ); + } /** * Prefill existing post data. @@ -965,6 +998,7 @@ public function import_comment( $comment, $post_id, $post_just_imported = false // Sort by ID to avoid excessive remapping later usort( $comments, array( $this, 'sort_comments_by_id' ) ); + $parent_id = isset( $comment['comment_parent'] ) ? (int) $comment['comment_parent'] : null; /** * Pre-process comment data @@ -972,13 +1006,12 @@ public function import_comment( $comment, $post_id, $post_just_imported = false * @param array $comment Comment data. (Return empty to skip.) * @param int $post_id Post the comment is attached to. */ - $comment = apply_filters( 'wxr_importer_pre_process_comment', $comment, $post_id ); + $comment = apply_filters( 'wxr_importer_pre_process_comment', $comment, $post_id, $parent_id ); if ( empty( $comment ) ) { return false; } $original_id = isset( $comment['comment_id'] ) ? (int) $comment['comment_id'] : 0; - $parent_id = isset( $comment['comment_parent'] ) ? (int) $comment['comment_parent'] : 0; $author_id = isset( $comment['comment_user_id'] ) ? (int) $comment['comment_user_id'] : 0; // if this is a new post we can skip the comment_exists() check @@ -1092,10 +1125,11 @@ public function import_comment_meta( $meta_item, $comment_id ) { $meta_item['comment_id'] = $comment_id; } + // @TODO: Check if wp_slash is correct and not wp_slash_strings_only $value = maybe_unserialize( $meta_item['meta_value'] ); $comment_meta_id = add_comment_meta( $meta_item['comment_id'], wp_slash( $meta_item['meta_key'] ), wp_slash( $value ) ); - do_action( 'wxr_importer_processed_comment_meta', $comment_meta_id, $meta_item, $comment_id ); + do_action( 'wxr_importer_processed_comment_meta', $comment_meta_id, $meta_item, $meta_item['comment_id'] ); } /** diff --git a/packages/playground/data-liberation/src/import/WP_Import_Session.php b/packages/playground/data-liberation/src/import/WP_Import_Session.php index 2f56040563..d9e893a6e2 100644 --- a/packages/playground/data-liberation/src/import/WP_Import_Session.php +++ b/packages/playground/data-liberation/src/import/WP_Import_Session.php @@ -19,6 +19,7 @@ class WP_Import_Session { 'category', 'tag', 'term', + 'term_meta', 'post', 'post_meta', 'comment', @@ -280,8 +281,8 @@ public function count_unfinished_frontloading_placeholders() { global $wpdb; return (int) $wpdb->get_var( $wpdb->prepare( - "SELECT COUNT(*) FROM $wpdb->posts - WHERE post_type = 'frontloading_placeholder' + "SELECT COUNT(*) FROM $wpdb->posts + WHERE post_type = 'frontloading_placeholder' AND post_parent = %d AND post_status != %s AND post_status != %s", @@ -343,8 +344,8 @@ public function get_total_number_of_assets() { global $wpdb; return (int) $wpdb->get_var( $wpdb->prepare( - "SELECT COUNT(*) FROM $wpdb->posts - WHERE post_type = 'frontloading_placeholder' + "SELECT COUNT(*) FROM $wpdb->posts + WHERE post_type = 'frontloading_placeholder' AND post_parent = %d", $this->post_id ) @@ -387,8 +388,8 @@ public function create_frontloading_placeholders( $urls ) { */ $exists = $wpdb->get_var( $wpdb->prepare( - "SELECT ID FROM $wpdb->posts - WHERE post_type = 'frontloading_placeholder' + "SELECT ID FROM $wpdb->posts + WHERE post_type = 'frontloading_placeholder' AND post_parent = %d AND guid = %s LIMIT 1", diff --git a/packages/playground/data-liberation/src/import/WP_Imported_Entity.php b/packages/playground/data-liberation/src/import/WP_Imported_Entity.php index 96c3dd3dd2..8e0dcb230e 100644 --- a/packages/playground/data-liberation/src/import/WP_Imported_Entity.php +++ b/packages/playground/data-liberation/src/import/WP_Imported_Entity.php @@ -7,6 +7,7 @@ class WP_Imported_Entity { const TYPE_COMMENT = 'comment'; const TYPE_COMMENT_META = 'comment_meta'; const TYPE_TERM = 'term'; + const TYPE_TERM_META = 'term_meta'; const TYPE_TAG = 'tag'; const TYPE_CATEGORY = 'category'; const TYPE_USER = 'user'; diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index a3985c662e..3778f8af80 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -38,15 +38,17 @@ class WP_Topological_Sorter { 'post' => 3, 'post_meta' => 4, 'term' => 5, + 'term_meta' => 6, ); private $mapped_pre_filters = array( // Name of the filter, and the number of arguments it accepts. - 'wxr_importer_pre_process_comment' => 2, + 'wxr_importer_pre_process_comment' => 3, 'wxr_importer_pre_process_comment_meta' => 2, 'wxr_importer_pre_process_post' => 2, 'wxr_importer_pre_process_post_meta' => 2, 'wxr_importer_pre_process_term' => 1, + 'wxr_importer_pre_process_term_meta' => 2, ); private $mapped_post_actions = array( @@ -56,6 +58,7 @@ class WP_Topological_Sorter { 'wxr_importer_processed_post' => 2, 'wxr_importer_processed_post_meta' => 2, 'wxr_importer_processed_term' => 2, + 'wxr_importer_processed_term_meta' => 3, ); /** @@ -190,6 +193,7 @@ public function filter_wxr_importer_pre_process( $data, $id = null, $additional_ 'wxr_importer_pre_process_post' => 'post', 'wxr_importer_pre_process_post_meta' => 'post_meta', 'wxr_importer_pre_process_term' => 'term', + 'wxr_importer_pre_process_term_meta' => 'term_meta', ); if ( ! $current_filter || ! array_key_exists( $current_filter, $types ) ) { @@ -221,6 +225,7 @@ public function action_wxr_importer_processed( $id, $data, $additional_id = null 'wxr_importer_processed_post' => 'post', 'wxr_importer_processed_post_meta' => 'post_meta', 'wxr_importer_processed_term' => 'term', + 'wxr_importer_processed_term_meta' => 'term_meta', ); if ( ! $current_filter || ! array_key_exists( $current_filter, $types ) ) { @@ -261,7 +266,7 @@ public function map_element( $element_type, $data, $id = null, $additional_id = // Items with a parent has at least a sort order of 2. 'sort_order' => 1, ); - $element_id = null; + $element_id = null; switch ( $element_type ) { case 'comment': @@ -284,6 +289,18 @@ public function map_element( $element_type, $data, $id = null, $additional_id = $element_id = (string) $data['post_id']; break; case 'post_meta': + $element_id = (string) $data['meta_key']; + + if ( array_key_exists( 'post_id', $data ) ) { + $new_element['parent_id'] = $data['post_id']; + } + break; + case 'term_meta': + $element_id = (string) $data['meta_key']; + + if ( array_key_exists( 'term_id', $data ) ) { + $new_element['parent_id'] = $data['term_id']; + } break; case 'term': $element_id = (string) $data['term_id']; @@ -372,7 +389,15 @@ public function get_mapped_element( $element_type, $element, $id, $additional_id } break; case 'term': - // Not ID provided. + // No ID provided. + break; + case 'term_meta': + // The ID is the term ID. + $mapped_ids = $this->get_mapped_ids( $id, self::ENTITY_TYPES['term'] ); + + if ( $mapped_ids && ! is_null( $mapped_ids['mapped_id'] ) ) { + $element['term_id'] = $mapped_ids['mapped_id']; + } break; } diff --git a/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php b/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php index d70727bc17..4337638f47 100644 --- a/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php +++ b/packages/playground/data-liberation/src/wxr/WP_WXR_Reader.php @@ -213,6 +213,14 @@ class WP_WXR_Reader implements Iterator { */ private $last_comment_id = null; + /** + * The ID of the last processed term. + * + * @since WP_VERSION + * @var int|null + */ + private $last_term_id = null; + /** * Buffer for accumulating text content between tags. * @@ -328,6 +336,13 @@ class WP_WXR_Reader implements Iterator { 'wp:term_name' => 'name', ), ), + 'wp:termmeta' => array( + 'type' => 'term_meta', + 'fields' => array( + 'wp:meta_key' => 'meta_key', + 'wp:meta_value' => 'meta_value', + ), + ), 'wp:tag' => array( 'type' => 'tag', 'fields' => array( @@ -369,6 +384,7 @@ public static function create( WP_Byte_Reader $upstream = null, $cursor = null ) if ( null !== $cursor ) { $reader->last_post_id = $cursor['last_post_id']; $reader->last_comment_id = $cursor['last_comment_id']; + $reader->last_term_id = $cursor['last_term_id']; } if ( null !== $upstream ) { $reader->connect_upstream( $upstream ); @@ -418,6 +434,7 @@ public function get_reentrancy_cursor() { 'upstream' => $this->last_xml_byte_offset_outside_of_entity, 'last_post_id' => $this->last_post_id, 'last_comment_id' => $this->last_comment_id, + 'last_term_id' => $this->last_term_id, ) ); } @@ -478,6 +495,17 @@ public function get_last_comment_id() { return $this->last_comment_id; } + /** + * Gets the ID of the last processed term. + * + * @since WP_VERSION + * + * @return int|null The term ID, or null if no terms have been processed. + */ + public function get_last_term_id() { + return $this->last_term_id; + } + /** * Appends bytes to the input stream. * @@ -872,8 +900,12 @@ private function emit_entity() { $this->entity_data['comment_id'] = $this->last_comment_id; } elseif ( $this->entity_type === 'tag' ) { $this->entity_data['taxonomy'] = 'post_tag'; + $this->last_term_id = $this->entity_data['term_id']; } elseif ( $this->entity_type === 'category' ) { $this->entity_data['taxonomy'] = 'category'; + $this->last_term_id = $this->entity_data['term_id']; + } elseif ( $this->entity_type === 'term_meta' ) { + $this->entity_data['term_id'] = $this->last_term_id; } $this->entity_finished = true; ++$this->entities_read_so_far; diff --git a/packages/playground/data-liberation/src/wxr/WXR_Importer.php b/packages/playground/data-liberation/src/wxr/WXR_Importer.php index 1f67973efb..ee7eb8ee49 100644 --- a/packages/playground/data-liberation/src/wxr/WXR_Importer.php +++ b/packages/playground/data-liberation/src/wxr/WXR_Importer.php @@ -1678,6 +1678,21 @@ protected function parse_term_node( $node, $type = 'term' ) { continue; } + switch ( $child->tagName ) { + case 'wp:termmeta': + $meta_item = $this->parse_meta_node( $child ); + if ( ! empty( $meta_item ) ) { + $meta[] = $meta_item; + } + break; + default: + // All other tags are term data + $key = array_search( $child->tagName, $tag_name ); + if ( $key ) { + $data[ $key ] = $child->textContent; + } + } + $key = array_search( $child->tagName, $tag_name ); if ( $key ) { $data[ $key ] = $child->textContent; diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index 9da933e0cb..e37933bc2f 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -128,7 +128,7 @@ public function test_small_import() { $this->assertSame( 0, $post->post_parent ); $cats = wp_get_post_categories( $post->ID ); $this->assertCount( 1, $cats ); - //$this->assertTrue( has_post_format( 'aside', $post->ID ) ); + // $this->assertTrue( has_post_format( 'aside', $post->ID ) ); $post = $posts[2]; $this->assertSame( 'Top-level Foo', $post->post_title ); @@ -331,7 +331,7 @@ public function test_serialized_postmeta_with_slashes() { * * @see https://github.com/WordPress/wordpress-importer/blob/master/phpunit/tests/term-meta.php */ - public function _not_test_serialized_term_meta() { + public function _no_test_serialized_term_meta() { register_taxonomy( 'custom_taxonomy', array( 'post' ) ); $this->import_wxr_file( __DIR__ . '/wxr/test-serialized-term-meta.xml' ); From 24ae4021493dd35072ce44a0d105b377c481df0a Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Thu, 12 Dec 2024 11:24:44 +0100 Subject: [PATCH 48/51] Add comment --- .../src/import/WP_Topological_Sorter.php | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 3778f8af80..b9e8166e7b 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -1,9 +1,22 @@ prepare( 'CREATE TABLE IF NOT EXISTS %i ( id bigint(20) unsigned NOT NULL AUTO_INCREMENT, @@ -122,6 +147,7 @@ public static function activate() { element_id text NOT NULL, mapped_id text DEFAULT NULL, parent_id text DEFAULT NULL, + additional_id text DEFAULT NULL, byte_offset bigint(20) unsigned NOT NULL, sort_order int DEFAULT 1, PRIMARY KEY (id), From 416b2946fe5a49ec0de5de244ed220fb6c47c25f Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Thu, 12 Dec 2024 11:29:50 +0100 Subject: [PATCH 49/51] Rename "elements" to "entities" to match name convention --- .../src/import/WP_Stream_Importer.php | 2 +- .../src/import/WP_Topological_Sorter.php | 144 +++++++++--------- 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index 9696c6c92e..e4ce0577d8 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -451,7 +451,7 @@ private function topological_sort_next_entity( $count = 10000 ) { $entity = $this->entity_iterator->current(); $data = $entity->get_data(); // $offset = $this->entity_iterator->get_last_xml_byte_offset_outside_of_entity(); - $this->topological_sorter->map_element( $entity->get_type(), $data ); + $this->topological_sorter->map_entity( $entity->get_type(), $data ); $this->entity_iterator->next(); } diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index b9e8166e7b..10044f0995 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -2,8 +2,8 @@ /** * The topological sorter class. We create a custom table that contains the WXR - * IDs and the mapped IDs. Everytime an element is processed, we add it to the - * table. The first time we process an element, it is mapped to the original ID + * IDs and the mapped IDs. Everytime an entity is processed, we add it to the + * table. The first time we process an entity, it is mapped to the original ID * and no mapped ID. From the second time, it is mapped to the mapped ID. * * When the WP_Entity_Importer class read raw data from the source stream it @@ -11,7 +11,7 @@ * to map the original IDs to the mapped IDs. This can change in the future and * have the entity importer call the sorter directly. * - * The first STAGE_TOPOLOGICAL_SORT stage do save all the elements with no mapped + * The first STAGE_TOPOLOGICAL_SORT stage do save all the entities with no mapped * IDs. So during the STAGE_IMPORT_ENTITIES step the WP_Entity_Importer class * read already inserted data and save them. From that moment all the entities * have the IDs created using wp_insert_post(), wp_insert_comment(), @@ -127,24 +127,24 @@ public static function activate() { $max_index_length = 191; /** - * This is a table used to map the IDs of the imported elements. It is used to map all the IDs of the elements. + * This is a table used to map the IDs of the imported entities. It is used to map all the IDs of the entities. * - * @param int $id The ID of the element. + * @param int $id The ID of the entity. * @param int $session_id The current session ID. - * @param int $element_type The type of the element, comment, comment_meta, post, post_meta, term, or term_meta. - * @param string $element_id The ID of the element before the import. - * @param string $mapped_id The mapped ID of the element after the import. - * @param string $parent_id The parent ID of the element. - * @param string $additional_id The additional ID of the element. Used for comments and terms. Comments have a comment_parent, and the post. - * @param int $byte_offset The byte offset of the element inside the WXR file. Not used now. - * @param int $sort_order The sort order of the element. Not used now. + * @param int $entity_type The type of the entity, comment, comment_meta, post, post_meta, term, or term_meta. + * @param string $entity_id The ID of the entity before the import. + * @param string $mapped_id The mapped ID of the entity after the import. + * @param string $parent_id The parent ID of the entity. + * @param string $additional_id The additional ID of the entity. Used for comments and terms. Comments have a comment_parent, and the post. + * @param int $byte_offset The byte offset of the entity inside the WXR file. Not used now. + * @param int $sort_order The sort order of the entity. Not used now. */ $sql = $wpdb->prepare( 'CREATE TABLE IF NOT EXISTS %i ( id bigint(20) unsigned NOT NULL AUTO_INCREMENT, session_id bigint(20) unsigned NOT NULL, - element_type tinyint(1) NOT NULL, - element_id text NOT NULL, + entity_type tinyint(1) NOT NULL, + entity_id text NOT NULL, mapped_id text DEFAULT NULL, parent_id text DEFAULT NULL, additional_id text DEFAULT NULL, @@ -152,7 +152,7 @@ public static function activate() { sort_order int DEFAULT 1, PRIMARY KEY (id), KEY session_id (session_id), - KEY element_id (element_id(%d)), + KEY entity_id (entity_id(%d)), KEY parent_id (parent_id(%d)), KEY byte_offset (byte_offset) ) ' . $wpdb->get_charset_collate(), @@ -207,8 +207,8 @@ public function delete_session( $session_id ) { * object with the mapped IDs. * * @param array $data The data to map. - * @param int|null $id The ID of the element. - * @param int|null $additional_id The additional ID of the element. + * @param int|null $id The ID of the entity. + * @param int|null $additional_id The additional ID of the entity. */ public function filter_wxr_importer_pre_process( $data, $id = null, $additional_id = null ) { $current_session = $this->current_session; @@ -232,16 +232,16 @@ public function filter_wxr_importer_pre_process( $data, $id = null, $additional_ return false; } - return $this->get_mapped_element( $types[ $current_filter ], $data, $id, $additional_id ); + return $this->get_mapped_entity( $types[ $current_filter ], $data, $id, $additional_id ); } /** * Called by 'wxr_importer_processed_*' actions. This adds the entity to the * sorter table. * - * @param int|null $id The ID of the element. + * @param int|null $id The ID of the entity. * @param array $data The data to map. - * @param int|null $additional_id The additional ID of the element. + * @param int|null $additional_id The additional ID of the entity. */ public function action_wxr_importer_processed( $id, $data, $additional_id = null ) { $current_filter = current_action(); @@ -264,123 +264,123 @@ public function action_wxr_importer_processed( $id, $data, $additional_id = null return false; } - $this->map_element( $types[ $current_filter ], $data, $id, $additional_id ); + $this->map_entity( $types[ $current_filter ], $data, $id, $additional_id ); } /** - * Map an element to the index. If $id is provided, it will be used to map the element. + * Map an entity to the index. If $id is provided, it will be used to map the entity. * - * @param string $element_type The type of the element. + * @param string $entity_type The type of the entity. * @param array $data The data to map. - * @param int|null $id The ID of the element. - * @param int|null $additional_id The additional ID of the element. + * @param int|null $id The ID of the entity. + * @param int|null $additional_id The additional ID of the entity. */ - public function map_element( $element_type, $data, $id = null, $additional_id = null ) { + public function map_entity( $entity_type, $data, $id = null, $additional_id = null ) { global $wpdb; - if ( ! array_key_exists( $element_type, self::ENTITY_TYPES ) ) { + if ( ! array_key_exists( $entity_type, self::ENTITY_TYPES ) ) { return; } - $new_element = array( + $new_entity = array( 'session_id' => $this->current_session, - 'element_type' => self::ENTITY_TYPES[ $element_type ], - 'element_id' => null, + 'entity_type' => self::ENTITY_TYPES[ $entity_type ], + 'entity_id' => null, 'mapped_id' => is_null( $id ) ? null : (string) $id, 'parent_id' => null, 'byte_offset' => 0, // Items with a parent has at least a sort order of 2. 'sort_order' => 1, ); - $element_id = null; + $entity_id = null; - switch ( $element_type ) { + switch ( $entity_type ) { case 'comment': - $element_id = (string) $data['comment_id']; + $entity_id = (string) $data['comment_id']; break; case 'comment_meta': - $element_id = (string) $data['meta_key']; + $entity_id = (string) $data['meta_key']; if ( array_key_exists( 'comment_id', $data ) ) { - $new_element['parent_id'] = $data['comment_id']; + $new_entity['parent_id'] = $data['comment_id']; } break; case 'post': if ( 'post' === $data['post_type'] || 'page' === $data['post_type'] ) { if ( array_key_exists( 'post_parent', $data ) && '0' !== $data['post_parent'] ) { - $new_element['parent_id'] = $data['post_parent']; + $new_entity['parent_id'] = $data['post_parent']; } } - $element_id = (string) $data['post_id']; + $entity_id = (string) $data['post_id']; break; case 'post_meta': - $element_id = (string) $data['meta_key']; + $entity_id = (string) $data['meta_key']; if ( array_key_exists( 'post_id', $data ) ) { - $new_element['parent_id'] = $data['post_id']; + $new_entity['parent_id'] = $data['post_id']; } break; case 'term_meta': - $element_id = (string) $data['meta_key']; + $entity_id = (string) $data['meta_key']; if ( array_key_exists( 'term_id', $data ) ) { - $new_element['parent_id'] = $data['term_id']; + $new_entity['parent_id'] = $data['term_id']; } break; case 'term': - $element_id = (string) $data['term_id']; + $entity_id = (string) $data['term_id']; if ( array_key_exists( 'parent', $data ) ) { - $new_element['parent_id'] = $data['parent']; + $new_entity['parent_id'] = $data['parent']; } break; } - // The element has been imported, so we can use the ID. + // The entity has been imported, so we can use the ID. if ( $id ) { - $existing_element = $this->get_mapped_ids( $element_id, self::ENTITY_TYPES[ $element_type ] ); + $existing_entity = $this->get_mapped_ids( $entity_id, self::ENTITY_TYPES[ $entity_type ] ); - if ( $existing_element && is_null( $existing_element['mapped_id'] ) ) { - $new_element['mapped_id'] = (string) $id; + if ( $existing_entity && is_null( $existing_entity['mapped_id'] ) ) { + $new_entity['mapped_id'] = (string) $id; - // Update the element if it already exists. + // Update the entity if it already exists. $wpdb->update( self::get_table_name(), array( 'mapped_id' => (string) $id ), array( - 'element_id' => (string) $element_id, - 'element_type' => self::ENTITY_TYPES[ $element_type ], + 'entity_id' => (string) $entity_id, + 'entity_type' => self::ENTITY_TYPES[ $entity_type ], ), array( '%s' ) ); } } else { - // Insert the element if it doesn't exist. - $new_element['element_id'] = $element_id; - $wpdb->insert( self::get_table_name(), $new_element ); + // Insert the entity if it doesn't exist. + $new_entity['entity_id'] = $entity_id; + $wpdb->insert( self::get_table_name(), $new_entity ); } } /** - * Get a mapped element. Called from 'wxr_importer_pre_process_*' filter. + * Get a mapped entity. Called from 'wxr_importer_pre_process_*' filter. * * @param int $entity The entity to get the mapped ID for. - * @param int $id The ID of the element. + * @param int $id The ID of the entity. * - * @return mixed|bool The mapped element or false if the post is not found. + * @return mixed|bool The mapped entity or false if the post is not found. */ - public function get_mapped_element( $element_type, $element, $id, $additional_id = null ) { + public function get_mapped_entity( $entity_type, $entity, $id, $additional_id = null ) { $current_session = $this->current_session; $already_mapped = false; - switch ( $element_type ) { + switch ( $entity_type ) { case 'comment': // The ID is the post ID. $mapped_ids = $this->get_mapped_ids( $id, self::ENTITY_TYPES['post'] ); if ( $mapped_ids && ! is_null( $mapped_ids['mapped_id'] ) ) { - $element['comment_post_ID'] = $mapped_ids['mapped_id']; + $entity['comment_post_ID'] = $mapped_ids['mapped_id']; } break; case 'comment_meta': @@ -388,7 +388,7 @@ public function get_mapped_element( $element_type, $element, $id, $additional_id $mapped_ids = $this->get_mapped_ids( $id, self::ENTITY_TYPES['comment'] ); if ( $mapped_ids && ! is_null( $mapped_ids['mapped_id'] ) ) { - $element['comment_id'] = $mapped_ids['mapped_id']; + $entity['comment_id'] = $mapped_ids['mapped_id']; } break; case 'post': @@ -396,13 +396,13 @@ public function get_mapped_element( $element_type, $element, $id, $additional_id $mapped_ids = $this->get_mapped_ids( $id, self::ENTITY_TYPES['post'] ); if ( $mapped_ids && ! is_null( $mapped_ids['mapped_id'] ) ) { - $element['post_parent'] = $mapped_ids['mapped_id']; + $entity['post_parent'] = $mapped_ids['mapped_id']; } - $mapped_ids = $this->get_mapped_ids( $element['post_id'], self::ENTITY_TYPES['post'] ); + $mapped_ids = $this->get_mapped_ids( $entity['post_id'], self::ENTITY_TYPES['post'] ); if ( $mapped_ids && ! is_null( $mapped_ids['mapped_id'] ) ) { - $element['post_id'] = $mapped_ids['mapped_id']; + $entity['post_id'] = $mapped_ids['mapped_id']; $already_mapped = true; } break; @@ -411,7 +411,7 @@ public function get_mapped_element( $element_type, $element, $id, $additional_id $mapped_ids = $this->get_mapped_ids( $id, self::ENTITY_TYPES['post'] ); if ( $mapped_ids ) { - $element['post_id'] = $mapped_ids['mapped_id']; + $entity['post_id'] = $mapped_ids['mapped_id']; } break; case 'term': @@ -422,26 +422,26 @@ public function get_mapped_element( $element_type, $element, $id, $additional_id $mapped_ids = $this->get_mapped_ids( $id, self::ENTITY_TYPES['term'] ); if ( $mapped_ids && ! is_null( $mapped_ids['mapped_id'] ) ) { - $element['term_id'] = $mapped_ids['mapped_id']; + $entity['term_id'] = $mapped_ids['mapped_id']; } break; } if ( $already_mapped ) { // This is used to skip the post if it has already been mapped. - $element['_already_mapped'] = true; + $entity['_already_mapped'] = true; } - return $element; + return $entity; } /** - * Get the mapped ID for an element. + * Get the mapped ID for an entity. * - * @param int $id The ID of the element. - * @param int $type The type of the element. + * @param int $id The ID of the entity. + * @param int $type The type of the entity. * - * @return int|false The mapped ID or null if the element is not found. + * @return int|false The mapped ID or null if the entity is not found. */ private function get_mapped_ids( $id, $type ) { global $wpdb; @@ -452,7 +452,7 @@ private function get_mapped_ids( $id, $type ) { $results = $wpdb->get_results( $wpdb->prepare( - 'SELECT element_id, mapped_id FROM %i WHERE element_id = %s AND element_type = %d LIMIT 1', + 'SELECT entity_id, mapped_id FROM %i WHERE entity_id = %s AND entity_type = %d LIMIT 1', self::get_table_name(), (string) $id, $type From b631a8a7fabc8d5f50816e1409b35a24c928046d Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Thu, 12 Dec 2024 11:52:12 +0100 Subject: [PATCH 50/51] Remove filters and actions and move mapping to WP_Entity_Importer --- .../src/import/WP_Entity_Importer.php | 23 ++++ .../src/import/WP_Topological_Sorter.php | 109 +----------------- 2 files changed, 24 insertions(+), 108 deletions(-) diff --git a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php index 86057bca0b..a2386ff4db 100644 --- a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php @@ -67,6 +67,11 @@ class=[\'"].*?\b(wp-image-\d+|attachment-[\w\-]+)\b protected $url_remap = array(); protected $featured_images = array(); + /** + * @var WP_Topological_Sorter + */ + private $topological_sorter; + /** * Constructor * @@ -106,6 +111,9 @@ public function __construct( $options = array() ) { 'default_author' => null, ) ); + + WP_Topological_Sorter::activate(); + $this->topological_sorter = new WP_Topological_Sorter( $this->options ); } public function import_entity( WP_Imported_Entity $entity ) { @@ -257,6 +265,7 @@ public function import_user( $data ) { * @param array $userdata Raw data imported for the user. */ do_action( 'wxr_importer_processed_user', $user_id, $userdata ); + // $this->topological_sorter->map_entity( 'user', $userdata, $user_id ); } public function import_term( $data ) { @@ -267,6 +276,7 @@ public function import_term( $data ) { * @param array $meta Meta data. */ $data = apply_filters( 'wxr_importer_pre_process_term', $data ); + $data = $this->topological_sorter->get_mapped_entity( 'term', $data ); if ( empty( $data ) ) { return false; } @@ -412,6 +422,7 @@ public function import_term( $data ) { * @param array $data Raw data imported for the term. */ do_action( 'wxr_importer_processed_term', $term_id, $data ); + $this->topological_sorter->map_entity( 'term', $data, $term_id ); } public function import_term_meta( $meta_item, $term_id ) { @@ -426,6 +437,7 @@ public function import_term_meta( $meta_item, $term_id ) { * @param int $term_id Term the meta is attached to. */ $meta_item = apply_filters( 'wxr_importer_pre_process_term_meta', $meta_item, $term_id ); + $meta_item = $this->topological_sorter->get_mapped_entity( 'term_meta', $meta_item, $term_id ); if ( empty( $meta_item ) ) { return false; } @@ -443,7 +455,9 @@ public function import_term_meta( $meta_item, $term_id ) { $value = maybe_unserialize( $meta_item['meta_value'] ); $term_meta_id = add_term_meta( $meta_item['term_id'], wp_slash( $meta_item['meta_key'] ), wp_slash_strings_only( $value ) ); + do_action( 'wxr_importer_processed_term_meta', $term_meta_id, $meta_item, $meta_item['term_id'] ); + $this->topological_sorter->map_entity( 'term_meta', $meta_item, $term_meta_id, $meta_item['term_id'] ); } /** @@ -513,6 +527,7 @@ public function import_post( $data ) { * @param array $terms Terms on the post. */ $data = apply_filters( 'wxr_importer_pre_process_post', $data, $parent_id ); + $data = $this->topological_sorter->get_mapped_entity( 'post', $data, $parent_id ); if ( empty( $data ) ) { $this->logger->debug( 'Skipping post, empty data' ); return false; @@ -708,6 +723,7 @@ public function import_post( $data ) { * @param array $terms Raw term data, already processed. */ do_action( 'wxr_importer_processed_post', $post_id, $data ); + $this->topological_sorter->map_entity( 'post', $data, $post_id ); return $post_id; } @@ -941,6 +957,7 @@ public function import_post_meta( $meta_item, $post_id ) { * @param int $post_id Post the meta is attached to. */ $meta_item = apply_filters( 'wxr_importer_pre_process_post_meta', $meta_item, $post_id ); + $meta_item = $this->topological_sorter->get_mapped_entity( 'post_meta', $meta_item, $post_id ); if ( empty( $meta_item ) ) { return false; } @@ -975,6 +992,8 @@ public function import_post_meta( $meta_item, $post_id ) { } do_action( 'wxr_importer_processed_post_meta', $post_id, $meta_item ); + // @TODO: Check if post_id as ID is correct + $this->topological_sorter->map_entity( 'post_meta', $meta_item, $post_id ); return true; } @@ -1007,6 +1026,7 @@ public function import_comment( $comment, $post_id, $post_just_imported = false * @param int $post_id Post the comment is attached to. */ $comment = apply_filters( 'wxr_importer_pre_process_comment', $comment, $post_id, $parent_id ); + $comment = $this->topological_sorter->get_mapped_entity( 'comment', $comment, $post_id, $parent_id ); if ( empty( $comment ) ) { return false; } @@ -1113,10 +1133,12 @@ public function import_comment( $comment, $post_id, $post_just_imported = false * @param array $post_id Parent post ID. */ do_action( 'wxr_importer_processed_comment', $comment_id, $comment, $post_id ); + $this->topological_sorter->map_entity( 'comment', $comment, $comment_id, $post_id ); } public function import_comment_meta( $meta_item, $comment_id ) { $meta_item = apply_filters( 'wxr_importer_pre_process_comment_meta', $meta_item, $comment_id ); + $meta_item = $this->topological_sorter->get_mapped_entity( 'comment_meta', $meta_item, $comment_id ); if ( empty( $meta_item ) ) { return false; } @@ -1130,6 +1152,7 @@ public function import_comment_meta( $meta_item, $comment_id ) { $comment_meta_id = add_comment_meta( $meta_item['comment_id'], wp_slash( $meta_item['meta_key'] ), wp_slash( $value ) ); do_action( 'wxr_importer_processed_comment_meta', $comment_meta_id, $meta_item, $meta_item['comment_id'] ); + $this->topological_sorter->map_entity( 'comment_meta', $meta_item, $comment_meta_id, $meta_item['comment_id'] ); } /** diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index 10044f0995..a8348907ac 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -54,26 +54,6 @@ class WP_Topological_Sorter { 'term_meta' => 6, ); - private $mapped_pre_filters = array( - // Name of the filter, and the number of arguments it accepts. - 'wxr_importer_pre_process_comment' => 3, - 'wxr_importer_pre_process_comment_meta' => 2, - 'wxr_importer_pre_process_post' => 2, - 'wxr_importer_pre_process_post_meta' => 2, - 'wxr_importer_pre_process_term' => 1, - 'wxr_importer_pre_process_term_meta' => 2, - ); - - private $mapped_post_actions = array( - // Name of the filter, and the number of arguments it accepts. - 'wxr_importer_processed_comment' => 3, - 'wxr_importer_processed_comment_meta' => 3, - 'wxr_importer_processed_post' => 2, - 'wxr_importer_processed_post_meta' => 2, - 'wxr_importer_processed_term' => 2, - 'wxr_importer_processed_term_meta' => 3, - ); - /** * Set the current session ID and add the filters and actions. */ @@ -81,28 +61,6 @@ public function __construct( $options = array() ) { if ( array_key_exists( 'session_id', $options ) ) { $this->current_session = $options['session_id']; } - - // The topological sorter needs to know about the mapped IDs for comments, terms, and posts. - foreach ( $this->mapped_pre_filters as $name => $accepted_args ) { - add_filter( $name, array( $this, 'filter_wxr_importer_pre_process' ), 10, $accepted_args ); - } - - foreach ( $this->mapped_post_actions as $name => $accepted_args ) { - add_action( $name, array( $this, 'action_wxr_importer_processed' ), 10, $accepted_args ); - } - } - - /** - * Remove the filters. - */ - public function __destruct() { - foreach ( $this->mapped_pre_filters as $name => $accepted_args ) { - remove_filter( $name, array( $this, 'filter_wxr_importer_pre_process' ) ); - } - - foreach ( $this->mapped_post_actions as $name => $accepted_args ) { - remove_action( $name, array( $this, 'action_wxr_importer_processed' ) ); - } } /** @@ -202,71 +160,6 @@ public function delete_session( $session_id ) { ); } - /** - * Called by 'wxr_importer_pre_process_*' filters. This populates the entity - * object with the mapped IDs. - * - * @param array $data The data to map. - * @param int|null $id The ID of the entity. - * @param int|null $additional_id The additional ID of the entity. - */ - public function filter_wxr_importer_pre_process( $data, $id = null, $additional_id = null ) { - $current_session = $this->current_session; - $current_filter = current_filter(); - $types = array( - 'wxr_importer_pre_process_comment' => 'comment', - 'wxr_importer_pre_process_comment_meta' => 'comment_meta', - 'wxr_importer_pre_process_post' => 'post', - 'wxr_importer_pre_process_post_meta' => 'post_meta', - 'wxr_importer_pre_process_term' => 'term', - 'wxr_importer_pre_process_term_meta' => 'term_meta', - ); - - if ( ! $current_filter || ! array_key_exists( $current_filter, $types ) ) { - _doing_it_wrong( - __METHOD__, - 'This method should be called by the wxr_importer_pre_process_* filters.', - '1.0.0' - ); - - return false; - } - - return $this->get_mapped_entity( $types[ $current_filter ], $data, $id, $additional_id ); - } - - /** - * Called by 'wxr_importer_processed_*' actions. This adds the entity to the - * sorter table. - * - * @param int|null $id The ID of the entity. - * @param array $data The data to map. - * @param int|null $additional_id The additional ID of the entity. - */ - public function action_wxr_importer_processed( $id, $data, $additional_id = null ) { - $current_filter = current_action(); - $types = array( - 'wxr_importer_processed_comment' => 'comment', - 'wxr_importer_processed_comment_meta' => 'comment_meta', - 'wxr_importer_processed_post' => 'post', - 'wxr_importer_processed_post_meta' => 'post_meta', - 'wxr_importer_processed_term' => 'term', - 'wxr_importer_processed_term_meta' => 'term_meta', - ); - - if ( ! $current_filter || ! array_key_exists( $current_filter, $types ) ) { - _doing_it_wrong( - __METHOD__, - 'This method should be called by the wxr_importer_processed_* filters.', - '1.0.0' - ); - - return false; - } - - $this->map_entity( $types[ $current_filter ], $data, $id, $additional_id ); - } - /** * Map an entity to the index. If $id is provided, it will be used to map the entity. * @@ -370,7 +263,7 @@ public function map_entity( $entity_type, $data, $id = null, $additional_id = nu * * @return mixed|bool The mapped entity or false if the post is not found. */ - public function get_mapped_entity( $entity_type, $entity, $id, $additional_id = null ) { + public function get_mapped_entity( $entity_type, $entity, $id = null, $additional_id = null ) { $current_session = $this->current_session; $already_mapped = false; From b86295d38da45cb254737e2b8c7985364e2eb82a Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Fri, 13 Dec 2024 14:10:39 +0100 Subject: [PATCH 51/51] Fix: remove NOT NULL --- .../data-liberation/src/import/WP_Topological_Sorter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index a8348907ac..6d349585d2 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -100,7 +100,7 @@ public static function activate() { $sql = $wpdb->prepare( 'CREATE TABLE IF NOT EXISTS %i ( id bigint(20) unsigned NOT NULL AUTO_INCREMENT, - session_id bigint(20) unsigned NOT NULL, + session_id bigint(20) unsigned, entity_type tinyint(1) NOT NULL, entity_id text NOT NULL, mapped_id text DEFAULT NULL,