Skip to content

Commit

Permalink
fix(slb-495): post import plugin updates
Browse files Browse the repository at this point in the history
  • Loading branch information
dspachos committed Dec 20, 2024
1 parent 9f23693 commit c524171
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 8 deletions.
2 changes: 1 addition & 1 deletion apps/converter/htmlToMarkdown.js
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ export async function htmlToMarkdown(url) {

return {
markdownPath: mdPath,
warnings: warnings, // You could add warnings for failed image downloads etc.
warnings: warnings,
outputDir,
};
}
1 change: 0 additions & 1 deletion apps/converter/wordToMarkdown.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ export async function wordToMarkdown(filePath) {
}

const folderName = generateFolderName(filePath);
// const outputDir = path.join(__dirname, folderName);
const outputDir = path.join(__dirname, folderName);
const imagesDir = path.join(outputDir, 'images');

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
services:
silverback_ai_import.content:
class: Drupal\silverback_ai_import\ContentImportAiService
arguments: ['@current_route_match', '@current_user', '@entity_type.manager', '@logger.factory', '@config.factory', '@silverback_ai.openai_http_client', '@plugin.manager.ai.import']
arguments: ['@current_route_match', '@current_user', '@entity_type.manager', '@logger.factory', '@config.factory', '@silverback_ai.openai_http_client', '@plugin.manager.ai.import', '@plugin.manager.ai.post.import']
silverback_ai_import.batch.import:
class: 'Drupal\silverback_ai_import\ContentImportBatch'
arguments:
- '@logger.factory'
plugin.manager.ai.import:
class: Drupal\silverback_ai_import\AiImportPluginManager
arguments: ['@container.namespaces', '@cache.default', '@module_handler']
plugin.manager.ai.post.import:
class: Drupal\silverback_ai_import\AiPostImportPluginManager
arguments: ['@container.namespaces', '@cache.default', '@module_handler']
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?php

namespace Drupal\silverback_ai_import;

use Drupal\Core\Cache\CacheBackendInterface;
use Drupal\Core\Extension\ModuleHandlerInterface;
use Drupal\Core\Plugin\DefaultPluginManager;

/**
 * Plugin manager for AI post-import plugins.
 *
 * Looks for plugin classes in the Plugin/AiPostImport subdirectory of every
 * namespace it is given. Plugins must implement
 * AiPostImportPluginManagerInterface and are declared with the generic
 * Plugin annotation.
 */
class AiPostImportPluginManager extends DefaultPluginManager {

  /**
   * Sets up discovery, the alter hook and the definition cache.
   *
   * @param \Traversable $namespaces
   *   An object that implements \Traversable which contains the root paths
   *   keyed by the corresponding namespace to look for plugin implementations.
   * @param \Drupal\Core\Cache\CacheBackendInterface $cache_backend
   *   Cache backend instance to use.
   * @param \Drupal\Core\Extension\ModuleHandlerInterface $module_handler
   *   The module handler to invoke the alter hook with.
   */
  public function __construct(\Traversable $namespaces, CacheBackendInterface $cache_backend, ModuleHandlerInterface $module_handler) {
    parent::__construct(
      'Plugin/AiPostImport',
      $namespaces,
      $module_handler,
      'Drupal\silverback_ai_import\AiPostImportPluginManagerInterface',
      'Drupal\Component\Annotation\Plugin',
    );

    // The same identifier names both the alter hook and the cache entry.
    $info_key = 'ai_post_import_info';
    $this->alterInfo($info_key);
    $this->setCacheBackend($cache_backend, $info_key);
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?php

namespace Drupal\silverback_ai_import;

/**
 * Contract implemented by AI post-import plugins.
 *
 * Despite the "PluginManager" in its name, this is the interface the plugins
 * themselves implement: AiPostImportPluginManager registers it as the
 * required plugin interface, and EmptyChunksRemovePostImportPlugin
 * implements it.
 */
interface AiPostImportPluginManagerInterface {

/**
 * Post-processes the full list of imported chunks.
 *
 * Called once per plugin after the import, with the chunks produced so far;
 * the returned array replaces them.
 *
 * @param array $chunks
 *   The chunks to post-process.
 *
 * @return array
 *   An array of chunks.
 */
public function convert(array $chunks);

}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public function __construct(
private readonly ConfigFactoryInterface $configFactory,
private readonly OpenAiHttpClient $silverbackAiOpenaiHttpClient,
private readonly AiImportPluginManager $pluginManager,
private readonly AiPostImportPluginManager $pluginManagerPost,
) {}

/**
Expand All @@ -56,7 +57,6 @@ public function __construct(
* When no appropriate plugin is found for the chunk.
*
* @see getPlugin()
* @see \Drupal\[module_name]\Plugin\ChunkConverterInterface::convert()
*/
public function processChunk($chunk) {
// Convert to array.
Expand Down Expand Up @@ -723,4 +723,25 @@ public function createFileEntityFromDropzoneData($file_data) {
return $file;
}

/**
 * Lists the IDs of all discovered post-import plugins.
 *
 * @return array
 *   Sequentially indexed plugin IDs, in the order the post-import plugin
 *   manager returns their definitions.
 */
public function getPostImportPlugins() {
  $plugin_ids = array_map(
    static fn ($definition) => $definition['id'],
    $this->pluginManagerPost->getDefinitions(),
  );
  // Drop the definition keys so the result is a plain list.
  return array_values($plugin_ids);
}

/**
 * Runs a single post-import plugin over the given chunks.
 *
 * @param string $plugin_id
 *   ID of the post-import plugin to instantiate.
 * @param array $chunks
 *   The chunks to hand to the plugin's convert() method.
 *
 * @return array
 *   Whatever the plugin's convert() method returns for these chunks.
 */
public function postProcessChunks($plugin_id, $chunks) {
  return $this->pluginManagerPost
    ->createInstance($plugin_id)
    ->convert($chunks);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,21 @@ public function create(array $chunks, $entity): void {
];
$batchBuilder->addOperation([ContentImportBatch::class, 'process'], [$batch]);
}

// @todo Add DI
$service = \Drupal::service('silverback_ai_import.content');
$post_import_plugins = $service->getPostImportPlugins();
foreach ($post_import_plugins as $plugin) {
$batch = [
'plugin_id' => $plugin,
'entity' => $entity,
'count' => $count++,
'total' => $total,
];
$batchBuilder->addOperation([ContentImportBatch::class, 'postProcess'], [$batch]);
}

// @todo Here, discover all post import plugins and add an operation at the end of this array.
batch_set($batchBuilder->toArray());
}

Expand All @@ -96,6 +111,25 @@ public static function process(array $batch, array &$context) {
]);
}

/**
 * Batch operation callback: applies one post-import plugin to the results.
 *
 * Reads the chunks accumulated under $context['results']['content'], passes
 * them through the plugin named in $batch['plugin_id'] and stores the
 * plugin's output back under the same key.
 *
 * @param array $batch
 *   Information about batch; 'plugin_id', 'count' and 'total' are read here.
 * @param array $context
 *   Batch context.
 */
public static function postProcess(array $batch, array &$context) {
  // Earlier operations may not have stored any content. Default to an empty
  // list so the plugin's array-typed convert() is never handed NULL.
  $chunks = $context['results']['content'] ?? [];

  $service = \Drupal::service('silverback_ai_import.content');
  $context['results']['content'] = $service->postProcessChunks($batch['plugin_id'], $chunks);

  // NOTE(review): 'content' was just written, so $context['results'] is never
  // empty here and the $batch['count'] fallback is unreachable; count() also
  // counts the top-level result keys, not the chunks. Confirm this is the
  // intended progress figure.
  $processed = !empty($context['results']) ? count($context['results']) : $batch['count'];
  $context['message'] = t('Processing chunk @processed/@total', [
    '@processed' => $processed,
    '@total' => $batch['total'],
  ]);
}

/**
* Finish batch.
*
Expand Down Expand Up @@ -129,7 +163,7 @@ public static function finish(bool $success, array $results, array $operations)
<!-- /wp:custom/content -->
EOD;

// @todo Surround with try catch here
// @todo Add post import process here
try {
$node->body->value = $content;
$node->save();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ public function buildForm(array $form, FormStateInterface $form_state): array {
'#description' => $this->t('<em>* Experimental, use with caution</em>'),
'#options' => [
'docx' => $this->t('Microsoft Word file'),
'url' => $this->t("Remote HTML page (*)"),
'pdf' => $this->t("PDF file (*)"),
'url' => $this->t("Remote web page (*)"),
],
'#default_value' => 'none',
'#required' => TRUE,
Expand Down Expand Up @@ -242,7 +242,6 @@ public function submitForm(array &$form, FormStateInterface $form_state): void {
$form_state->setRedirectUrl($entity->toUrl('edit-form'));
}
}

}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
*/
class ImageImportPlugin extends PluginBase implements AiImportPluginManagerInterface {

private const PUBLISHED = 1;

/**
* The schema to use.
*
Expand Down Expand Up @@ -181,7 +183,7 @@ public function createMediaImageFromPath($image_path, $media_bundle = 'image', $
'bundle' => $media_bundle,
'name' => $file->getFilename(),
'uid' => $user_id,
'status' => 1,
'status' => self::PUBLISHED,
'field_media_image' => [
'target_id' => $file->id(),
// @todo Improve alt text generation.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<?php

namespace Drupal\silverback_ai_import\Plugin\AiPostImport;

use Drupal\Core\Plugin\PluginBase;
use Drupal\silverback_ai_import\AiPostImportPluginManagerInterface;

/**
 * Post-import plugin that removes empty paragraph chunks.
 *
 * @Plugin(
 *   id = "ai_empty_chunks",
 *   label = @Translation("Filters empty chunks"),
 *   weight = 0,
 * )
 */
class EmptyChunksRemovePostImportPlugin extends PluginBase implements AiPostImportPluginManagerInterface {

  /**
   * Constructs the plugin instance.
   *
   * @param array $configuration
   *   A configuration array containing information about the plugin instance.
   * @param string $plugin_id
   *   The plugin ID for the plugin instance.
   * @param mixed $plugin_definition
   *   The plugin implementation definition.
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition) {
    $this->configuration = $configuration;
    $this->pluginId = $plugin_id;
    $this->pluginDefinition = $plugin_definition;
  }

  /**
   * Gets a description of the plugin.
   */
  public function description() {
    return $this->t('Filters empty chunks.');
  }

  /**
   * Drops paragraph chunks that contain no text.
   *
   * Only chunks that start with the '<!-- wp:paragraph -->' block delimiter
   * are inspected; every other chunk is kept untouched. Original array keys
   * are preserved, so the result may have gaps in its numbering.
   *
   * @param array $chunks
   *   The chunks to filter.
   *
   * @return array
   *   The chunks without empty paragraph blocks.
   */
  public function convert(array $chunks) {
    return array_filter($chunks, static function ($item) {
      // Anything that is not a paragraph block string is passed through.
      if (!is_string($item) || !str_starts_with(trim($item), '<!-- wp:paragraph -->')) {
        return TRUE;
      }
      // strip_tags() removes both the markup and the block comment
      // delimiters; trimming the remainder makes paragraphs holding only
      // spaces, tabs or line breaks count as empty too.
      return trim(strip_tags($item)) !== '';
    });
  }

}

0 comments on commit c524171

Please sign in to comment.