From 0937b812b6cbcc8962e88921172921bb44004114 Mon Sep 17 00:00:00 2001 From: GuySartorelli <36352093+GuySartorelli@users.noreply.github.com> Date: Wed, 7 Jul 2021 14:14:44 +1200 Subject: [PATCH] ENH: Provide more control over elemental block indexing. Provides a new configuration variable to exclude specific elemental block classes from being indexed in search. Provides a new extension point for modifying exactly what gets indexed for each block. Provides a new configuration variable to define the delimiter used between blocks in the search index. A single space delimiter is non-intrusive and will not require any changes within existing projects to avoid changes to the way content is indexed - but providing an option for configuring the delimiter can help avoid false-positive results in phrase queries. --- docs/en/searching-blocks.md | 23 ++++++++++++++ src/Extensions/ElementalPageExtension.php | 23 ++++++++++++-- src/Models/BaseElement.php | 37 +++++++++++++++++++++++ 3 files changed, 80 insertions(+), 3 deletions(-) diff --git a/docs/en/searching-blocks.md b/docs/en/searching-blocks.md index 07fb0c80f..981517070 100644 --- a/docs/en/searching-blocks.md +++ b/docs/en/searching-blocks.md @@ -29,4 +29,27 @@ You can disable it via YAML config in favour of your own index definition: SilverStripe\FullTextSearch\Search\FullTextSearch: indexes: - MyCustomIndex +``` + +You can define whether each block is included in your search index using the +`search_indexable` configuration variable, which is `true` by default: + +```yml +App\Models\MyCustomElementalBlock: + search_indexable: false +``` + +You can also customise the content that is indexed for your blocks. By default +the block is rendered in full using the templating engine, and the resultant +markup is included in the index. You can override the `getContentForSearchIndex` +method on your elemental blocks to change that. 
This is useful, for example, if +your templates include hardcoded text or references to other content you don't +want to be indexed. + +If you want to use a specific delimiter between each block, that can be configured +as well. The default is a space, but you might for example want to use an ellipsis +to make it clear in search results where one piece of content ends and another begins. +```yml +DNADesign\Elemental\Extensions\ElementalPageExtension: + search_index_element_delimiter: ' ... ' ``` \ No newline at end of file diff --git a/src/Extensions/ElementalPageExtension.php b/src/Extensions/ElementalPageExtension.php index 8f4ef92d8..35e50b1bb 100644 --- a/src/Extensions/ElementalPageExtension.php +++ b/src/Extensions/ElementalPageExtension.php @@ -25,6 +25,17 @@ class ElementalPageExtension extends ElementalAreasExtension 'ElementalArea', ]; + /** + * The delimiter to separate distinct elements in indexed content. + * + * When using the getElementsForSearch() method to index all elements in a single field, + * a custom delimiter can be used to help avoid false positive results for phrase queries. + * + * @config + * @var string + */ + private static $search_index_element_delimiter = ' '; + /** * Returns the contents of each ElementalArea has_one's markup for use in Solr or Elastic search indexing * @@ -43,8 +54,14 @@ public function getElementsForSearch() /** @var ElementalArea $area */ $area = $this->owner->$key(); if ($area) { - // Replace HTML tags with spaces - $output[] = strip_tags(str_replace('<', ' <', $area->forTemplate())); + foreach ($area->Elements() as $element) { + if ($element->getSearchIndexable()) { + $content = $element->getContentForSearchIndex(); + if ($content) { + $output[] = $content; + } + } + } } } } finally { @@ -53,7 +70,7 @@ public function getElementsForSearch() // CMS layout can break on the response. 
(SilverStripe 4.1.1) SSViewer::set_themes($oldThemes); } - return implode($output); + return implode($this->config()->get('search_index_element_delimiter'), $output); } public function MetaTags(&$tags) diff --git a/src/Models/BaseElement.php b/src/Models/BaseElement.php index 121e744af..21b6993e3 100644 --- a/src/Models/BaseElement.php +++ b/src/Models/BaseElement.php @@ -32,6 +32,7 @@ use SilverStripe\View\Parsers\URLSegmentFilter; use SilverStripe\View\Requirements; use SilverStripe\ORM\CMSPreviewable; +use SilverStripe\Core\Config\Config; /** * Class BaseElement @@ -174,6 +175,15 @@ class BaseElement extends DataObject implements CMSPreviewable */ private static $displays_title_in_template = true; + /** + * Determines whether a block should be indexable in search. + * + * @config + * @var boolean + * @see ElementalPageExtension::getElementsForSearch() + */ + private static $search_indexable = true; + /** * Store used anchor names, this is to avoid title clashes * when calling 'getAnchor' @@ -476,6 +486,33 @@ public function Top() return (Controller::has_curr()) ? Controller::curr() : null; } + /** + * Determines whether this elemental block is indexable in search. + * + * By default, this uses the configurable variable search_indexable (inherited, so subclasses are indexable unless they set it to false), but + * this method can be overridden to provide more complex logic if required. + * + * @return boolean + */ + public function getSearchIndexable(): bool + { + return (bool)$this->config()->get('search_indexable'); + } + + /** + * Provides content to be indexed in search. + * + * @return string|null + */ + public function getContentForSearchIndex(): ?string + { + // Pad every '<' with a space before stripping tags so words in adjacent tags don't run together. + $content = trim(strip_tags(str_replace('<', ' <', $this->forTemplate()))); + // Allow projects to update indexable content of third-party elements. + $this->extend('updateContentForSearchIndex', $content); + return $content; + } + /** * Default way to render element in templates. 
Note that all blocks should * be rendered through their {@link ElementController} class as this