diff --git a/docs/en/searching-blocks.md b/docs/en/searching-blocks.md index 07fb0c80..35165c33 100644 --- a/docs/en/searching-blocks.md +++ b/docs/en/searching-blocks.md @@ -29,4 +29,27 @@ You can disable it via YAML config in favour of your own index definition: SilverStripe\FullTextSearch\Search\FullTextSearch: indexes: - MyCustomIndex +``` + +You can define whether each block is included in your search index using the +`search_indexable` configuration variable, which is `true` by default: + +```yml +App\Models\MyCustomElementalBlock: + search_indexable: false +``` + +You can also customise the content that is indexed for your blocks. By default +the block is rendered in full using the templating engine, and the resultant +markup is included in the index. You can override the `getContentForSearchIndex` +method on your elemental blocks to change that. This is useful, for example, if +your templates include hardcoded text or references to other content you don't +want to be indexed. + +If you want to use a specific delimiter between each block, that can be configured +as well. The default is a space, but you might for example want to use an ellipsis +to make it clear in search results where one piece of content ends and another begins. +```yml +Page: + search_index_element_delimiter: ' ... ' ``` \ No newline at end of file diff --git a/src/Extensions/ElementalPageExtension.php b/src/Extensions/ElementalPageExtension.php index 8f4ef92d..34881dee 100644 --- a/src/Extensions/ElementalPageExtension.php +++ b/src/Extensions/ElementalPageExtension.php @@ -25,6 +25,17 @@ class ElementalPageExtension extends ElementalAreasExtension 'ElementalArea', ]; + /** + * The delimiter to separate distinct elements in indexed content. + * + * When using the getElementsForSearch() method to index all elements in a single field, + * a custom delimiter can be used to help avoid false positive results for phrase queries. 
+ * + * @config + * @var string + */ + private static $search_index_element_delimiter = ' '; + /** * Returns the contents of each ElementalArea has_one's markup for use in Solr or Elastic search indexing * @@ -43,8 +54,14 @@ public function getElementsForSearch() /** @var ElementalArea $area */ $area = $this->owner->$key(); if ($area) { - // Replace HTML tags with spaces - $output[] = strip_tags(str_replace('<', ' <', $area->forTemplate())); + foreach ($area->Elements() as $element) { + if ($element->getSearchIndexable()) { + $content = $element->getContentForSearchIndex(); + if ($content) { + $output[] = $content; + } + } + } } } } finally { @@ -53,7 +70,7 @@ public function getElementsForSearch() // CMS layout can break on the response. (SilverStripe 4.1.1) SSViewer::set_themes($oldThemes); } - return implode($output); + return implode($this->owner->config()->get('search_index_element_delimiter'), $output); } public function MetaTags(&$tags) diff --git a/src/Models/BaseElement.php b/src/Models/BaseElement.php index 121e744a..ca3cfebc 100644 --- a/src/Models/BaseElement.php +++ b/src/Models/BaseElement.php @@ -174,6 +174,15 @@ class BaseElement extends DataObject implements CMSPreviewable */ private static $displays_title_in_template = true; + /** + * Determines whether a block should be indexable in search. + * + * @config + * @var boolean + * @see ElementalPageExtension::getElementsForSearch() + */ + private static $search_indexable = true; + /** * Store used anchor names, this is to avoid title clashes * when calling 'getAnchor' @@ -476,6 +485,33 @@ public function Top() return (Controller::has_curr()) ? Controller::curr() : null; } + /** + * Determines whether this elemental block is indexable in search. + * + * By default, this uses the configurable variable search_indexable, but + * this method can be overridden to provide more complex logic if required. 
+ * + * @return boolean + */ + public function getSearchIndexable(): bool + { + return (bool) $this->config()->get('search_indexable'); + } + + /** + * Provides content to be indexed in search. + * + * @return string + */ + public function getContentForSearchIndex(): string + { + // Strips tags but be sure there's a space between words. + $content = trim(strip_tags(str_replace('<', ' <', $this->forTemplate()))); + // Allow projects to update indexable content of third-party elements. + $this->extend('updateContentForSearchIndex', $content); + return $content; + } + /** * Default way to render element in templates. Note that all blocks should * be rendered through their {@link ElementController} class as this diff --git a/tests/BaseElementTest.php b/tests/BaseElementTest.php index dee09114..efc679e1 100644 --- a/tests/BaseElementTest.php +++ b/tests/BaseElementTest.php @@ -7,6 +7,7 @@ use DNADesign\Elemental\Models\BaseElement; use DNADesign\Elemental\Models\ElementalArea; use DNADesign\Elemental\Models\ElementContent; +use DNADesign\Elemental\Tests\Src\TestContentForSearchIndexExtension; use DNADesign\Elemental\Tests\Src\TestElement; use DNADesign\Elemental\Tests\Src\TestPage; use Page; @@ -236,4 +237,21 @@ public function testOnBeforeWriteNoParent() $this->assertEquals(0, (int) $element1->Sort); } + + public function testGetContentForSearchIndex() + { + $element = $this->objFromFixture(ElementContent::class, 'content4'); + // Content should have tags stripped with a space before what were the < characters + // One closing tag plus one opening tag means there should be two spaces between paragraphs + $this->assertEquals('One paragraph And another one', $element->getContentForSearchIndex()); + } + + public function testUpdateContentForSearchIndex() + { + ElementContent::add_extension(TestContentForSearchIndexExtension::class); + $element = $this->objFromFixture(ElementContent::class, 'content4'); + // Content should be updated by the extension + 
$this->assertEquals('This is the updated content.', $element->getContentForSearchIndex()); + ElementContent::remove_extension(TestContentForSearchIndexExtension::class); + } } diff --git a/tests/ElementalPageExtensionTest.php b/tests/ElementalPageExtensionTest.php index 8b9acb16..bf9af182 100644 --- a/tests/ElementalPageExtensionTest.php +++ b/tests/ElementalPageExtensionTest.php @@ -114,4 +114,19 @@ public function testGetElementsForSearch() $this->assertStringNotContainsString('oneMore', $output); $this->assertStringNotContainsString('paragraphsAnd', $output); } + + public function testSearchIndexElementDelimiter() + { + /** @var TestPage $page */ + $page = $this->objFromFixture(TestPage::class, 'page_with_html_elements'); + + // Confirm default delimiter of a single space is applied between elements + $output = $page->getElementsForSearch(); + $this->assertStringContainsString('another one More paragraphs', $output); + + // Confirm configured delimiter is applied between elements + Config::modify()->set(TestPage::class, 'search_index_element_delimiter', ' ... '); + $output = $page->getElementsForSearch(); + $this->assertStringContainsString('another one ... More paragraphs', $output); + } } diff --git a/tests/ElementalPageExtensionTest.yml b/tests/ElementalPageExtensionTest.yml index 2f474e6c..850288df 100644 --- a/tests/ElementalPageExtensionTest.yml +++ b/tests/ElementalPageExtensionTest.yml @@ -25,7 +25,7 @@ DNADesign\Elemental\Models\ElementContent: ParentID: =>DNADesign\Elemental\Models\ElementalArea.area53 content5: Title: More paragraph content - Sort: 1 + Sort: 2 HTML: '
More paragraphs
And yet more
' ParentID: =>DNADesign\Elemental\Models\ElementalArea.area53 DNADesign\Elemental\Tests\Src\TestPage: diff --git a/tests/Src/TestContentForSearchIndexExtension.php b/tests/Src/TestContentForSearchIndexExtension.php new file mode 100644 index 00000000..f1ba7e75 --- /dev/null +++ b/tests/Src/TestContentForSearchIndexExtension.php @@ -0,0 +1,14 @@ +