Skip to content

Commit

Permalink
Add Arabic search support
Browse files Browse the repository at this point in the history
Behind the `arabic-search` Cargo feature flag (disabled by default).
Previous approach: 1d537b5
  • Loading branch information
abdnh committed Aug 9, 2023
1 parent 56c225b commit 2caf9ae
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 3 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ default = ["watch", "serve", "search"]
watch = ["dep:notify", "dep:notify-debouncer-mini", "dep:ignore"]
serve = ["dep:futures-util", "dep:tokio", "dep:warp"]
search = ["dep:elasticlunr-rs", "dep:ammonia"]
# Opt-in Arabic search support. Implies `search`; the `?/` form turns on the `ar`
# feature of `elasticlunr-rs` without itself enabling that optional dependency.
arabic-search = ["search", "elasticlunr-rs?/ar"]

[[bin]]
doc = false
Expand Down
9 changes: 9 additions & 0 deletions src/renderer/html_handlebars/hbs_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,15 @@ fn make_data(
"search_js".to_owned(),
json!(search.enable && search.copy_js),
);
// `cfg!` expands to a compile-time `bool`, so it can be used directly instead of
// being wrapped in `if .. { true } else { false }`.
// The flag is exposed to the template so it can decide whether to load `lunr.ar.js`.
let ar_search = cfg!(feature = "arabic-search");
data.insert("ar_search".to_owned(), json!(ar_search));
} else if search.is_some() {
warn!("mdBook compiled without search support, ignoring `output.html.search` table");
warn!(
Expand Down
12 changes: 9 additions & 3 deletions src/renderer/html_handlebars/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::path::Path;

use elasticlunr::{Index, IndexBuilder};
use elasticlunr::{Index, IndexBuilder, lang::from_code};
use once_cell::sync::Lazy;
use pulldown_cmark::*;

Expand All @@ -27,11 +27,15 @@ fn tokenize(text: &str) -> Vec<String> {

/// Creates all files required for search.
pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> {
let mut index = IndexBuilder::new()
let mut index = if cfg!(feature = "arabic-search") {
Index::with_language(from_code("ar").unwrap(), &["title", "body", "breadcrumbs"])
} else {
IndexBuilder::new()
.add_field_with_tokenizer("title", Box::new(&tokenize))
.add_field_with_tokenizer("body", Box::new(&tokenize))
.add_field_with_tokenizer("breadcrumbs", Box::new(&tokenize))
.build();
.build()
};

let mut doc_urls = Vec::with_capacity(book.sections.len());

Expand All @@ -55,6 +59,8 @@ pub fn create_files(search_config: &Search, destination: &Path, book: &Book) ->
utils::fs::write_file(destination, "searcher.js", searcher::JS)?;
utils::fs::write_file(destination, "mark.min.js", searcher::MARK_JS)?;
utils::fs::write_file(destination, "elasticlunr.min.js", searcher::ELASTICLUNR_JS)?;
#[cfg(feature = "arabic-search")]
utils::fs::write_file(destination, "lunr.ar.js", searcher::LUNR_AR_JS)?;
debug!("Copying search files ✓");
}

Expand Down
3 changes: 3 additions & 0 deletions src/theme/index.hbs
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,9 @@

{{#if search_js}}
<script src="{{ path_to_root }}elasticlunr.min.js"></script>
{{#if ar_search}}
{{!-- Arabic add-on: loaded after elasticlunr.min.js because it registers itself on the global `lunr` object and throws if that object is missing. --}}
<script src="{{ path_to_root }}lunr.ar.js"></script>
{{/if}}
<script src="{{ path_to_root }}mark.min.js"></script>
<script src="{{ path_to_root }}searcher.js"></script>
{{/if}}
Expand Down
95 changes: 95 additions & 0 deletions src/theme/searcher/lunr.ar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*!
* Simple Arabic stemmer based on lunr.ar.js from https://github.com/MihaiValentin/lunr-languages
*
* Copyright 2018, Dalia Al-Shahrabi
* http://www.mozilla.org/MPL/
*/

/**
* export the module via AMD, CommonJS or as a browser global
* Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
*/
;
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
// AMD. Register as an anonymous module.
define(factory)
} else if (typeof exports === 'object') {
/**
* Node. Does not work with strict CommonJS, but
* only CommonJS-like environments that support module.exports,
* like Node.
*/
module.exports = factory()
} else {
// Browser globals (root is window)
factory()(root.lunr);
}
}(this, function () {
/**
* Just return a value to define the module export.
* This example returns an object, but the module
* can return a function as the exported value.
*/
return function (lunr) {
/* throw error if lunr is not yet included */
if ('undefined' === typeof lunr) {
throw new Error('Lunr is not present. Please include / require Lunr before this script.');
}

/* register specific locale function */
lunr.ar = function () {
this.pipeline.reset();
this.pipeline.add(
lunr.ar.stemmer
);

// for lunr version 2
// this is necessary so that every searched word is also stemmed before
// in lunr <= 1 this is not needed, as it is done using the normal pipeline
if (this.searchPipeline) {
this.searchPipeline.reset();
this.searchPipeline.add(lunr.ar.stemmer)
}
};

/* lunr stemmer function */
lunr.ar.stemmer = (function () {

/* remove elongating character */
self.removeElongating = function (word) {
return word.replace(/[\u0640]/gi, '');
}

self.removeDiacritics = function (word) {
return word.replace(/[\u064b-\u065b]/gi, '');
}

/*Replace all variations of alef (آأإٱى) to a plain alef (ا)*/
self.cleanAlef = function (word) {
return word.replace(/[\u0622\u0623\u0625\u0671\u0649]/gi, "\u0627");
}

self.execArray = [
'removeElongating',
'removeDiacritics',
'cleanAlef'
];

self.stem = function (word) {
var counter = 0;
while (counter < self.execArray.length) {
word = self[self.execArray[counter]](word);
counter++;
}
return word;
}

return function (word) {
return self.stem(word);
}
})();

lunr.Pipeline.registerFunction(lunr.ar.stemmer, 'stemmer-ar');
};
}))
2 changes: 2 additions & 0 deletions src/theme/searcher/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
/// Contents of `searcher.js`, embedded into the binary at compile time.
pub static JS: &[u8] = include_bytes!("searcher.js");
/// Contents of `mark.min.js`, embedded into the binary at compile time.
pub static MARK_JS: &[u8] = include_bytes!("mark.min.js");
/// Contents of `elasticlunr.min.js`, embedded into the binary at compile time.
pub static ELASTICLUNR_JS: &[u8] = include_bytes!("elasticlunr.min.js");
/// Contents of `lunr.ar.js` (Arabic stemmer add-on), embedded at compile time.
/// Only present when the `arabic-search` feature is enabled.
#[cfg(feature = "arabic-search")]
pub static LUNR_AR_JS: &[u8] = include_bytes!("lunr.ar.js");

0 comments on commit 2caf9ae

Please sign in to comment.