Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add Boyer-Moore algorithm #259

Merged
merged 2 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/searching/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Searching

## [Boyer-Moore algorithm](./src/bm_search.cairo)

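The Boyer-Moore algorithm is a string-searching algorithm that finds the positions of a pattern in a text. It compares the pattern against the text from right to left and, in its classic form, preprocesses the pattern into two lookup tables: one for the bad character rule and one for the good suffix rule. The bad character rule shifts the pattern so that the mismatched text character lines up with its last occurrence in the pattern, or moves the pattern past that character when it does not occur in the pattern at all. The good suffix rule shifts the pattern so that the part of the text that already matched a suffix of the pattern lines up with another occurrence of that suffix inside the pattern. The implementation in this module builds only the last-occurrence (bad character) table.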

The Boyer-Moore algorithm has a best-case time complexity of O(n/m) and a worst-case time complexity of O(nm), where n is the length of the text and m is the length of the pattern. It is one of the most efficient string-searching algorithms in practice, particularly for long patterns.

## [Binary search](./src/binary_search.cairo)

The binary search algorithm is a simple search over a sorted array-like collection. It starts by comparing the value we are looking for to the element in the middle of the array. If they do not match, the function calls itself recursively on the left or right half of the array until it either finds the value or determines that the value is not in the array.
Expand Down
72 changes: 72 additions & 0 deletions src/searching/src/bm_search.cairo
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// The Boyer-Moore string search algorithm
use dict::Felt252DictTrait;

/// Searches `text` for every occurrence of `pattern` with a Boyer-Moore style
/// right-to-left scan driven by a last-occurrence (bad character) table.
/// # Arguments
/// * `text` - The text to search in.
/// * `pattern` - The pattern to search for.
/// # Returns
/// * `Array<usize>` - The start index of every match, in ascending order.
///   Empty when either input is empty or `pattern` is longer than `text`.
fn bm_search(text: @ByteArray, pattern: @ByteArray) -> Array<usize> {
    let text_len = text.len();
    let pattern_len = pattern.len();
    let mut matches: Array<usize> = array![];
    if text_len == 0 || pattern_len == 0 || pattern_len > text_len {
        return matches;
    }

    // Record, for each byte of the pattern, the 1-based position of its last
    // occurrence. Positions are 1-based because a felt252 dict reads 0 for
    // keys that were never written, so 0 must keep meaning "not in pattern".
    let mut last_seen = felt252_dict_new::<usize>();
    let mut k: usize = 0;
    loop {
        if k == pattern_len {
            break;
        }
        let byte = pattern.at(k).unwrap();
        last_seen.insert(byte.into(), k + 1);
        k += 1;
    };

    // Last text offset at which the whole pattern still fits.
    let last_start = text_len - pattern_len;
    let mut offset: usize = 0;
    loop {
        if offset > last_start {
            break;
        }

        // Compare right-to-left. `unmatched` counts the pattern bytes that
        // have not been confirmed yet; 0 means the whole pattern matched.
        let mut unmatched = pattern_len;
        loop {
            if unmatched == 0 {
                break;
            }
            let pattern_byte = pattern.at(unmatched - 1).unwrap();
            let text_byte = text.at(offset + unmatched - 1).unwrap();
            if pattern_byte != text_byte {
                break;
            }
            unmatched -= 1;
        };

        let step: usize = if unmatched == 0 {
            matches.append(offset);
            // Full match: choose the shift from the text byte just past the
            // current window, if there is one.
            let next = offset + pattern_len;
            if next < text_len {
                let following = text.at(next).unwrap();
                let seen_at = last_seen.get(following.into());
                if seen_at == 0 {
                    // That byte is absent from the pattern: jump past it.
                    pattern_len + 1
                } else {
                    // Align that byte with its last occurrence in the pattern.
                    pattern_len - seen_at + 1
                }
            } else {
                1
            }
        } else {
            // Mismatch: align the offending text byte with its last occurrence
            // in the pattern, always moving forward by at least one.
            let offending = text.at(offset + unmatched - 1).unwrap();
            let seen_at = last_seen.get(offending.into());
            if unmatched <= seen_at + 1 {
                1
            } else {
                unmatched - seen_at
            }
        };
        offset += step;
    };

    matches
}
1 change: 1 addition & 0 deletions src/searching/src/lib.cairo
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod binary_search;
mod bm_search;
mod dijkstra;

#[cfg(test)]
Expand Down
1 change: 1 addition & 0 deletions src/searching/src/tests.cairo
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
mod binary_search_test;
mod bm_search_test;
mod dijkstra_test;
150 changes: 150 additions & 0 deletions src/searching/src/tests/bm_search_test.cairo
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
use alexandria_searching::bm_search::bm_search;


/// Compares two spans element by element.
/// # Arguments
/// * `a` - The first span.
/// * `b` - The second span.
/// # Returns
/// * `bool` - True when both spans have the same length and the same values
///   at every position, false otherwise.
fn is_equal(mut a: Span<u32>, mut b: Span<u32>) -> bool {
    let mut remaining = a.len();
    if remaining != b.len() {
        return false;
    }
    loop {
        if remaining == 0 {
            break true;
        }
        // Lengths are equal, so both spans still hold `remaining` items.
        let lhs = a.pop_front().unwrap();
        let rhs = b.pop_front().unwrap();
        if *lhs != *rhs {
            break false;
        }
        remaining -= 1;
    }
}


#[test]
#[available_gas(5000000)]
fn bm_search_test_1() {
    // Text "AABCAB12AFAABCABFFEGABCAB" given as raw ASCII bytes.
    let text_bytes: Array<u8> = array![
        0x41, 0x41, 0x42, 0x43, 0x41, 0x42, 0x31, 0x32, 0x41, 0x46,
        0x41, 0x41, 0x42, 0x43, 0x41, 0x42, 0x46, 0x46, 0x45, 0x47,
        0x41, 0x42, 0x43, 0x41, 0x42
    ];
    let mut text: ByteArray = Default::default();
    let mut pending_text = text_bytes.span();
    loop {
        match pending_text.pop_front() {
            Option::Some(byte) => { text.append_byte(*byte); },
            Option::None => { break; },
        };
    };

    // Pattern "ABCAB" given as raw ASCII bytes.
    let pattern_bytes: Array<u8> = array![0x41, 0x42, 0x43, 0x41, 0x42];
    let mut pattern: ByteArray = Default::default();
    let mut pending_pattern = pattern_bytes.span();
    loop {
        match pending_pattern.pop_front() {
            Option::Some(byte) => { pattern.append_byte(*byte); },
            Option::None => { break; },
        };
    };

    // "ABCAB" starts at offsets 1, 11 and 20 of the text.
    let expected: Array<usize> = array![1, 11, 20];
    let found = bm_search(@text, @pattern);
    assert(is_equal(found.span(), expected.span()), 'invalid result');
}

#[test]
#[available_gas(5000000)]
fn bm_search_test_2() {
    // Text "AABCAB12AFAABCABFFEGABCAB" given as raw ASCII bytes.
    let text_bytes: Array<u8> = array![
        0x41, 0x41, 0x42, 0x43, 0x41, 0x42, 0x31, 0x32, 0x41, 0x46,
        0x41, 0x41, 0x42, 0x43, 0x41, 0x42, 0x46, 0x46, 0x45, 0x47,
        0x41, 0x42, 0x43, 0x41, 0x42
    ];
    let mut text: ByteArray = Default::default();
    let mut pending_text = text_bytes.span();
    loop {
        match pending_text.pop_front() {
            Option::Some(byte) => { text.append_byte(*byte); },
            Option::None => { break; },
        };
    };

    // Pattern "FFF" given as raw ASCII bytes.
    let pattern_bytes: Array<u8> = array![0x46, 0x46, 0x46];
    let mut pattern: ByteArray = Default::default();
    let mut pending_pattern = pattern_bytes.span();
    loop {
        match pending_pattern.pop_front() {
            Option::Some(byte) => { pattern.append_byte(*byte); },
            Option::None => { break; },
        };
    };

    // The text only ever contains two consecutive 'F' bytes, so no match.
    let expected: Array<usize> = array![];
    let found = bm_search(@text, @pattern);
    assert(is_equal(found.span(), expected.span()), 'invalid result');
}

#[test]
#[available_gas(5000000)]
fn bm_search_test_3() {
    // Text "AABCAB12AFAABCABFFEGABCAB" given as raw ASCII bytes.
    let text_bytes: Array<u8> = array![
        0x41, 0x41, 0x42, 0x43, 0x41, 0x42, 0x31, 0x32, 0x41, 0x46,
        0x41, 0x41, 0x42, 0x43, 0x41, 0x42, 0x46, 0x46, 0x45, 0x47,
        0x41, 0x42, 0x43, 0x41, 0x42
    ];
    let mut text: ByteArray = Default::default();
    let mut pending_text = text_bytes.span();
    loop {
        match pending_text.pop_front() {
            Option::Some(byte) => { text.append_byte(*byte); },
            Option::None => { break; },
        };
    };

    // Pattern "CAB" given as raw ASCII bytes.
    let pattern_bytes: Array<u8> = array![0x43, 0x41, 0x42];
    let mut pattern: ByteArray = Default::default();
    let mut pending_pattern = pattern_bytes.span();
    loop {
        match pending_pattern.pop_front() {
            Option::Some(byte) => { pattern.append_byte(*byte); },
            Option::None => { break; },
        };
    };

    // "CAB" starts at offsets 3, 13 and 22 of the text.
    let expected: Array<usize> = array![3, 13, 22];
    let found = bm_search(@text, @pattern);
    assert(is_equal(found.span(), expected.span()), 'invalid result');
}