generated from eigerco/beerus
-
Notifications
You must be signed in to change notification settings - Fork 104
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add Boyer-Moore algorithm (#259)
This PR adds the Boyer-Moore algorithm. ## Pull Request type <!-- Please try to limit your pull request to one type; submit multiple pull requests if needed. --> Please check the type of change your PR introduces: - [ ] Bugfix - [x] Feature - [ ] Code style update (formatting, renaming) - [ ] Refactoring (no functional changes, no API changes) - [ ] Build-related changes - [ ] Documentation content changes - [ ] Other (please describe): ## What is the current behavior? <!-- Please describe the current behavior that you are modifying, or link to a relevant issue. --> Issue Number: N/A ## What is the new behavior? <!-- Please describe the behavior or changes that are being added by this PR. --> - Searches for a pattern ByteArray in a text ByteArray using the Boyer-Moore algorithm. - All tests pass. ## Does this introduce a breaking change? - [ ] Yes - [x] No <!-- If this does introduce a breaking change, please describe the impact and migration path for existing applications below. --> ## Other information <!-- Any other information that is important to this PR, such as screenshots of how the component looks before and after the change. -->
- Loading branch information
Showing
5 changed files
with
251 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
// The Boyer-Moore string search algorithm | ||
use dict::Felt252DictTrait; | ||
|
||
/// Locate every occurrence of `pattern` inside `text` with a Boyer-Moore style
/// scan that compares right-to-left and skips ahead using a last-occurrence table.
/// * `text` - The byte array that is scanned.
/// * `pattern` - The byte array that is looked for.
/// # Returns
/// * `Array<usize>` - Start offsets of all matches in ascending order (overlapping
///   matches included). Empty when either input is empty or `pattern` is longer than `text`.
fn bm_search(text: @ByteArray, pattern: @ByteArray) -> Array<usize> {
    let mut hits: Array<usize> = array![];
    let haystack_len = text.len();
    let needle_len = pattern.len();

    // Nothing can match when an input is empty or the pattern does not fit.
    if needle_len == 0 || haystack_len == 0 || needle_len > haystack_len {
        return hits;
    }

    // last_seen[byte] holds (index of the right-most occurrence in `pattern`) + 1.
    // The +1 offset keeps 0 free to mean "byte not in pattern", because a
    // felt252 dictionary returns 0 for keys that were never inserted.
    let mut last_seen = felt252_dict_new::<usize>();
    let mut cursor: usize = 0;
    loop {
        if cursor == needle_len {
            break;
        }
        let byte = pattern.at(cursor).unwrap();
        cursor += 1;
        last_seen.insert(byte.into(), cursor);
    };

    // Largest start offset at which the pattern still fits inside the text.
    let final_window = haystack_len - needle_len;
    let mut window: usize = 0;

    loop {
        if window > final_window {
            break;
        }

        // Walk the pattern backwards; `unmatched` is the number of leading
        // pattern bytes that have not been confirmed yet.
        let mut unmatched = needle_len;
        loop {
            if unmatched == 0 {
                break;
            }
            let needle_byte = pattern.at(unmatched - 1).unwrap();
            let haystack_byte = text.at(window + unmatched - 1).unwrap();
            if needle_byte != haystack_byte {
                break;
            }
            unmatched -= 1;
        };

        let advance = if unmatched == 0 {
            // Full match at `window`.
            hits.append(window);

            let after_window = window + needle_len;
            if after_window < haystack_len {
                // Align the byte just past the window with its right-most
                // occurrence in the pattern, or jump past it when absent.
                let following = text.at(after_window).unwrap();
                let seen = last_seen.get(following.into());
                if seen == 0 {
                    needle_len + 1
                } else {
                    needle_len - seen + 1
                }
            } else {
                1
            }
        } else {
            // Mismatch: apply the bad-character rule to the offending text byte,
            // always moving forward by at least one position.
            let offending = text.at(window + unmatched - 1).unwrap();
            let seen = last_seen.get(offending.into());
            if unmatched > seen + 1 {
                unmatched - seen
            } else {
                1
            }
        };
        window += advance;
    };

    hits
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
mod binary_search; | ||
mod bm_search; | ||
mod dijkstra; | ||
|
||
#[cfg(test)] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
mod binary_search_test; | ||
mod bm_search_test; | ||
mod dijkstra_test; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
use alexandria_searching::bm_search::bm_search; | ||
|
||
|
||
/// Compare two spans element by element.
/// * `a` - The first span.
/// * `b` - The second span.
/// # Returns
/// * `bool` - True when both spans have the same length and the same values
///   at every position, false otherwise.
fn is_equal(mut a: Span<u32>, mut b: Span<u32>) -> bool {
    // Different lengths can never be equal.
    if a.len() != b.len() {
        return false;
    }

    let mut all_same = true;
    loop {
        match a.pop_front() {
            Option::Some(left) => {
                // Lengths are equal, so `b` always has a counterpart here.
                let right = b.pop_front().unwrap();
                if *left != *right {
                    all_same = false;
                    break;
                }
            },
            Option::None => { break; },
        };
    };
    all_same
}
|
||
|
||
/// A five-byte pattern that occurs three times in the sample text is reported
/// at each of its start offsets.
#[test]
#[available_gas(5000000)]
fn bm_search_test_1() {
    // String literals build the same ByteArray as appending each byte by hand.
    let text: ByteArray = "AABCAB12AFAABCABFFEGABCAB";
    let pattern: ByteArray = "ABCAB";

    let positions = bm_search(@text, @pattern);
    let ground_truth: Array<usize> = array![1, 11, 20];
    assert(is_equal(positions.span(), ground_truth.span()), 'invalid result');
}
|
||
/// A pattern that never occurs in the sample text yields no positions
/// (the text contains "FF" but not "FFF").
#[test]
#[available_gas(5000000)]
fn bm_search_test_2() {
    // String literals build the same ByteArray as appending each byte by hand.
    let text: ByteArray = "AABCAB12AFAABCABFFEGABCAB";
    let pattern: ByteArray = "FFF";

    let positions = bm_search(@text, @pattern);
    let ground_truth: Array<usize> = array![];
    assert(is_equal(positions.span(), ground_truth.span()), 'invalid result');
}
|
||
/// A three-byte pattern that occurs three times in the sample text, the last
/// one ending exactly at the end of the text, is reported at every start offset.
#[test]
#[available_gas(5000000)]
fn bm_search_test_3() {
    // String literals build the same ByteArray as appending each byte by hand.
    let text: ByteArray = "AABCAB12AFAABCABFFEGABCAB";
    let pattern: ByteArray = "CAB";

    let positions = bm_search(@text, @pattern);
    let ground_truth: Array<usize> = array![3, 13, 22];
    assert(is_equal(positions.span(), ground_truth.span()), 'invalid result');
}