diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..ab5618a --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,42 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: ${{ format('{0}-latest', matrix.os) }} + + strategy: + matrix: + os: [ ubuntu, windows, macos ] + + steps: + - uses: actions/checkout@v3 + + - uses: actions/cache@v3 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - uses: dtolnay/rust-toolchain@stable + + - name: Test + run: cargo test + + - name: Build + run: cargo build --release diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bfbb9b1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/target +/Cargo.lock +.idea +*.iml \ No newline at end of file diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..6f2e075 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1 @@ +tab_spaces = 2 \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..9893ae4 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "jenkins" +description = "Native rust implementation of the hash algorithms from Bob Jenkins." 
+license = "CC0-1.0" +repository = "https://github.com/offsetting/jenkins.git" +homepage = "https://github.com/offsetting/jenkins.git" +version = "0.1.0" +edition = "2021" + +[profile.dev] +overflow-checks = false \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0e259d4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. 
+ +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. 
In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/README.md b/README.md new file mode 100644 index 0000000..fd48a06 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# jenkins + +Native rust implementation of the hash algorithms from Bob Jenkins. \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..ae0bc60 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,5 @@ +#![doc = include_str!("../README.md")] + +pub use lookup2::lookup2; + +mod lookup2; diff --git a/src/lookup2.rs b/src/lookup2.rs new file mode 100644 index 0000000..2d47b45 --- /dev/null +++ b/src/lookup2.rs @@ -0,0 +1,242 @@ +const START: u32 = 0x9e3779b9; + +/// see: https://www.burtleburtle.net/bob/hash/doobs.html +/// ```c +/// typedef unsigned long int ub4; /* unsigned 4-byte quantities */ +/// typedef unsigned char ub1; /* unsigned 1-byte quantities */ +/// +/// #define hashsize(n) ((ub4)1<<(n)) +/// #define hashmask(n) (hashsize(n)-1) +/// +/// /* +/// -------------------------------------------------------------------- +/// mix -- mix 3 32-bit values reversibly. +/// For every delta with one or two bits set, and the deltas of all three +/// high bits or all three low bits, whether the original value of a,b,c +/// is almost all zero or is uniformly distributed, +/// * If mix() is run forward or backward, at least 32 bits in a,b,c +/// have at least 1/4 probability of changing. +/// * If mix() is run forward, every bit of c will change between 1/3 and +/// 2/3 of the time. 
(Well, 22/100 and 78/100 for some 2-bit deltas.) +/// mix() was built out of 36 single-cycle latency instructions in a +/// structure that could supported 2x parallelism, like so: +/// a -= b; +/// a -= c; x = (c>>13); +/// b -= c; a ^= x; +/// b -= a; x = (a<<8); +/// c -= a; b ^= x; +/// c -= b; x = (b>>13); +/// ... +/// Unfortunately, superscalar Pentiums and Sparcs can't take advantage +/// of that parallelism. They've also turned some of those single-cycle +/// latency instructions into multi-cycle latency instructions. Still, +/// this is the fastest good hash I could find. There were about 2^^68 +/// to choose from. I only looked at a billion or so. +/// -------------------------------------------------------------------- +/// */ +/// #define mix(a,b,c) \ +/// { \ +/// a -= b; a -= c; a ^= (c>>13); \ +/// b -= c; b -= a; b ^= (a<<8); \ +/// c -= a; c -= b; c ^= (b>>13); \ +/// a -= b; a -= c; a ^= (c>>12); \ +/// b -= c; b -= a; b ^= (a<<16); \ +/// c -= a; c -= b; c ^= (b>>5); \ +/// a -= b; a -= c; a ^= (c>>3); \ +/// b -= c; b -= a; b ^= (a<<10); \ +/// c -= a; c -= b; c ^= (b>>15); \ +/// } +/// +/// /* +/// -------------------------------------------------------------------- +/// hash() -- hash a variable-length key into a 32-bit value +/// k : the key (the unaligned variable-length array of bytes) +/// len : the length of the key, counting by bytes +/// initval : can be any 4-byte value +/// Returns a 32-bit value. Every bit of the key affects every bit of +/// the return value. Every 1-bit and 2-bit delta achieves avalanche. +/// About 6*len+35 instructions. +/// +/// The best hash table sizes are powers of 2. There is no need to do +/// mod a prime (mod is sooo slow!). If you need less than 32 bits, +/// use a bitmask. For example, if you need only 10 bits, do +/// h = (h & hashmask(10)); +/// In which case, the hash table should have hashsize(10) elements. 
///
/// If you are hashing n strings (ub1 **)k, do it like this:
///   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
///
/// By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
/// code any way you wish, private, educational, or commercial.  It's free.
///
/// See http://burtleburtle.net/bob/hash/evahash.html
/// Use for hash table lookup, or anything where one collision in 2^32 is
/// acceptable.  Do NOT use for cryptographic purposes.
/// --------------------------------------------------------------------
/// */
///
/// ub4 hash( k, length, initval)
/// register ub1 *k;        /* the key */
/// register ub4  length;   /* the length of the key */
/// register ub4  initval;  /* the previous hash, or an arbitrary value */
/// {
///   register ub4 a,b,c,len;
///
///   /* Set up the internal state */
///   len = length;
///   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
///   c = initval;         /* the previous hash value */
///
///   /*---------------------------------------- handle most of the key */
///   while (len >= 12)
///   {
///     a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
///     b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
///     c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
///     mix(a,b,c);
///     k += 12; len -= 12;
///   }
///
///   /*------------------------------------- handle the last 11 bytes */
///   c += length;
///   switch(len)              /* all the case statements fall through */
///   {
///   case 11: c+=((ub4)k[10]<<24);
///   case 10: c+=((ub4)k[9]<<16);
///   case 9 : c+=((ub4)k[8]<<8);
///     /* the first byte of c is reserved for the length */
///   case 8 : b+=((ub4)k[7]<<24);
///   case 7 : b+=((ub4)k[6]<<16);
///   case 6 : b+=((ub4)k[5]<<8);
///   case 5 : b+=k[4];
///   case 4 : a+=((ub4)k[3]<<24);
///   case 3 : a+=((ub4)k[2]<<16);
///   case 2 : a+=((ub4)k[1]<<8);
///   case 1 : a+=k[0];
///     /* case 0: nothing left to add */
///   }
///   mix(a,b,c);
///   /*-------------------------------------------- report the result */
///   return c;
/// }
/// ```
///
/// # Rust port
///
/// Hashes the byte slice `data` into a 32-bit value.
///
/// * `data` - the key; any length, including empty.
/// * `initval` - seed value; pass the previous hash to chain several keys.
///
/// Returns the final `c` word after the last mixing round.
///
/// All additions and subtractions wrap modulo 2^32, exactly like the unsigned
/// arithmetic of the C reference, so the function never panics on overflow no
/// matter how the calling build configures `overflow-checks`.
///
/// The key length folded into the state is `data.len()` truncated to 32 bits,
/// mirroring the 4-byte `length` parameter of the reference.
///
/// # Examples
///
/// ```
/// use jenkins::lookup2;
///
/// assert_eq!(lookup2(b"", 0), 0xbd49_d10d);
/// ```
pub fn lookup2(data: &[u8], initval: u32) -> u32 {
  let mut a = START;
  let mut b = START;
  let mut c = initval;

  // Consume the key twelve bytes (three little-endian words) at a time.
  let mut blocks = data.chunks_exact(12);
  for block in &mut blocks {
    a = a.wrapping_add(read_u32_le(block, 0));
    b = b.wrapping_add(read_u32_le(block, 4));
    c = c.wrapping_add(read_u32_le(block, 8));
    (a, b, c) = mix(a, b, c);
  }

  // Intentional truncation: the reference takes the length as a 4-byte value.
  c = c.wrapping_add(data.len() as u32);

  // Fold in the remaining 0..=11 bytes. The lowest byte of `c` is reserved for
  // the length added above, so tail bytes 8..=10 land in bits 8..32 of `c`.
  for (idx, &byte) in blocks.remainder().iter().enumerate() {
    let byte = u32::from(byte);
    match idx {
      0..=3 => a = a.wrapping_add(byte << (8 * idx)),
      4..=7 => b = b.wrapping_add(byte << (8 * (idx - 4))),
      _ => c = c.wrapping_add(byte << (8 * (idx - 7))),
    }
  }

  mix(a, b, c).2
}

/// Reads the little-endian `u32` stored in `bytes[at..at + 4]`.
///
/// Panics if fewer than four bytes are available at `at`; `lookup2` only calls
/// it on full 12-byte blocks.
fn read_u32_le(bytes: &[u8], at: usize) -> u32 {
  u32::from_le_bytes([bytes[at], bytes[at + 1], bytes[at + 2], bytes[at + 3]])
}

/// Mixes three 32-bit words reversibly; the `mix` macro of the C reference.
///
/// Each line below is one `x -= y; x -= z; x ^= shifted` triple of the macro.
/// Subtractions wrap modulo 2^32 (C unsigned semantics) instead of panicking
/// when overflow checks are enabled.
///
/// Returns the updated `(a, b, c)`.
fn mix(mut a: u32, mut b: u32, mut c: u32) -> (u32, u32, u32) {
  a = a.wrapping_sub(b).wrapping_sub(c) ^ (c >> 13);
  b = b.wrapping_sub(c).wrapping_sub(a) ^ (a << 8);
  c = c.wrapping_sub(a).wrapping_sub(b) ^ (b >> 13);

  a = a.wrapping_sub(b).wrapping_sub(c) ^ (c >> 12);
  b = b.wrapping_sub(c).wrapping_sub(a) ^ (a << 16);
  c = c.wrapping_sub(a).wrapping_sub(b) ^ (b >> 5);

  a = a.wrapping_sub(b).wrapping_sub(c) ^ (c >> 3);
  b = b.wrapping_sub(c).wrapping_sub(a) ^ (a << 10);
  c = c.wrapping_sub(a).wrapping_sub(b) ^ (b >> 15);

  (a, b, c)
}

#[cfg(test)]
mod tests {
  use super::{lookup2, mix};

  #[test]
  fn test_lookup2() {
    let in1 = "Yo mama is so fat that her bellybuttongs got an echo.".as_bytes();
    let in2 = "Yo mama is so stupid that she failed a survey.".as_bytes();

    assert_eq!(lookup2(in1, 0), 0x2CEB1226);
    assert_eq!(lookup2(in2, 0xDEADBEEF), 0xD1215833);
  }

  #[test]
  fn test_lookup2_empty() {
    // With no key bytes the state is just (START, START, initval).
    assert_eq!(lookup2(&[], 0), 0xBD49D10D);
  }

  #[test]
  fn test_mix() {
    assert_eq!(
      mix(346972, 5874, 5287068),
      (1151851378, 1918881843, 2302927392)
    )
  }
}