-
Notifications
You must be signed in to change notification settings - Fork 0
/
noomnem.py
219 lines (176 loc) · 8.31 KB
/
noomnem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# Copyright (C) 2022 Anton Shevchenko
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""Reference implementation of Noo-Mnemonic (no-order mnemonic) entropy encoding."""
import vocab
import hashlib # sha256, used only for checksum
# in python all ints are the same. when porting to other language it's better to know whether int means usual 32-bit
# integer (hinted as int in this module) or bigint data expected (bigint must hold up to 1024 bits).
bigint = int
# returns number of possible combinations to pick m elements from the set of n.
# for example, from the set of 5 elements {1, 2, 3, 4, 5} there are 5 options to pick 1 element (obviously);
# 10 options to pick 2 elements: [1, 2], [1, 3], [1, 4], [1, 5], [2, 3], [2, 4], [2, 5], [3, 4], [3, 5], [4, 5].
# mathematical notation for this kind of function is capital C with subscript n and superscript m.
# the formula is: C(n, m) = n! / ((n - m)! * m!) where x! is factorial of x.
# this is a pure function meaning result of the function is dependent only of arguments. this makes possible
# to precalc all data (and store it in a table) before first use (not implemented).
def combi(n: int, m: int) -> bigint:
assert n > 0 and m >= 0
assert m <= n <= 2048
result = 1
div = 1
while div <= m:
result = result * n // div
n -= 1
div += 1
return result
# some inline tests
assert combi(5, 1) == 5
assert combi(5, 2) == 10
assert combi(10, 3) == 120
def combi2(n: int, m: int) -> bigint:
if n < m:
return 0
return combi(n, m)
# returns truncated value of base 2 logarithm - meaning for values 5, 6 and 7 this function returns 2.
# logarithm answers the following question: to what exponent we need to raise base (in this case 2) to get the value.
# log2(4) == 2 as we need to use exponent 2 for the base 2 to obtain 4.
# log2(8) == 3 because 2 to the power of 3 equals 8.
def log2(n: bigint) -> int:
result = 0
while n >= 256:
n >>= 8
result += 8
while n > 1:
n //= 2
result += 1
return result
# some inline tests
assert log2(2048) == 11
assert log2(8) == 3
assert log2(7) == 2
assert log2(6) == 2
assert log2(5) == 2
assert log2(4) == 2
# returns upper bound value for logarithm of n with respect to base 2.
# upper bound means that for values between 4 and 8 we get the answer 3, hence upper bound.
def log2upper(n: int):
result = 0
while n > 1:
n += 1
n //= 2
result += 1
return result
assert log2upper(2048) == 11
assert log2upper(8) == 3
assert log2upper(7) == 3
assert log2upper(6) == 3
assert log2upper(5) == 3
assert log2upper(4) == 2
# encodes entropy (value parameter) into list of sorted ints. m defines number of elements in a pick.
# we don't need to know n for this scheme. what is scheme23? see a comment in scheme14.py
def scheme23_encode_ints(value: bigint, m: int) -> list[int]:
result = []
for i in range(m, 0, -1):
cur = i
test = combi2(cur, i)
while test <= value:
cur += 1
test = combi2(cur, i)
cur -= 1
result.append(cur)
value -= combi2(cur, i)
return result[::-1]
# decodes given list of sorted ints into entropy value. scheme 23 is used. elems must be already sorted.
# why named scheme23? see a comment in scheme14.py
def scheme23_decode_ints(elems: list[int]) -> bigint:
count = len(elems)
result = 0
while count > 0:
result += combi2(elems[count-1], count)
count -= 1
return result
# returns number of words (from the standard 2048 words vocabulary) enough to derive given number of bits of entropy.
def calc_nwords(desired_entropy: int):
for i in range(1, 1024):
entropy = log2(combi(2048, i))
if entropy >= desired_entropy:
return i
return None
assert calc_nwords(128) == 16 # 16 words enough to encode 131 bits of entropy
assert calc_nwords(192) == 26 # 26 words enough to encode 197 bits of entropy
assert calc_nwords(256) == 36 # 36 words enough to encode 257 bits of entropy
SUPPORTED_LENGTHS = {16: 128, 26: 192, 37: 256} # key is number of words, value is bits of entropy encoded.
# for 256 bits we use 37 words. 36 words is enough to encode 257 bits but that means checksum is a single bit.
# 37th word adds extra 6 bits. so checksums are of size 3, 5, 7 bits for entropy length 128, 192, 256 bits respectively.
CHECKSUM_LENGTHS = {16: 3, 26: 5, 37: 7}
# reverse dictionary is used when we need to find key by knowing value.
# in this dictionary key is bits of entropy, and value is number of words.
SUPPORTED_LENGTHS_REVERSED = {v: k for k, v in SUPPORTED_LENGTHS.items()}
def _calc_checksum(data_without_checksum: bigint, len_words: int) -> int:
data_len = SUPPORTED_LENGTHS[len_words] // 8
data_bytes = data_without_checksum.to_bytes(data_len, 'big')
hash = hashlib.sha256(data_bytes).digest()
hashint = int.from_bytes(hash, 'big')
mask = (1 << CHECKSUM_LENGTHS[len_words]) - 1
return hashint & mask
# splits data into two components: data_without_checksum and checksum.
# input - data with checksum; returns tuple of (data_without_checksum, checksum)
def _extract_checksum(data: bigint, len_words: int) -> (bigint, int):
checksum_len = CHECKSUM_LENGTHS[len_words]
mask = (1 << checksum_len) - 1
data_without_checksum = data >> checksum_len
return data_without_checksum, data & mask
def _combine_checksum(data_without_checksum: bigint, len_words: int) -> bigint:
checksum = _calc_checksum(data_without_checksum, len_words)
data = (data_without_checksum << CHECKSUM_LENGTHS[len_words]) | checksum
return data
def noomnem_decode(words: list[str]) -> bytes:
"""Decodes given list of words into bigint data."""
# check input parameters
len_words = len(words)
if len_words not in SUPPORTED_LENGTHS.keys():
raise ValueError('unsupported number of words ' + str(len_words + 1))
# check words are in vocabulary
ints = sorted([vocab.mnemonic_dict[w] for w in words])
# check for repetitions
for i in range(len(ints)-1):
if ints[i] == ints[i+1]:
raise ValueError('duplicate word at pos ' + str(i+1))
# decode from array of ints into one bigint
result = scheme23_decode_ints(ints)
if log2(result) >= SUPPORTED_LENGTHS[len_words] + CHECKSUM_LENGTHS[len_words]:
raise ValueError('decoded value is exceeding the limit')
# extract, calculate and compare checksum
result_without_checksum, checksum = _extract_checksum(result, len_words)
calculated = _calc_checksum(result_without_checksum, len_words)
if checksum != calculated:
raise ValueError('checksum validation failed')
return result_without_checksum.to_bytes(SUPPORTED_LENGTHS[len_words] // 8, 'big')
def noomnem_encode(data: bytes) -> list[str]:
"""Encodes given bigint data into list of words."""
len_words = SUPPORTED_LENGTHS_REVERSED[len(data) * 8]
if len_words not in SUPPORTED_LENGTHS.keys():
raise ValueError('unsupported number of words ' + str(len_words + 1))
intdata = int.from_bytes(data, 'big')
assert log2(intdata) < SUPPORTED_LENGTHS[len_words]
data_with_checksum = _combine_checksum(intdata, len_words)
assert log2(data_with_checksum) < SUPPORTED_LENGTHS[len_words] + CHECKSUM_LENGTHS[len_words]
ints = scheme23_encode_ints(data_with_checksum, len_words)
return [vocab.mnemonic[i] for i in ints]