-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathitercools.py
489 lines (391 loc) · 12.1 KB
/
itercools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
"""
See https://docs.python.org/3/library/itertools.html#itertools-recipes
for more cool recipes!
See also the toolz project for more cool functional programming helpers:
https://github.com/pytoolz/toolz
"""
import operator as op
from collections import defaultdict, deque
from collections.abc import Iterator
from functools import lru_cache, partial, wraps
from itertools import chain, filterfalse, islice, tee
def filters(iterable, *predicates):
    """Return one lazily filtered view of ``iterable`` per predicate.

    The input is duplicated with ``itertools.tee`` so that every predicate
    gets to inspect every element.  The result is a tuple of ``filter``
    iterators, in the same order as ``predicates``.

    >>> div_by_two = lambda x: not x % 2
    >>> div_by_three = lambda x: not x % 3
    >>> twos, threes = filters(range(10), div_by_two, div_by_three)
    >>> list(twos)
    [0, 2, 4, 6, 8]
    >>> list(threes)
    [0, 3, 6, 9]
    """
    copies = tee(iterable, len(predicates))
    # map() pairs each predicate with its own private copy of the input.
    return tuple(map(filter, predicates, copies))
def partition(iterable, predicate):
    """Split ``iterable`` into (elements passing, elements failing).

    Both halves are lazy iterators fed from a ``tee`` of the input, so the
    source is traversed only once even though the predicate is evaluated
    separately for each half.

    >>> evens, odds = partition(range(10), lambda x: not x % 2)
    >>> list(evens)
    [0, 2, 4, 6, 8]
    >>> list(odds)
    [1, 3, 5, 7, 9]
    """
    accept_src, reject_src = tee(iterable, 2)
    accepted = filter(predicate, accept_src)
    rejected = filterfalse(predicate, reject_src)
    return accepted, rejected
def matches(iterable, predicate, value):
    """Lazily yield each element whose ``predicate(element)`` equals ``value``.

    >>> list(matches(range(10), lambda x: x % 3, 2))
    [2, 5, 8]
    """
    yield from (
        candidate for candidate in iterable
        if predicate(candidate) == value
    )
def batches(iterable, batch_size):
    """Lazily divide an iterable into lists of at most ``batch_size`` items.

    Every batch is a list holding ``batch_size`` elements, except possibly
    the last one, which holds whatever remains.  Nothing is yielded for an
    empty input or when ``batch_size`` is 0.

    >>> list(batches(range(7), 3))
    [[0, 1, 2], [3, 4, 5], [6]]
    >>> list(batches([], 3))
    []
    """
    # Obtain a single iterator up front.  Slicing a re-iterable object
    # (list, range, str, ...) directly would restart from its beginning on
    # every pass and yield the first batch forever.
    iterator = iter(iterable)
    while batch := list(islice(iterator, batch_size)):
        yield batch
def lazydivvy(iterable, predicate, values):
    """Build a dict mapping each value to a lazy iterator of its elements.

    For every entry of ``values`` the result holds an iterator over the
    elements of ``iterable`` whose ``predicate(element)`` equals that
    value.  ``values`` must support ``len()``.  See divvy for an eager
    version that doesn't require specifying values.

    >>> remainders = lazydivvy(range(10), lambda x: x % 3, range(3))
    >>> for remainder, values in sorted(remainders.items()):
    ...     print(remainder, list(values))
    0 [0, 3, 6, 9]
    1 [1, 4, 7]
    2 [2, 5, 8]
    """
    # One independent copy of the input per requested value.
    copies = tee(iterable, len(values))
    lookup = {}
    for copy, value in zip(copies, values):
        lookup[value] = matches(copy, predicate, value)
    return lookup
def divvy(iterable, predicate):
    """Eagerly group the items of ``iterable`` into a dict of sets.

    Keys are the distinct results of ``predicate(item)``; each value is the
    set of items that produced that result.

    >>> remainders = divvy(range(10), lambda x: x % 3)
    >>> for remainder, divisors in sorted(remainders.items()):
    ...     print(remainder, sorted(divisors))
    0 [0, 3, 6, 9]
    1 [1, 4, 7]
    2 [2, 5, 8]
    """
    groups = {}
    for element in iterable:
        # setdefault creates the bucket the first time a key shows up.
        groups.setdefault(predicate(element), set()).add(element)
    return groups
def defaultdivvy(iterable, predicate):
    """Eagerly group the items of ``iterable`` into a defaultdict of sets.

    Same grouping as divvy, but the returned mapping is a
    ``defaultdict(set)``, so looking up an unseen key produces (and
    stores) an empty set.

    >>> remainders = defaultdivvy(range(10), lambda x: x % 3)
    >>> for remainder, divisors in sorted(remainders.items()):
    ...     print(remainder, sorted(divisors))
    0 [0, 3, 6, 9]
    1 [1, 4, 7]
    2 [2, 5, 8]
    """
    groups = defaultdict(set)
    for element in iterable:
        bucket = groups[predicate(element)]
        bucket.add(element)
    return groups
def invert(dct):
    """Map each value of ``dct`` to the set of keys that carry it.

    >>> dct = {'ayy': True, 'lmao': True, 'foo': False, 'bar': False}
    >>> inv = {True: {'ayy', 'lmao'}, False: {'foo', 'bar'}}
    >>> assert invert(dct) == inv, inv
    """
    # Iterating a dict yields its keys; group them by their stored value.
    value_of = dct.__getitem__
    return divvy(dct, value_of)
def dict_zip(keys, *dicts):
    """Yield ``(key, values)`` pairs for each key in ``keys``.

    ``values`` is a tuple holding ``d[key]`` for every dict, in argument
    order.  A key missing from any dict raises ``KeyError`` when that pair
    is requested.

    >>> a = dict.fromkeys([1, 2], 'a')
    >>> b = dict.fromkeys([1, 2, 3], 'b')
    >>> c = dict.fromkeys([2, 3], 'c')
    >>> for key, values in dict_zip([1, 2], a, b): print(key, values)
    1 ('a', 'b')
    2 ('a', 'b')
    >>> for key, values in dict_zip([2, 3], b, c): print(key, values)
    2 ('b', 'c')
    3 ('b', 'c')
    >>> for key, values in dict_zip([2], a, b, c): print(key, values)
    2 ('a', 'b', 'c')
    """
    for key in keys:
        row = tuple([mapping[key] for mapping in dicts])
        yield key, row
def dict_zip_default(keys, *dicts, default=None):
    """Yield ``(key, values)`` pairs, filling gaps with ``default``.

    Like dict_zip, but a dict that lacks the key contributes ``default``
    to the values tuple instead of raising ``KeyError``.

    >>> a = dict.fromkeys([1, 2], 'a')
    >>> b = dict.fromkeys([1, 2, 3], 'b')
    >>> c = dict.fromkeys([2, 3], 'c')
    >>> for k, v in dict_zip_default([1, 2, 3], a, b): print(k, v)
    1 ('a', 'b')
    2 ('a', 'b')
    3 (None, 'b')
    >>> for k, v in dict_zip_default([1, 2, 3], b, c): print(k, v)
    1 ('b', None)
    2 ('b', 'c')
    3 ('b', 'c')
    >>> for k, v in dict_zip_default([1, 2, 3], a, b, c): print(k, v)
    1 ('a', 'b', None)
    2 ('a', 'b', 'c')
    3 (None, 'b', 'c')
    """
    for key in keys:
        row = tuple([mapping.get(key, default) for mapping in dicts])
        yield key, row
def dict_zip_all(*dicts):
    """Yield keys common to all dicts and tuples of values.

    Keys are produced in the iteration order of the first dict, so the
    output order is deterministic.  Each yielded pair is ``(key, values)``
    where ``values`` holds ``d[key]`` for every dict in argument order.
    Called with no dicts at all, nothing is yielded.

    >>> a = dict.fromkeys([1, 2], 'a')
    >>> b = dict.fromkeys([1, 2, 3], 'b')
    >>> c = dict.fromkeys([2, 3], 'c')
    >>> for key, values in dict_zip_all(a, b): print(key, values)
    1 ('a', 'b')
    2 ('a', 'b')
    >>> for key, values in dict_zip_all(b, c): print(key, values)
    2 ('b', 'c')
    3 ('b', 'c')
    >>> for key, values in dict_zip_all(a, b, c): print(key, values)
    2 ('a', 'b', 'c')
    >>> list(dict_zip_all())
    []
    """
    if not dicts:
        # An intersection over zero dicts has no keys to report.
        return
    first, *rest = dicts
    # Snapshot the shared keys before yielding anything, so the key set is
    # fixed once iteration starts.
    shared = [key for key in first if all(key in other for other in rest)]
    for key in shared:
        yield key, tuple(d[key] for d in dicts)
def dict_zip_any(*dicts, default=None):
    """Yield all keys from all dicts and tuples of values.

    Keys are produced in first-seen order (all keys of the first dict,
    then any new keys of the second, and so on), so the output order is
    deterministic.  A dict that lacks a key contributes ``default`` to the
    values tuple.  Called with no dicts at all, nothing is yielded.

    >>> a = dict.fromkeys([1, 2], 'a')
    >>> b = dict.fromkeys([1, 2, 3], 'b')
    >>> c = dict.fromkeys([2, 3], 'c')
    >>> for key, values in dict_zip_any(a, b): print(key, values)
    1 ('a', 'b')
    2 ('a', 'b')
    3 (None, 'b')
    >>> for key, values in dict_zip_any(b, c): print(key, values)
    1 ('b', None)
    2 ('b', 'c')
    3 ('b', 'c')
    >>> for key, values in dict_zip_any(a, b, c): print(key, values)
    1 ('a', 'b', None)
    2 ('a', 'b', 'c')
    3 (None, 'b', 'c')
    >>> list(dict_zip_any())
    []
    """
    # dict.fromkeys de-duplicates while keeping first-seen order; it also
    # copes with zero dicts, which set.union(*[]) does not.
    keys = dict.fromkeys(chain.from_iterable(dicts))
    for key in keys:
        yield key, tuple(d.get(key, default) for d in dicts)
def unique(*iterables, key=None):
    """Yield each distinct element once, in first-seen order.

    All ``iterables`` are treated as one concatenated stream.  When ``key``
    is given, two elements count as duplicates if ``key`` returns equal
    results for them; the first such element is the one yielded.

    >>> ''.join(unique('AAAABBBCCDAABBB'))
    'ABCD'
    >>> ''.join(unique('AAAA', 'BBBC', 'CDA', 'ABBB'))
    'ABCD'
    >>> ''.join(unique('ABBCcAD', key=str.casefold))
    'ABCD'
    """
    seen = set()
    stream = chain.from_iterable(iterables)

    if key is not None:
        # Deduplicate on the derived key, but hand back the element itself.
        for item in stream:
            fingerprint = key(item)
            if fingerprint in seen:
                continue
            seen.add(fingerprint)
            yield item
        return

    for item in stream:
        if item in seen:
            continue
        seen.add(item)
        yield item
def reuse(func=None, *, cache=lru_cache()):
    """Cache and reuse a generator function across multiple calls.

    The first call with a given set of arguments starts the underlying
    generator.  Every item it produces is recorded, and later calls with
    the same arguments replay the recorded items before drawing any new
    ones from the same underlying generator.  Several consumers may be
    advanced in an interleaved fashion; each one still sees the complete
    sequence.

    Works as ``@reuse`` or ``@reuse(cache=...)``.  ``cache`` is the
    decorator used to memoise the per-arguments state.  With the default
    ``lru_cache()`` the call arguments must be hashable, and an evicted
    entry simply causes the generator to be started afresh.

    >>> @reuse
    ... def squares(n):
    ...     print("computing")
    ...     for i in range(n):
    ...         yield i * i
    >>> list(squares(3))
    computing
    [0, 1, 4]
    >>> list(squares(3))
    [0, 1, 4]
    """
    # Allow this decorator to work with or without being called
    if func is None:
        return partial(reuse, cache=cache)

    # Either initialize an empty history and start a new generator, or
    # retrieve an existing history and the already-started generator
    # that produced it
    @cache
    def resume(*args, **kwargs):
        return [], func(*args, **kwargs)

    @wraps(func)
    def reuser(*args, **kwargs):
        history, gen = resume(*args, **kwargs)
        position = 0
        while True:
            # Re-check the shared history before every pull: another
            # consumer of the same generator may have recorded new items
            # since this one last looked.
            if position < len(history):
                item = history[position]
            else:
                try:
                    item = next(gen)
                except StopIteration:
                    return
                history.append(item)
            position += 1
            yield item

    return reuser
class Peekable(Iterator):
    """Iterator wrapper that can look at the next item without consuming it.

    The wrapper always holds one item of look-ahead, so the first item of
    the underlying iterable is fetched as soon as the wrapper is created.
    Truth-testing the wrapper reports whether another item is available.

    >>> p = Peekable(range(3))
    >>> next(p)
    0
    >>> next(p)
    1
    >>> p.peek()
    2
    >>> p.peek(default=None)
    2
    >>> bool(p)
    True
    >>> next(p)
    2
    >>> bool(p)
    False
    >>> p.peek()
    Traceback (most recent call last):
    ...
    StopIteration
    >>> p.peek(default=None)
    >>> next(p)
    Traceback (most recent call last):
    ...
    StopIteration
    >>> list(Peekable(range(3)))
    [0, 1, 2]
    """

    # Private marker for "no default supplied", so that None (or any other
    # value) remains usable as an explicit default.
    _MISSING = object()

    def __init__(self, it):
        """Wrap ``it`` and pre-fetch its first item."""
        self.__it = iter(it)
        self.__advance()

    def __advance(self):
        """Load the next look-ahead item, or mark the wrapper as empty."""
        try:
            self.__next_val = next(self.__it)
        except StopIteration:
            self.__empty = True
        else:
            self.__empty = False

    def peek(self, *, default=_MISSING):
        """Return the next item without advancing the iterator.

        Raises StopIteration if the iterator is empty, unless a default
        value is provided as a keyword argument, in which case that value
        is returned instead.  Any keyword other than ``default`` is
        rejected with TypeError rather than silently ignored.
        """
        if not self.__empty:
            return self.__next_val
        if default is Peekable._MISSING:
            raise StopIteration
        return default

    def __next__(self):
        """Return the look-ahead item and fetch the one after it."""
        if self.__empty:
            raise StopIteration
        val = self.__next_val
        self.__advance()
        return val

    def __bool__(self):
        """Return True while at least one more item is available."""
        return not self.__empty
def last(iterator):
    """Drain ``iterator`` and return the final element it produced.

    Raises StopIteration when there was nothing left to consume.

    >>> it = iter(range(10))
    >>> last(it)
    9
    >>> last(it)
    Traceback (most recent call last):
    ...
    StopIteration
    """
    # A one-slot deque keeps only the most recent element while draining.
    window = deque(iterator, maxlen=1)
    try:
        final = window[-1]
    except IndexError:
        raise StopIteration
    return final
def tail(n, iterator):
    """Drain ``iterator`` and return its final ``n`` element(s) in a deque.

    The returned deque has ``maxlen=n`` and holds fewer than ``n`` items
    when the iterator ran out early.

    >>> it = iter(range(10))
    >>> tail(2, it)
    deque([8, 9], maxlen=2)
    >>> tail(1, it)
    deque([], maxlen=1)
    """
    window = deque(maxlen=n)
    # Older items fall off the left end as newer ones arrive.
    window.extend(iterator)
    return window
def advance(n, iterator):
    """Consume and discard the next n element(s) of the iterator.

    Raises StopIteration if ``n`` is non-zero and the iterator had no
    elements left at all.  Running out part-way (fewer than ``n`` elements
    remaining, but at least one) is not an error.  Advancing by zero is
    always a no-op.

    >>> it = (print(i) for i in range(3))
    >>> advance(0, it)
    >>> advance(2, it)
    0
    1
    >>> advance(1, it)
    2
    >>> advance(1, it)
    Traceback (most recent call last):
    ...
    StopIteration
    """
    if n == 0:
        # Nothing was requested, so an empty result must not be read as
        # "iterator exhausted".
        return
    # A one-slot deque drains the slice cheaply and is non-empty if at
    # least one element was consumed.
    if not deque(islice(iterator, n), maxlen=1):
        raise StopIteration
def exhaust(iterator):
    """Run ``iterator`` to completion, discarding everything it yields.

    >>> it = (print(i) for i in range(2))
    >>> exhaust(it)
    0
    1
    >>> exhaust(it)
    """
    for _ in iterator:
        pass
def genlen(iterator):
    """Drain ``iterator`` and report how many values it produced.

    >>> it = iter(range(10))
    >>> genlen(it)
    10
    >>> genlen(it)
    0
    """
    count = 0
    for _ in iterator:
        count += 1
    return count
def intersperse(sep, it):
    """Like str.join for iterators.

    Yields the elements of ``it`` with ``sep`` inserted between each pair
    of neighbours.  An empty input yields nothing, just as joining an
    empty sequence gives an empty string.

    >>> list(intersperse(0, [1, 2, 3]))
    [1, 0, 2, 0, 3]
    >>> list(intersperse(0, [1]))
    [1]
    >>> list(intersperse(0, []))
    []
    """
    it = iter(it)
    # A StopIteration escaping a generator body is converted to
    # RuntimeError (PEP 479), so the empty case is handled explicitly.
    try:
        first = next(it)
    except StopIteration:
        return
    yield first
    for elem in it:
        yield sep
        yield elem
def check(func, seq, *, exc=ValueError, allow_empty=True):
    """Pass the items of ``seq`` through, validating each with ``func``.

    Items are yielded one at a time.  The first item for which
    ``func(item)`` is falsy raises ``exc`` with a message naming the item
    and its position.  If ``seq`` turns out to be empty and
    ``allow_empty`` is false, ``exc`` is raised as well.
    """
    saw_any = False
    for position, item in enumerate(seq):
        saw_any = True
        if func(item):
            yield item
        else:
            raise exc(f"{item!r} at position {position}")
    if not (saw_any or allow_empty):
        raise exc("empty sequence")
def check_cmp(cmp, seq, *, allow_empty=False):
    """Pass the items of ``seq`` through, validating neighbouring pairs.

    Each item is yielded once ``cmp(previous, item)`` has been found
    truthy (the first item is yielded unconditionally).  A failing pair
    raises ValueError, as does an empty ``seq`` unless ``allow_empty`` is
    true.

    >>> list(check_cmp(op.lt, [1, 2]))
    [1, 2]
    >>> list(check_cmp(op.le, [1, 1]))
    [1, 1]
    >>> list(check_cmp(op.lt, [1, 1]))
    Traceback (most recent call last):
    ...
    ValueError: 1 followed by 1 at position 1
    >>> list(check_cmp(op.lt, []))
    Traceback (most recent call last):
    ...
    ValueError: empty sequence
    >>> list(check_cmp(op.lt, [], allow_empty=True))
    []
    """
    items = iter(seq)
    try:
        previous = next(items)
    except StopIteration:
        if not allow_empty:
            raise ValueError("empty sequence")
        return
    yield previous

    # Positions are counted from the start of seq, so the second item is 1.
    position = 0
    for current in items:
        position += 1
        if not cmp(previous, current):
            raise ValueError(
                f"{previous!r} followed by {current!r} at position {position}"
            )
        previous = current
        yield current
# Ready-made ordering validators: each one is check_cmp with the pairwise
# comparison already bound, so callers pass only the sequence (and,
# optionally, allow_empty).  The comparison receives (previous, current);
# the "strict" variants therefore reject equal neighbours.
ensure_monotonic_increasing = partial(check_cmp, op.le)
ensure_strict_monotonic_increasing = partial(check_cmp, op.lt)
ensure_monotonic_decreasing = partial(check_cmp, op.ge)
ensure_strict_monotonic_decreasing = partial(check_cmp, op.gt)
def check_monotonic_increasing(*args, **kwargs):
    """Eagerly run check_cmp over a whole sequence, discarding the items.

    All arguments are forwarded unchanged to check_cmp, and the resulting
    generator is drained with exhaust, so any ValueError surfaces
    immediately.  Returns whatever exhaust returns.

    NOTE(review): despite the name, no comparison is pre-bound here, so
    the caller has to supply ``cmp`` as the first argument.  Confirm
    whether ensure_monotonic_increasing was meant to be used instead.
    """
    checked = check_cmp(*args, **kwargs)
    return exhaust(checked)
# When executed as a script, run the doctests embedded in this module's
# docstrings.
if __name__ == '__main__':
    import doctest
    doctest.testmod()