-
Notifications
You must be signed in to change notification settings - Fork 76
/
strtransform.py
226 lines (182 loc) · 6.85 KB
/
strtransform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# -*- coding: utf-8 -*-
# vim: sw=4:ts=4:expandtab
"""
riko.modules.strtransform
~~~~~~~~~~~~~~~~~~~~~~~~~
Provides functions for performing string transformations on text, e.g.,
capitalize, uppercase, etc.
Examples:
basic usage::
>>> from riko.modules.strtransform import pipe
>>>
>>> conf = {'rule': {'transform': 'title'}}
>>> item = {'content': 'hello world'}
>>> next(pipe(item, conf=conf))['strtransform'] == 'Hello World'
True
Attributes:
OPTS (dict): The default pipe options
DEFAULTS (dict): The default parser options
"""
import pygogo as gogo
from functools import reduce
from . import processor
from riko.bado import coroutine, return_value, itertools as ait
# Default pipe options; forwarded as keyword arguments to the ``processor``
# decorator that wraps both ``pipe`` and ``async_pipe``.
OPTS = dict(listize=True, ftype="text", field="content", extract="rule")

# Default parser options handed to the ``processor`` decorator. This module
# does not define any.
DEFAULTS = dict()
# Module-level logger; ``reducer`` uses it to warn about unknown transforms.
logger = gogo.Gogo(__name__, monolog=True).logger
# Names of the ``str`` methods that a rule's ``transform`` may select.
ATTRS = {
    "capitalize",
    "lower",
    "upper",
    "swapcase",
    "title",
    "strip",
    "rstrip",
    "lstrip",
    "zfill",
    "replace",
    "count",
    "find",
}

# Zero-based positions of the arguments that the matching ``str`` method only
# accepts as integers. Rule arguments are always split out of a comma separated
# string, so these must be converted before the call (``'42'.zfill('5')``
# raises a TypeError).
_INT_ARG_POSITIONS = {
    "zfill": {0},  # zfill(width)
    "replace": {2},  # replace(old, new, count)
    "count": {1, 2},  # count(sub, start, end)
    "find": {1, 2},  # find(sub, start, end)
}


def _coerce_args(transform, args):
    """Converts the integer-only arguments of a transform from str to int

    Args:
        transform (str): Name of the ``str`` method that will be called.
        args (List[str]): The positional arguments parsed from the rule.

    Returns:
        List: The arguments, with every position listed in
            `_INT_ARG_POSITIONS` for `transform` converted to `int`. Values
            that don't parse as integers are returned unchanged, so the
            ``str`` method itself reports the bad argument.
    """
    positions = _INT_ARG_POSITIONS.get(transform)

    if not positions:
        return args

    coerced = []

    for pos, arg in enumerate(args):
        if pos in positions:
            try:
                arg = int(arg)
            except ValueError:
                # Deliberately keep the raw string: the str method will raise
                # its usual TypeError for the invalid argument.
                pass

        coerced.append(arg)

    return coerced


def reducer(word, rule):
    """Applies a single transformation rule to a word

    Args:
        word (str): The content to transform (the result of the previous rule
            when several rules are chained).
        rule (obj): Object exposing the attributes `transform` (name of the
            ``str`` method to call) and `args` (comma separated arguments for
            that method, or a falsy value for none).

    Returns:
        The value returned by the selected ``str`` method (note that 'count'
        and 'find' return an int), or `word` unchanged if `rule.transform` is
        not in `ATTRS` (a warning is logged in that case).

    Examples:
        >>> from types import SimpleNamespace
        >>>
        >>> rule = SimpleNamespace(transform='zfill', args='5')
        >>> reducer('42', rule) == '00042'
        True
    """
    if rule.transform in ATTRS:
        raw_args = rule.args.split(",") if rule.args else []
        args = _coerce_args(rule.transform, raw_args)
        result = getattr(word, rule.transform)(*args)
    else:
        logger.warning("Invalid transformation: %s", rule.transform)
        result = word

    return result
@coroutine
def async_parser(word, rules, skip=False, **kwargs):
    """Asynchronous counterpart of `parser`: applies the rules to a word

    Args:
        word (str): The string to transform
        rules (List[obj]): the parsed rules (Objectify instances).
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: strtransform)
        stream (dict): The original item (returned as is when `skip` is true)

    Returns:
        Deferred: twisted.internet.defer.Deferred item

    Examples:
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     item = {'content': 'hello world'}
        ...     conf = {'rule': {'transform': 'title'}}
        ...     rule = Objectify(conf['rule'])
        ...     kwargs = {'stream': item, 'conf': conf}
        ...     d = async_parser(item['content'], [rule], **kwargs)
        ...     return d.addCallbacks(print, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Hello World
    """
    if not skip:
        # Fold every rule over the word, left to right
        result = yield ait.coop_reduce(reducer, rules, word)
    else:
        # Nothing to parse: hand back the original item
        result = kwargs["stream"]

    return_value(result)
def parser(word, rules, skip=False, **kwargs):
    """Applies the transformation rules, in order, to a word

    Args:
        word (str): The string to transform
        rules (List[obj]): the parsed rules (Objectify instances).
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: strtransform)
        stream (dict): The original item

    Returns:
        The result of folding `reducer` over `rules` starting from `word`, or
        the original item (`kwargs['stream']`) if `skip` is true.

    Examples:
        >>> from meza.fntools import Objectify
        >>>
        >>> item = {'content': 'hello world'}
        >>> conf = {'rule': {'transform': 'title'}}
        >>> rule = Objectify(conf['rule'])
        >>> args = item['content'], [rule], False
        >>> kwargs = {'stream': item, 'conf': conf}
        >>> parser(*args, **kwargs) == 'Hello World'
        True
    """
    if skip:
        # Nothing to parse: hand back the original item
        return kwargs["stream"]

    return reduce(reducer, rules, word)
@processor(DEFAULTS, isasync=True, **OPTS)
def async_pipe(*args, **kwargs):
    """A processor module that asynchronously applies string transformations
    to a field of an item.

    Args:
        item (dict): The entry to process
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. Must contain the key 'rule'.

            rule (dict): can be either a dict or list of dicts. Must contain
                the key 'transform'. May contain the key 'args'

                transform (str): The string transformation to apply. Must be
                    one of: 'capitalize', 'lower', 'upper', 'swapcase',
                    'title', 'strip', 'rstrip', 'lstrip', 'zfill', 'replace',
                    'count', or 'find'

                args (str): A comma separated list of arguments to supply the
                    transformer.

        assign (str): Attribute to assign parsed content (default: strtransform)
        field (str): Item attribute to operate on (default: 'content')

    Returns:
        Deferred: twisted.internet.defer.Deferred item with transformed content

    Examples:
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x)['strtransform'])
        ...     conf = {'rule': {'transform': 'title'}}
        ...     d = async_pipe({'content': 'hello world'}, conf=conf)
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        Hello World
    """
    # All of the work happens in the parser; this wrapper only delegates.
    deferred = async_parser(*args, **kwargs)
    return deferred
# DEFAULTS is passed exactly as for `async_pipe`, so that both pipes are
# configured with the same default parser options.
@processor(DEFAULTS, **OPTS)
def pipe(*args, **kwargs):
    """A processor that performs string transformations on the field of an item.

    Args:
        item (dict): The entry to process
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. Must contain the key 'rule'.

            rule (dict): can be either a dict or list of dicts. Must contain
                the key 'transform'. May contain the key 'args'

                transform (str): The string transformation to apply. Must be
                    one of: 'capitalize', 'lower', 'upper', 'swapcase',
                    'title', 'strip', 'rstrip', 'lstrip', 'zfill', 'replace',
                    'count', or 'find'

                args (str): A comma separated list of arguments to supply the
                    transformer.

        assign (str): Attribute to assign parsed content (default: strtransform)
        field (str): Item attribute to operate on (default: 'content')

    Yields:
        dict: an item with transformed content

    Examples:
        >>> conf = {'rule': {'transform': 'title'}}
        >>> item = {'content': 'hello world'}
        >>> next(pipe(item, conf=conf))['strtransform'] == 'Hello World'
        True
        >>> rules = [
        ...     {'transform': 'lower'}, {'transform': 'count', 'args': 'g'}]
        >>> conf = {'rule': rules}
        >>> kwargs = {'conf': conf, 'field': 'title', 'assign': 'result'}
        >>> next(pipe({'title': 'Greetings'}, **kwargs))['result']
        2
    """
    # All of the work happens in the parser; this wrapper only delegates.
    return parser(*args, **kwargs)