forked from seatgeek/fuzzywuzzy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmarks.py
71 lines (56 loc) · 2.5 KB
/
benchmarks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# -*- coding: utf8 -*-
from timeit import timeit
from fuzzywuzzy import utils
iterations=100000
cirque_strings = [
"cirque du soleil - zarkana - las vegas",
"cirque du soleil ",
"cirque du soleil las vegas",
"zarkana las vegas",
"las vegas cirque du soleil at the bellagio",
"zarakana - cirque du soleil - bellagio"
]
choices = [
"",
"new york yankees vs boston red sox",
"",
"zarakana - cirque du soleil - bellagio",
None,
"cirque du soleil las vegas",
None
]
mixed_strings = [
"Lorem Ipsum is simply dummy text of the printing and typesetting industry.",
"C\\'est la vie",
"Ça va?",
"Cães danados",
u"\xacCamarões assados",
u"a\xac\u1234\u20ac\U00008000"
]
for s in choices:
print 'Test for string: "%s"' % s
# print 'Old: %f' % round(timeit('utils.validate_stringold(\'%s\')' % s, "from fuzzywuzzy import utils",number=iterations),4)
print 'New: %f' % round(timeit('utils.validate_string(\'%s\')' % s, "from fuzzywuzzy import utils",number=iterations),4)
print
for s in mixed_strings:
print 'Test for string: "%s"' % s
#print 'Old: %f' % round(timeit('utils.asciidammitold(\'%s\')' % s, "from fuzzywuzzy import utils",number=iterations),4)
print 'New: %f' % round(timeit('utils.asciidammit(\'%s\')' % s, "from fuzzywuzzy import utils",number=iterations),4)
print
for s in mixed_strings+cirque_strings+choices:
print 'Test for string: "%s"' % s
#print 'Old: %f' % round(timeit('utils.full_processold(\'%s\')' % s, "from fuzzywuzzy import utils",number=iterations),4)
print 'New: %f' % round(timeit('utils.full_process(\'%s\')' % s, "from fuzzywuzzy import utils",number=iterations),4)
### benchmarking the core matching methods...
for s in cirque_strings:
print 'Test fuzz.ratio for string: "%s"' % s
print '-------------------------------'
print 'New: %f' % round(timeit('fuzz.ratio(\'cirque du soleil\', \'%s\')' % s, "from fuzzywuzzy import fuzz",number=iterations/100),4)
for s in cirque_strings:
print 'Test fuzz.partial_ratio for string: "%s"' % s
print '-------------------------------'
print 'New: %f' % round(timeit('fuzz.partial_ratio(\'cirque du soleil\', \'%s\')' % s, "from fuzzywuzzy import fuzz",number=iterations/100),4)
for s in cirque_strings:
print 'Test fuzz.WRatio for string: "%s"' % s
print '-------------------------------'
print 'New: %f' % round(timeit('fuzz.WRatio(\'cirque du soleil\', \'%s\')' % s, "from fuzzywuzzy import fuzz",number=iterations/100),4)