Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle non-ASCII chars correctly #22

Merged
merged 11 commits into from
Jan 24, 2016
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ script:
- python -m unittest discover -v -s gtts/tests/
deploy:
provider: pypi
user: pndurette@gmail.com
user: pndurette
password:
secure: GTbUTja0D/LDnIxLLO4ohTHGugwrnx2nMqAw5n7a9JAsfxCVXz6+vu0fkNBmOSJV0bNsFcoj/Q+lcR1SCNsw2NibWWFLDMoYD/yqP/SrHTSUoTXt+o7H9/qvxR2MDr0t2S5kjAux4bw5uPg8WtYlPpD0VWgWkFCm3JmJG+cG1tA=
on:
tags: true
distributions: sdist
repo: pndurette/gTTS
branch: master
python: '3.4'
6 changes: 5 additions & 1 deletion CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,8 @@ Changed the gtts-cli arguments to make it pipeable and more standard:
> usage: gtts-cli.py [-h] [-f FILE] [-o DESTINATION] [-l LANG] [--debug] [text]
Made '<text to speak>' the positional argument (replacing -t / --text)
Made '<destination file>' optional and using -o / --destination. Defaults to STDOUT for piping.
(Thanks to @Dr-Horv!)
(Pull Request #10 -- Thanks to @Dr-Horv!)

v.1.1.0 2016-01-13
Fixed the constant 502 errors (for now): generating http request tokens ('tk') the same way the Google Translate page does
(Pull Requests #16, #17 -- Thanks to @Boudewijn26!)
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include README.md
include CHANGES.txt
1 change: 0 additions & 1 deletion README.txt

This file was deleted.

17 changes: 17 additions & 0 deletions gtts/tests/test_token.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# coding=UTF-8

import unittest

from gtts import gToken
Expand All @@ -20,5 +22,20 @@ def test_work_token(self):
seed = '+-a^+6'
self.assertEqual(415744659, self.tokenizer._work_token(token_key, seed))

def test_token_accentuated(self):
    """Token for text containing an accented Latin letter.

    'é' (U+00E9) is outside ASCII and encodes to two bytes in UTF-8.
    A fixed seed is passed so the expected token does not depend on the
    tokenizer's own token key.
    """
    text = u'Hé'
    self.assertEqual('63792.446860', self.tokenizer.calculate_token(text, seed=403644))

def test_token_special_char(self):
    """Token for text mixing a three-byte and a two-byte UTF-8 character.

    The euro sign (U+20AC) encodes to three bytes in UTF-8 and 'é' (U+00E9)
    to two. A fixed seed is passed so the expected token does not depend on
    the tokenizer's own token key.
    """
    text = u'€Hé'
    self.assertEqual('535990.918794', self.tokenizer.calculate_token(text, seed=403644))

def test_token_very_special_char(self):
    """Token for text made of a single geometric-shape symbol.

    The circle symbol used here (U+25D0) encodes to three bytes in UTF-8.
    A fixed seed is passed so the expected token does not depend on the
    tokenizer's own token key.
    """
    text = u"◐"
    self.assertEqual('457487.54195', self.tokenizer.calculate_token(text, seed=403644))

if __name__ == '__main__':
unittest.main()
33 changes: 5 additions & 28 deletions gtts/token.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# -*- coding: utf-8 -*-
import calendar, time, math
import calendar
import math
import time


class gToken:
""" gToken (Google Translate Token)
Expand All @@ -19,33 +22,7 @@ def __init__(self):
def calculate_token(self, text, seed=None):
""" Calculate the request token (`tk`) of a string """

e = 0
f = 0
d = [None] * len(text)
for c in text:
g = ord(c)
if 128 > g:
d[e] = g
e += 1
elif 2048 > g:
d[e] = g >> 6 | 192
e += 1
else:
if 55296 == (g & 64512) and f + 1 < len(text) and 56320 == (ord(text[f + 1]) & 64512):
f += 1
g = 65536 + ((g & 1023) << 10) + (ord(text[f]) & 1023)
d[e] = g >> 18 | 240
e += 1
d[e] = g >> 12 & 63 | 128
e += 1
else:
d[e] = g >> 12 | 224
e += 1
d[e] = g >> 6 & 63 | 128
e += 1
d[e] = g & 63 | 128
e += 1

d = list(bytearray(text.encode('UTF-8')))
a = seed if seed is not None else self.token_key
if seed is None:
seed = self.token_key
Expand Down
2 changes: 1 addition & 1 deletion gtts/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.0.7'
__version__ = '1.1.2'
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
scripts=['bin/gtts-cli', 'bin/gtts-cli.py'],
license='MIT',
description='Create an mp3 file from spoken text via the Google TTS (Text-to-Speech) API',
long_description=open('README.txt').read(),
long_description=open('README.md').read(),
install_requires=[
"requests"
],
Expand Down
5 changes: 5 additions & 0 deletions token-script.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ fM = function(a) {
eM = Number(window[c.join(b())]) || 0
}
b = eM;

// All this does is turn d into the byte values of the UTF-8 representation of a
var d = cM(String.fromCharCode(116)),
c = cM(String.fromCharCode(107)),
d = [d(), d()];
Expand All @@ -46,6 +48,9 @@ fM = function(a) {

128 > g ? d[e++] = g : (2048 > g ? d[e++] = g >> 6 | 192 : (55296 == (g & 64512) && f + 1 < a.length && 56320 == (a.charCodeAt(f + 1) & 64512) ? (g = 65536 + ((g & 1023) << 10) + (a.charCodeAt(++f) & 1023), d[e++] = g >> 18 | 240, d[e++] = g >> 12 & 63 | 128) : d[e++] = g >> 12 | 224, d[e++] = g >> 6 & 63 | 128), d[e++] = g & 63 | 128)
}
// So now d holds the UTF-8 bytes of a, i.e. d == utf8(a)


a = b || 0;
for (e = 0; e < d.length; e++) a += d[e], a = dM(a, Vb);
a = dM(a, Ub);
Expand Down