Skip to content

Commit

Permalink
Use POST requests for translation and detection
Browse files: browse the repository at this point in the history
  • Loading branch information
sloria committed Dec 15, 2013
1 parent 22afc9d commit e7f9cf9
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 13 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,6 @@ text/en/*.pickle

# Readme build
README.html

.ipynb_checkpoints/
*.ipynb
2 changes: 2 additions & 0 deletions textblob/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from itertools import imap, izip
import urllib2 as request
from urllib import quote as urlquote
from urllib import urlencode
text_type = unicode
binary_type = str
string_types = (str, unicode)
Expand All @@ -26,6 +27,7 @@ def implements_to_string(cls):
else: # PY3
from urllib import request
from urllib.parse import quote as urlquote
from urllib.parse import urlencode
text_type = str
binary_type = bytes
string_types = (str,)
Expand Down
26 changes: 13 additions & 13 deletions textblob/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
'''
from __future__ import absolute_import
import re
from textblob.compat import PY2, request, urlquote
import json
from textblob.compat import PY2, request, urlquote, urlencode


class Translator(object):
Expand Down Expand Up @@ -35,8 +36,7 @@ class Translator(object):
detection_pattern = re.compile(
r".*?\,\"([a-z]{2}(\-\w{2})?)\"\,.*?", flags=re.S)

translate_url = ("http://translate.google.com/translate_a/"
"t?client=t&ie=UTF-8&oe=UTF-8&sl={0}&tl={1}&text={2}")
translate_url = "http://translate.google.com/translate_a/t"
detect_url = "http://translate.google.com/translate_a/t?client=t&ie=UTF-8&oe=UTF-8&text={0}"

headers = {'User-Agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) '
Expand All @@ -46,18 +46,17 @@ def translate(self, source, from_lang='en', to_lang='en', host=None, type_=None)
'''Translate the source text from one language to another.'''
if PY2:
source = source.encode('utf-8')
escaped_source = urlquote(source, '')
url = self.translate_url.format(from_lang, to_lang, escaped_source)
json5 = self._get_json5(url, host=host, type_=type_)
return self._unescape(self._get_translation_from_json5(json5))
data = {"client": "t", "ie": "UTF-8", "oe": "UTF-8",
"sl": from_lang, "tl": to_lang, "text": source}
json5 = self._get_json5(self.translate_url, host=host, type_=type_, data=data)
return self._get_translation_from_json5(json5)

def detect(self, source, host=None, type_=None):
'''Detect the source text's language.'''
if PY2:
source = source.encode('utf-8')
escaped_source = urlquote(source, '')
url = self.detect_url.format(escaped_source)
json5 = self._get_json5(url, host=host, type_=type_)
data = {"client": "t", "ie": "UTF-8", "oe": "UTF-8", "text": source}
json5 = self._get_json5(self.detect_url, host=host, type_=type_, data=data)
lang = self._get_language_from_json5(json5)
return lang

Expand All @@ -76,10 +75,11 @@ def _get_translation_from_json5(self, content):
break
result += m.group(1)
pos = m.end()
return result
return self._unescape(result)

def _get_json5(self, url, host=None, type_=None):
req = request.Request(url=url, headers=self.headers)
def _get_json5(self, url, host=None, type_=None, data=None):
encoded_data = urlencode(data)
req = request.Request(url=url, headers=self.headers, data=encoded_data)
if host or type_:
req.set_proxy(host=host, type=type_)
r = request.urlopen(req)
Expand Down

0 comments on commit e7f9cf9

Please sign in to comment.