From 4945361c2db92095f934b92a6c00316243caf3cc Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 25 Nov 2016 19:28:46 +0100 Subject: [PATCH 1/6] Fix #71: make cloudpickle Python 3.6 compatible Also add a cache to the function which extracts global-referencing opcodes, speeding up repetitive pickling of non-tiny functions. --- cloudpickle/cloudpickle.py | 91 ++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 34 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 32701bf3f..dd57f3fce 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -42,17 +42,19 @@ """ from __future__ import print_function -import operator -import io +import dis +from functools import partial import imp +import io +import itertools +import opcode +import operator import pickle import struct import sys -import types -from functools import partial -import itertools -import dis import traceback +import types +import weakref if sys.version < '3': from pickle import Pickler @@ -68,10 +70,10 @@ PY3 = True #relevant opcodes -STORE_GLOBAL = dis.opname.index('STORE_GLOBAL') -DELETE_GLOBAL = dis.opname.index('DELETE_GLOBAL') -LOAD_GLOBAL = dis.opname.index('LOAD_GLOBAL') -GLOBAL_OPS = [STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL] +STORE_GLOBAL = opcode.opmap['STORE_GLOBAL'] +DELETE_GLOBAL = opcode.opmap['DELETE_GLOBAL'] +LOAD_GLOBAL = opcode.opmap['LOAD_GLOBAL'] +GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL) HAVE_ARGUMENT = dis.HAVE_ARGUMENT EXTENDED_ARG = dis.EXTENDED_ARG @@ -90,6 +92,46 @@ def _builtin_type(name): return getattr(types, name) +if sys.version_info < (3, 4): + def _walk_global_ops_real(code): + code = getattr(code, 'co_code', b'') + if not PY3: + code = map(ord, code) + + n = len(code) + i = 0 + extended_arg = 0 + while i < n: + op = code[i] + i += 1 + if op >= HAVE_ARGUMENT: + oparg = code[i] + code[i + 1] * 256 + extended_arg + extended_arg = 0 + i += 2 + if op == EXTENDED_ARG: + extended_arg = oparg * 65536 + if op in GLOBAL_OPS: + yield op, oparg + +else: + def _walk_global_ops_real(code): + for instr in dis.get_instructions(code): + op = instr.opcode + if op in GLOBAL_OPS: + yield op, instr.arg + + +def _walk_global_ops(code, _cache=weakref.WeakKeyDictionary()): + """ + Return a list of (opcode, argument number) tuples for all + global-referencing instructions in *code*. + """ + res = _cache.get(code) + if res is None: + res = _cache[code] = list(_walk_global_ops_real(code)) + return res + + class CloudPickler(Pickler): dispatch = Pickler.dispatch.copy() @@ -281,41 +323,22 @@ def save_function_tuple(self, func): write(pickle.TUPLE) write(pickle.REDUCE) # applies _fill_function on the tuple - @staticmethod - def extract_code_globals(co): + @classmethod + def extract_code_globals(cls, co): """ Find all globals names read or written to by codeblock co """ - - code = getattr(co, 'co_code', None) - if code is None: - return set() - if not PY3: - code = [ord(c) for c in code] names = co.co_names out_names = set() - n = len(code) - i = 0 - extended_arg = 0 - while i < n: - op = code[i] - - i += 1 - if op >= HAVE_ARGUMENT: - oparg = code[i] + code[i+1] * 256 + extended_arg - extended_arg = 0 - i += 2 - if op == EXTENDED_ARG: - extended_arg = oparg*65536 - if op in GLOBAL_OPS: - out_names.add(names[oparg]) + for op, oparg in _walk_global_ops(co): + out_names.add(names[oparg]) # see if nested function have any global refs if co.co_consts: for const in co.co_consts: if type(const) is types.CodeType: - out_names |= CloudPickler.extract_code_globals(const) + out_names |= cls.extract_code_globals(const) return out_names From 78c68eba66282c7d64f94932d385bc482e794301 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 25 Nov 2016 19:42:48 +0100 Subject: [PATCH 2/6] Fixes for 2.6 and pypy --- cloudpickle/cloudpickle.py | 20 +++++++++++++------- tests/cloudpickle_file_test.py | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index dd57f3fce..7d9063cc2 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -121,14 +121,18 @@ def _walk_global_ops_real(code): yield op, instr.arg -def _walk_global_ops(code, _cache=weakref.WeakKeyDictionary()): +_walk_global_ops_cache = (weakref.WeakKeyDictionary() + if sys.version_info >= (2, 7) + else {}) + +def _walk_global_ops(code): """ Return a list of (opcode, argument number) tuples for all global-referencing instructions in *code*. """ - res = _cache.get(code) + res = _walk_global_ops_cache.get(code) if res is None: - res = _cache[code] = list(_walk_global_ops_real(code)) + res = _walk_global_ops_cache[code] = list(_walk_global_ops_real(code)) return res @@ -328,11 +332,13 @@ def extract_code_globals(cls, co): """ Find all globals names read or written to by codeblock co """ - names = co.co_names - out_names = set() + try: + names = co.co_names + except AttributeError: + # PyPy "builtin-code" object + return set() - for op, oparg in _walk_global_ops(co): - out_names.add(names[oparg]) + out_names = set(names[oparg] for op, oparg in _walk_global_ops(co)) # see if nested function have any global refs if co.co_consts: diff --git a/tests/cloudpickle_file_test.py b/tests/cloudpickle_file_test.py index a5b13fcce..f98cf07b1 100644 --- a/tests/cloudpickle_file_test.py +++ b/tests/cloudpickle_file_test.py @@ -85,7 +85,7 @@ def test_seek(self): self.assertEquals(self.teststring, unpickled.read()) os.remove(self.tmpfilepath) - @pytest.mark.skipif(sys.version_info > (2, 7), + @pytest.mark.skipif(sys.version_info >= (3,), reason="only works on Python 2.x") def test_temp_file(self): with tempfile.NamedTemporaryFile(mode='ab+') as fp: From 020f477d2015b91a4b5962d920eafd2da417056f Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 25 Nov 2016 19:50:08 +0100 Subject: [PATCH 3/6] Put the cache at a strategically better place --- cloudpickle/cloudpickle.py | 59 ++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 7d9063cc2..25865c86a 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -93,7 +93,11 @@ def _builtin_type(name): if sys.version_info < (3, 4): - def _walk_global_ops_real(code): + def _walk_global_ops(code): + """ + Yield (opcode, argument number) tuples for all + global-referencing instructions in *code*. + """ code = getattr(code, 'co_code', b'') if not PY3: code = map(ord, code) @@ -114,28 +118,17 @@ def _walk_global_ops_real(code): yield op, oparg else: - def _walk_global_ops_real(code): + def _walk_global_ops(code): + """ + Yield (opcode, argument number) tuples for all + global-referencing instructions in *code*. + """ for instr in dis.get_instructions(code): op = instr.opcode if op in GLOBAL_OPS: yield op, instr.arg -_walk_global_ops_cache = (weakref.WeakKeyDictionary() - if sys.version_info >= (2, 7) - else {}) - -def _walk_global_ops(code): - """ - Return a list of (opcode, argument number) tuples for all - global-referencing instructions in *code*. - """ - res = _walk_global_ops_cache.get(code) - if res is None: - res = _walk_global_ops_cache[code] = list(_walk_global_ops_real(code)) - return res - - class CloudPickler(Pickler): dispatch = Pickler.dispatch.copy() @@ -327,24 +320,34 @@ def save_function_tuple(self, func): write(pickle.TUPLE) write(pickle.REDUCE) # applies _fill_function on the tuple + + _extract_code_globals_cache = (weakref.WeakKeyDictionary() + if sys.version_info >= (2, 7) + else {}) + @classmethod def extract_code_globals(cls, co): """ Find all globals names read or written to by codeblock co """ - try: - names = co.co_names - except AttributeError: - # PyPy "builtin-code" object - return set() + out_names = cls._extract_code_globals_cache.get(co) + if out_names is None: + try: + names = co.co_names + except AttributeError: + # PyPy "builtin-code" object + out_names = set() + else: + out_names = set(names[oparg] + for op, oparg in _walk_global_ops(co)) - out_names = set(names[oparg] for op, oparg in _walk_global_ops(co)) + # see if nested function have any global refs + if co.co_consts: + for const in co.co_consts: + if type(const) is types.CodeType: + out_names |= cls.extract_code_globals(const) - # see if nested function have any global refs - if co.co_consts: - for const in co.co_consts: - if type(const) is types.CodeType: - out_names |= cls.extract_code_globals(const) + cls._extract_code_globals_cache[co] = out_names return out_names From a910ab5e56a1822ecfd3f538de930547881bb5bc Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 25 Nov 2016 19:55:56 +0100 Subject: [PATCH 4/6] Fix cache on PyPy --- cloudpickle/cloudpickle.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 25865c86a..e8f4223b2 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -320,10 +320,10 @@ def save_function_tuple(self, func): write(pickle.TUPLE) write(pickle.REDUCE) # applies _fill_function on the tuple - - _extract_code_globals_cache = (weakref.WeakKeyDictionary() - if sys.version_info >= (2, 7) - else {}) + _extract_code_globals_cache = ( + weakref.WeakKeyDictionary() + if sys.version_info >= (2, 7) and not hasattr(sys, "pypy_version_info") + else {}) @classmethod def extract_code_globals(cls, co): From e26a76fda93315e6874dda3293dbb475698cd9c0 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 25 Nov 2016 20:29:00 +0100 Subject: [PATCH 5/6] Add a test for EXTENDED_ARG --- tests/cloudpickle_test.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 5f55c8f3c..34faf498f 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -4,6 +4,7 @@ import pytest import pickle import sys +import random import functools import itertools import platform @@ -333,6 +334,32 @@ def g(y): res = loop.run_sync(functools.partial(g2, 5)) self.assertEqual(res, 7) + def test_extended_arg(self): + # Functions with more than 65535 global vars prefix some global + # variable references with the EXTENDED_ARG opcode. + nvars = 65537 + 258 + names = ['g%d' % i for i in range(1, nvars)] + r = random.Random(42) + d = dict((name, r.randrange(100)) for name in names) + # def f(x): + # x = g1, g2, ... + # return zlib.crc32(bytes(bytearray(x))) + code = """ + import zlib + + def f(): + x = {tup} + return zlib.crc32(bytes(bytearray(x))) + """.format(tup=', '.join(names)) + exec(textwrap.dedent(code), d) + f = d['f'] + res = f() + data = cloudpickle.dumps([f, f]) + d = f = None + f2, f3 = pickle.loads(data) + self.assertTrue(f2 is f3) + self.assertEqual(f2(), res) + if __name__ == '__main__': unittest.main() From 319ece059c10c5268308551624ca672721ce54cc Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 25 Nov 2016 20:33:20 +0100 Subject: [PATCH 6/6] Try to fix 2.6 and pypy --- tests/cloudpickle_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 34faf498f..a1dec1151 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -340,7 +340,7 @@ def test_extended_arg(self): nvars = 65537 + 258 names = ['g%d' % i for i in range(1, nvars)] r = random.Random(42) - d = dict((name, r.randrange(100)) for name in names) + d = dict([(name, r.randrange(100)) for name in names]) # def f(x): # x = g1, g2, ... # return zlib.crc32(bytes(bytearray(x))) @@ -351,7 +351,7 @@ def f(): x = {tup} return zlib.crc32(bytes(bytearray(x))) """.format(tup=', '.join(names)) - exec(textwrap.dedent(code), d) + exec(textwrap.dedent(code), d, d) f = d['f'] res = f() data = cloudpickle.dumps([f, f])