Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support recursive and empty closure cells #443

Merged
merged 46 commits into from
Jan 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
fce797e
Fix #229
anivegesana Dec 11, 2021
ec190ea
.get still fails when index is unhashable
anivegesana Dec 11, 2021
e6decfe
Cells are not allowed to change before 3.7
anivegesana Dec 11, 2021
8e1cda7
Uncomment test cases if Python > 3.7
anivegesana Dec 11, 2021
007f209
Add more complex test case
anivegesana Dec 11, 2021
2199ff6
Spooky edits at a distance
anivegesana Dec 12, 2021
2c5d2fa
Wrap in exec to prevent syntax errors in Python 2
anivegesana Dec 12, 2021
4ae150c
Fix testcase
anivegesana Dec 12, 2021
e33335e
Test impossible in Py2
anivegesana Dec 12, 2021
bb16131
Small correction
anivegesana Dec 12, 2021
56fcc30
Correct the correction
anivegesana Dec 12, 2021
42a93dc
Add Python 2 support
anivegesana Dec 14, 2021
c1566c5
Turn feature on for Python 2
anivegesana Dec 14, 2021
9b56c97
Prefer function over object when possible
anivegesana Dec 14, 2021
419302a
Add changes from review
anivegesana Dec 14, 2021
36aa9ad
Turn off test_circular_reference for Python 2
anivegesana Dec 14, 2021
eb32824
Reformat and support empty cells
anivegesana Dec 15, 2021
64187c9
Solve some more versioning issues
anivegesana Dec 15, 2021
436e499
Add shim that chooses the correct function at unpickling
anivegesana Dec 15, 2021
2c30436
Avoid pickling dill._dill
anivegesana Dec 16, 2021
093d9ca
Small correction
anivegesana Dec 16, 2021
8ea0e26
This is why dill._dill would try to pickle
anivegesana Dec 16, 2021
15908d3
Add shim for reference cells
anivegesana Dec 16, 2021
14bde51
Copy functions for sentinel
anivegesana Dec 16, 2021
9eafe55
Recursive function cells
anivegesana Dec 23, 2021
50bbc91
Turn on test case for Python 3 only
anivegesana Dec 23, 2021
03fde84
Cell manipulation on PyPy 2.7
anivegesana Dec 24, 2021
c034126
Not possible in PyPy 2.7
anivegesana Dec 24, 2021
2921143
Correctly remove test case
anivegesana Dec 24, 2021
fc711c8
Fix coverage
anivegesana Dec 24, 2021
5568e79
PyPy 2.7 Attempt 3
anivegesana Dec 24, 2021
9ae9552
Fix small issue
anivegesana Dec 24, 2021
fe842e0
Clean up _create_cell
anivegesana Dec 25, 2021
ac94321
Empty cells in PyPy2
anivegesana Dec 25, 2021
5b60c92
Only two _create_cell functions
anivegesana Dec 26, 2021
71c9aaa
Fixes from review
anivegesana Dec 28, 2021
dfdfc40
Was probably not a good idea
anivegesana Dec 28, 2021
8692e5d
Split part of Shim into GetAttrShim
anivegesana Dec 29, 2021
980241a
Strange issue with exec in PyPy3.6
anivegesana Dec 29, 2021
7d41c79
Better _shims.py
anivegesana Dec 30, 2021
c6ea843
Rename cell_stack back to postproc
anivegesana Dec 31, 2021
89c1487
Add _CELL_EMPTY to Python 3 in case it makes cPickle implementation e…
anivegesana Dec 31, 2021
bd350b6
Add postproc_list to _save_with_postproc
anivegesana Jan 5, 2022
181aa4a
Recursive functions and warnings
anivegesana Jan 10, 2022
a7797cb
Better warning messages
anivegesana Jan 25, 2022
e2a3b98
Remove words "perfectly" and "would"
anivegesana Jan 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dill/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
from ._dill import dump, dumps, load, loads, dump_session, load_session, \
Pickler, Unpickler, register, copy, pickle, pickles, check, \
HIGHEST_PROTOCOL, DEFAULT_PROTOCOL, PicklingError, UnpicklingError, \
HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE
HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PickleError, PickleWarning, \
PicklingWarning, UnpicklingWarning
from . import source, temp, detect

# get global settings
Expand Down
358 changes: 260 additions & 98 deletions dill/_dill.py

Large diffs are not rendered by default.

266 changes: 266 additions & 0 deletions dill/_shims.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Author: Anirudh Vegesana (avegesan@stanford.edu)
# Copyright (c) 2021 The Uncertainty Quantification Foundation.
# License: 3-clause BSD. The full license text is available at:
# - https://github.com/uqfoundation/dill/blob/master/LICENSE
"""
Provides shims for compatibility between versions of dill and Python.

Compatibility shims should be provided in this file. Here are two simple example
use cases.

Deprecation of constructor function:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Assume that we were transitioning _import_module in _dill.py to
the builtin function importlib.import_module when present.

@move_to(_dill)
def _import_module(import_name):
... # code already in _dill.py

_import_module = Getattr(importlib, 'import_module', Getattr(_dill, '_import_module', None))

The code will attempt to find import_module in the importlib module. If not
present, it will use the _import_module function in _dill.

Emulate new Python behavior in older Python versions:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CellType.cell_contents behaves differently in Python 3.6 and 3.7. It is
read-only in Python 3.6 and writable and deletable in 3.7.

if _dill.OLD37 and _dill.HAS_CTYPES and ...:
@move_to(_dill)
def _setattr(object, name, value):
if type(object) is _dill.CellType and name == 'cell_contents':
_PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object)
_PyCell_Set(object, value)
else:
setattr(object, name, value)
... # more cases below

_setattr = Getattr(_dill, '_setattr', setattr)

_dill._setattr will be used when present to emulate Python 3.7 functionality in
older versions of Python while defaulting to the standard setattr in 3.7+.

See this PR for the discussion that led to this system:
https://github.com/uqfoundation/dill/pull/443
"""

import inspect, sys

_dill = sys.modules['dill._dill']


class Reduce(object):
    """
    Reduce objects are wrappers used for compatibility enforcement during
    unpickle-time. They should only be used in calls to pickler.save and
    other Reduce objects. They are only evaluated within unpickler.load.

    Pickling a Reduce object makes the two implementations equivalent:

    pickler.save(Reduce(*reduction))

    pickler.save_reduce(*reduction, obj=reduction)
    """
    __slots__ = ['reduction']
    def __new__(cls, *reduction, **kwargs):
        """
        Args:
            *reduction: a tuple that matches the format given here:
              https://docs.python.org/3/library/pickle.html#object.__reduce__
            is_callable: a bool to indicate that the object created by
              unpickling `reduction` is callable. If true, the current Reduce
              is allowed to be used as the function in further save_reduce calls
              or Reduce objects.

        Returns:
            A _CallableReduce when is_callable is true, otherwise a plain
            Reduce. The requested class `cls` does not influence the choice.

        Raises:
            TypeError: if any keyword other than is_callable is given.
        """
        # Emulates the keyword-only signature `is_callable=False`, which
        # Python 2 cannot spell. Unknown keywords are rejected so that a
        # misspelt `is_callable` is not silently treated as False.
        is_callable = kwargs.pop('is_callable', False) # Pleases Py2. Can be removed later
        if kwargs:
            raise TypeError(
                "__new__() got an unexpected keyword argument %r"
                % (sorted(kwargs)[0],)
            )
        if is_callable:
            self = object.__new__(_CallableReduce)
        else:
            self = object.__new__(Reduce)
        self.reduction = reduction
        return self
    def __repr__(self):
        return 'Reduce%s' % (self.reduction,)
    def __copy__(self):
        # A Reduce is only a description of a reduction; copies share it.
        return self # pragma: no cover
    def __deepcopy__(self, memo):
        return self # pragma: no cover
    def __reduce__(self):
        # Pickle the wrapped reduction in place of this wrapper object.
        return self.reduction
    def __reduce_ex__(self, protocol):
        # The same reduction is used regardless of the pickle protocol.
        return self.__reduce__()

class _CallableReduce(Reduce):
    # A version of Reduce for functions. Used to trick pickler.save_reduce into
    # thinking that Reduce objects of functions are themselves meaningful functions.
    def __call__(self, *args, **kwargs):
        # Evaluate the reduction (callable applied to its argument tuple) and
        # forward the call to the object it produces.
        reduction = self.__reduce__()
        func = reduction[0]
        f_args = reduction[1]
        obj = func(*f_args)
        return obj(*args, **kwargs)

__NO_DEFAULT = _dill.Sentinel('Getattr.NO_DEFAULT')

def Getattr(object, name, default=__NO_DEFAULT):
    """
    Build a Reduce object that stands for a deferred getattr call. When it is
    unpickled, attribute 'name' is looked up on 'object' and that value is the
    result. If the attribute is missing and a default was supplied, the default
    is the result instead.

    Each line in the first group builds the same reduction as the matching
    line in the second group:

    Getattr(collections, 'OrderedDict')
    Getattr(collections, 'spam', None)
    Getattr(*args)

    Reduce(getattr, (collections, 'OrderedDict'))
    Reduce(getattr, (collections, 'spam', None))
    Reduce(getattr, args)

    During unpickling, the first two produce collections.OrderedDict and None
    respectively: the first attribute exists, the second does not, so the
    default given as the third argument is used.

    The returned Reduce is marked callable exactly when 'default' is callable.
    """
    # Only forward a default to getattr when the caller actually gave one.
    getattr_args = (object, name)
    if default is not Getattr.NO_DEFAULT:
        getattr_args = getattr_args + (default,)

    return Reduce(getattr, getattr_args, is_callable=callable(default))

# Expose the sentinel on the function itself and drop the module-level name.
Getattr.NO_DEFAULT = __NO_DEFAULT
del __NO_DEFAULT

def move_to(module, name=None):
    """
    Decorator factory that installs the decorated function into 'module'.

    The function is stored in the module's namespace under 'name' (or under
    its own __name__ when 'name' is None), its __module__ is rewritten to the
    target module's name, and the function itself is returned unchanged.
    """
    def decorator(func):
        target_name = func.__name__ if name is None else name
        module.__dict__[target_name] = func
        func.__module__ = module.__name__
        return func
    return decorator

######################
## Compatibility Shims are defined below
######################

# Resolves at unpickle time to the unpickling dill's _CELL_EMPTY if it has
# one, otherwise to None.
_CELL_EMPTY = Getattr(_dill, '_CELL_EMPTY', None)

# The branches below install _setattr/_delattr into _dill so that a cell's
# `cell_contents` can be assigned or cleared on interpreters where the
# attribute is not directly writable (see the module docstring).
if _dill.OLD37:
    if _dill.HAS_CTYPES and hasattr(_dill.ctypes, 'pythonapi') and hasattr(_dill.ctypes.pythonapi, 'PyCell_Set'):
        # CPython
        ctypes = _dill.ctypes

        _PyCell_Set = ctypes.pythonapi.PyCell_Set

        @move_to(_dill)
        def _setattr(object, name, value):
            """setattr that can also assign `cell_contents` of a cell, via PyCell_Set."""
            if type(object) is _dill.CellType and name == 'cell_contents':
                # argtypes is set on every call because _delattr uses a
                # different signature for the same C function.
                _PyCell_Set.argtypes = (ctypes.py_object, ctypes.py_object)
                _PyCell_Set(object, value)
            else:
                setattr(object, name, value)

        @move_to(_dill)
        def _delattr(object, name):
            """delattr that can also clear `cell_contents` of a cell, via PyCell_Set."""
            if type(object) is _dill.CellType and name == 'cell_contents':
                # The value is declared as c_void_p so that None is passed to
                # PyCell_Set as a NULL pointer rather than as the None object.
                _PyCell_Set.argtypes = (ctypes.py_object, ctypes.c_void_p)
                _PyCell_Set(object, None)
            else:
                delattr(object, name)

    # General Python (not CPython) up to 3.6 is in a weird case, where it is
    # possible to pickle recursive cells, but we can't assign directly to the
    # cell.
    elif _dill.PY3:
        # Use nonlocal variables to reassign the cell value.
        # https://stackoverflow.com/a/59276835
        #
        # The definitions are wrapped in exec because `nonlocal` is a syntax
        # error on Python 2, which must still be able to parse this file. The
        # helper's code object is re-bound with the target cell as its closure,
        # so the `nonlocal` assignment/deletion acts on that cell.
        __nonlocal = ('nonlocal cell',)
        exec('''def _setattr(cell, name, value):
            if type(cell) is _dill.CellType and name == 'cell_contents':
                def cell_setter(value):
                    %s
                    cell = value # pylint: disable=unused-variable
                func = _dill.FunctionType(cell_setter.__code__, globals(), "", None, (cell,)) # same as cell_setter, but with cell being the cell's contents
                func(value)
            else:
                setattr(cell, name, value)''' % __nonlocal)
        move_to(_dill)(_setattr)

        # An already-empty cell needs no work. The probe catches Exception
        # rather than everything, so KeyboardInterrupt and SystemExit still
        # propagate.
        exec('''def _delattr(cell, name):
            if type(cell) is _dill.CellType and name == 'cell_contents':
                try:
                    cell.cell_contents
                except Exception:
                    return
                def cell_deleter():
                    %s
                    del cell # pylint: disable=unused-variable
                func = _dill.FunctionType(cell_deleter.__code__, globals(), "", None, (cell,)) # same as cell_deleter, but with cell being the cell's contents
                func()
            else:
                delattr(cell, name)''' % __nonlocal)
        move_to(_dill)(_delattr)

    else:
        # Likely PyPy 2.7. Simulate the nonlocal keyword with bytecode
        # manipulation.

        # The following function is based on 'cell_set' from 'cloudpickle'
        # https://github.com/cloudpipe/cloudpickle/blob/5d89947288a18029672596a4d719093cc6d5a412/cloudpickle/cloudpickle.py#L393-L482
        # Copyright (c) 2012, Regents of the University of California.
        # Copyright (c) 2009 `PiCloud, Inc. <http://www.picloud.com>`_.
        # License: https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE
        @move_to(_dill)
        def _setattr(cell, name, value):
            """setattr that can also assign `cell_contents` of a cell."""
            if type(cell) is _dill.CellType and name == 'cell_contents':
                # _cell_set_template_code is defined further down in this
                # branch; it is only looked up when _setattr is called.
                _cell_set = _dill.FunctionType(
                    _cell_set_template_code, {}, '_cell_set', (), (cell,),)
                _cell_set(value)
            else:
                setattr(cell, name, value)

        def _cell_set_factory(value):
            # The lambda is never called; referencing `cell` from it makes
            # `cell` a cell variable, so the assignment below compiles to a
            # store into a cell.
            lambda: cell
            cell = value

        co = _cell_set_factory.__code__

        # Same code as _cell_set_factory, except that `cell` is declared as a
        # free variable, so the store goes to the closure cell supplied when
        # the function object is built in _setattr.
        _cell_set_template_code = _dill.CodeType(
            co.co_argcount,
            co.co_nlocals,
            co.co_stacksize,
            co.co_flags,
            co.co_code,
            co.co_consts,
            co.co_names,
            co.co_varnames,
            co.co_filename,
            co.co_name,
            co.co_firstlineno,
            co.co_lnotab,
            co.co_cellvars,  # co_freevars is initialized with co_cellvars
            (),  # co_cellvars is made empty
        )

        del co

        @move_to(_dill)
        def _delattr(cell, name):
            """delattr that leaves a cell's `cell_contents` untouched."""
            if type(cell) is _dill.CellType and name == 'cell_contents':
                # Clearing a cell is not implemented in this branch; the
                # request is ignored.
                pass
            else:
                delattr(cell, name)

# Resolved at unpickle time: the unpickling dill's shim when it has one,
# otherwise the builtin.
_setattr = Getattr(_dill, '_setattr', setattr)
_delattr = Getattr(_dill, '_delattr', delattr)
22 changes: 18 additions & 4 deletions dill/detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,16 @@ def freevars(func):
func = getattr(func, func_code).co_freevars # get freevars
else:
return {}
return dict((name,c.cell_contents) for (name,c) in zip(func,closures))

def get_cell_contents():
    # Yield a (name, value) pair for each free variable whose closure cell
    # holds a value. `func` (the free-variable names) and `closures` (the
    # matching cells) come from the enclosing scope.
    for (name, c) in zip(func, closures):
        try:
            cell_contents = c.cell_contents
        except ValueError:
            # Reading an empty cell raises ValueError; such variables are
            # omitted. Any other error is unexpected and propagates.
            continue
        yield (name, cell_contents)

return dict(get_cell_contents())

# thanks to Davies Liu for recursion of globals
def nestedglobals(func, recurse=True):
Expand Down Expand Up @@ -201,9 +210,14 @@ def globalvars(func, recurse=True, builtin=False):
# get references from within closure
orig_func, func = func, set()
for obj in getattr(orig_func, func_closure) or {}:
_vars = globalvars(obj.cell_contents, recurse, builtin) or {}
func.update(_vars) #XXX: (above) be wary of infinte recursion?
globs.update(_vars)
try:
cell_contents = obj.cell_contents
except:
pass
else:
_vars = globalvars(cell_contents, recurse, builtin) or {}
func.update(_vars) #XXX: (above) be wary of infinte recursion?
globs.update(_vars)
# get globals
globs.update(getattr(orig_func, func_globals) or {})
# get names of references
Expand Down
6 changes: 4 additions & 2 deletions tests/test_classdef.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,10 @@ def test_class_objects():
assert type(_cls).__name__ == "_meta"

# test NoneType, NotImplementedType, and ellipsis
def test_none():
def test_specialtypes():
    """The types of None, NotImplemented and Ellipsis must each be picklable."""
    for singleton in (None, NotImplemented, Ellipsis):
        assert dill.pickles(type(singleton))

if hex(sys.hexversion) >= '0x20600f0':
from collections import namedtuple
Expand Down Expand Up @@ -204,7 +206,7 @@ def test_slots():
if __name__ == '__main__':
test_class_instances()
test_class_objects()
test_none()
test_specialtypes()
test_namedtuple()
test_dtype()
test_array_nested()
Expand Down
18 changes: 17 additions & 1 deletion tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ def function_e(e, *e1, e2=1, e3=2):
return e + sum(e1) + e2 + e3''')


def function_with_unassigned_variable():
    """Return a lambda that closes over a variable which is never assigned.

    Calling the returned lambda is expected to fail, because the closure
    cell for `value` is empty.
    """
    # The branch never runs, but the assignment still makes `value` a local
    # of this function, so the lambda captures a cell that stays empty.
    if False:
        value = None
    return (lambda: value)


def test_functions():
dumped_func_a = dill.dumps(function_a)
assert dill.loads(dumped_func_a)(0) == 0
Expand All @@ -52,6 +58,17 @@ def test_functions():
assert dill.loads(dumped_func_d)(1, 2, 3) == 6
assert dill.loads(dumped_func_d)(1, 2, d2=3) == 6

empty_cell = function_with_unassigned_variable()
cell_copy = dill.loads(dill.dumps(empty_cell))
assert 'empty' in str(cell_copy.__closure__[0])
try:
cell_copy()
except:
# this is good
pass
else:
raise AssertionError('cell_copy() did not read an empty cell')

if is_py3():
exec('''
dumped_func_e = dill.dumps(function_e)
Expand All @@ -62,6 +79,5 @@ def test_functions():
assert dill.loads(dumped_func_e)(1, 2, 3, e2=4) == 12
assert dill.loads(dumped_func_e)(1, 2, 3, e2=4, e3=5) == 15''')


if __name__ == '__main__':
test_functions()
Loading