From 9c3c589993e47ae255e0832c1bfa58f81740f0d4 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Sun, 8 May 2022 14:15:00 -0300 Subject: [PATCH 01/14] Add detailed trace mode showing dumped object size and visual depeth level --- dill/_dill.py | 279 +++++++++++++++++++++++++------------------------ dill/detect.py | 2 +- dill/logger.py | 140 +++++++++++++++++++++++++ 3 files changed, 283 insertions(+), 138 deletions(-) create mode 100644 dill/logger.py diff --git a/dill/_dill.py b/dill/_dill.py index 1c3caaed..a0bd37f7 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -21,14 +21,7 @@ 'UnpicklingError','HANDLE_FMODE','CONTENTS_FMODE','FILE_FMODE', 'PickleError','PickleWarning','PicklingWarning','UnpicklingWarning'] -import logging -log = logging.getLogger("dill") -log.addHandler(logging.StreamHandler()) -def _trace(boolean): - """print a trace through the stack when pickling; useful for debugging""" - if boolean: log.setLevel(logging.INFO) - else: log.setLevel(logging.WARN) - return +from .logger import adapter as logger import warnings import os @@ -491,6 +484,7 @@ def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds): f = open(filename, 'wb') try: pickler = Pickler(f, protocol, **kwds) + pickler._file = f pickler._original_main = main if byref: main = _stash_modules(main) @@ -557,13 +551,13 @@ class Pickler(StockPickler): _session = False from .settings import settings - def __init__(self, *args, **kwds): + def __init__(self, file, *args, **kwds): settings = Pickler.settings _byref = kwds.pop('byref', None) #_strictio = kwds.pop('strictio', None) _fmode = kwds.pop('fmode', None) _recurse = kwds.pop('recurse', None) - StockPickler.__init__(self, *args, **kwds) + StockPickler.__init__(self, file, *args, **kwds) self._main = _main_module self._diff_cache = {} self._byref = settings['byref'] if _byref is None else _byref @@ -572,6 +566,7 @@ def __init__(self, *args, **kwds): self._recurse = settings['recurse'] if _recurse is None else _recurse from collections import OrderedDict self._postproc = OrderedDict() + self._file = file def dump(self, obj): #NOTE: if settings change, need to update attributes # register if the object is a numpy ufunc @@ -579,10 +574,10 @@ def dump(self, obj): #NOTE: if settings change, need to update attributes if NumpyUfuncType and numpyufunc(obj): @register(type(obj)) def save_numpy_ufunc(pickler, obj): - log.info("Nu: %s" % obj) + _log_trace(pickler, "Nu: %s", obj) name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) StockPickler.save_global(pickler, obj, name=name) - log.info("# Nu") + _log_trace(pickler, "# Nu") return # NOTE: the above 'save' performs like: # import copy_reg @@ -593,9 +588,9 @@ def save_numpy_ufunc(pickler, obj): if NumpyDType and numpydtype(obj): @register(type(obj)) def save_numpy_dtype(pickler, obj): - log.info("Dt: %s" % obj) + _log_trace(pickler, "Dt: %s", obj) pickler.save_reduce(_create_dtypemeta, (obj.type,), obj=obj) - log.info("# Dt") + _log_trace(pickler, "# Dt") return # NOTE: the above 'save' performs like: # import copy_reg @@ -606,21 +601,25 @@ def save_numpy_dtype(pickler, obj): if NumpyArrayType and ndarraysubclassinstance(obj): @register(type(obj)) def save_numpy_array(pickler, obj): - log.info("Nu: (%s, %s)" % (obj.shape,obj.dtype)) + _log_trace(pickler, "Nu: (%s, %s)", obj.shape, obj.dtype) npdict = getattr(obj, '__dict__', None) f, args, state = obj.__reduce__() pickler.save_reduce(_create_array, (f,args,state,npdict), obj=obj) - log.info("# Nu") + _log_trace(pickler, "# Nu") return # 
end hack if GENERATOR_FAIL and type(obj) == GeneratorType: msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType raise PicklingError(msg) else: - StockPickler.dump(self, obj) + _log_trace(self) + try: + StockPickler.dump(self, obj) + finally: + _log_trace(self, reset=True) return + dump.__doc__ = StockPickler.dump.__doc__ - pass class Unpickler(StockUnpickler): """python's Unpickler extended to interpreter sessions and more types""" @@ -1165,9 +1164,9 @@ def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO #@register(CodeType) #def save_code(pickler, obj): -# log.info("Co: %s" % obj) +# _log_trace(pickler, "Co: %s", obj) # pickler.save_reduce(_unmarshal, (marshal.dumps(obj),), obj=obj) -# log.info("# Co") +# _log_trace(pickler, "# Co") # return # The following function is based on 'save_codeobject' from 'cloudpickle' @@ -1176,7 +1175,7 @@ def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO # License: https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE @register(CodeType) def save_code(pickler, obj): - log.info("Co: %s" % obj) + _log_trace(pickler, "Co: %s", obj) if PY3: if hasattr(obj, "co_exceptiontable"): args = ( @@ -1214,42 +1213,42 @@ def save_code(pickler, obj): ) pickler.save_reduce(_create_code, args, obj=obj) - log.info("# Co") + _log_trace(pickler, "# Co") return @register(dict) def save_module_dict(pickler, obj): if is_dill(pickler, child=False) and obj == pickler._main.__dict__ and \ not (pickler._session and pickler._first_pass): - log.info("D1: " % (obj,)) + _log_trace(pickler, "Dkvi: <%s>", obj) mapping = obj.mapping | _dictproxy_helper_instance pickler.save_reduce(func, (mapping,), obj=obj) - log.info("# Dkvi") + _log_trace(pickler, "# Dkvi") return _save_dict_view return [ (funcname, save_dict_view_for_function(getattr(dicttype, funcname))) @@ -1274,21 +1273,21 @@ def _save_dict_view(pickler, obj): # License: https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE def save_dict_view(dicttype): def save_dict_keys(pickler, obj): - log.info("Dk: <%s>" % (obj,)) + _log_trace(pickler, "Dk: <%s>", obj) dict_constructor = _shims.Reduce(dicttype.fromkeys, (list(obj),)) pickler.save_reduce(dicttype.keys, (dict_constructor,), obj=obj) - log.info("# Dk") + _log_trace(pickler, "# Dk") def save_dict_values(pickler, obj): - log.info("Dv: <%s>" % (obj,)) + _log_trace(pickler, "Dv: <%s>", obj) dict_constructor = _shims.Reduce(dicttype, (enumerate(obj),)) pickler.save_reduce(dicttype.values, (dict_constructor,), obj=obj) - log.info("# Dv") + _log_trace(pickler, "# Dv") def save_dict_items(pickler, obj): - log.info("Di: <%s>" % (obj,)) + _log_trace(pickler, "Di: <%s>", obj) pickler.save_reduce(dicttype.items, (dicttype(obj),), obj=obj) - log.info("# Di") + _log_trace(pickler, "# Di") return ( ('keys', save_dict_keys), @@ -1311,61 +1310,61 @@ def save_dict_items(pickler, obj): @register(ClassType) def save_classobj(pickler, obj): #FIXME: enable pickler._byref if not _locate_function(obj, pickler): - log.info("C1: %s" % obj) + _log_trace(pickler, "C1: %s", obj) pickler.save_reduce(ClassType, (obj.__name__, obj.__bases__, obj.__dict__), obj=obj) #XXX: or obj.__dict__.copy()), obj=obj) ? 
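        # (A note on the short trace codes used in these log messages: the
        # letter pair abbreviates the handler that emitted them (C1/C2 here
        # come from save_classobj, F1/F2 from save_function, and so on), and
        # the digit picks out the save path taken: C1 reconstructs the class
        # from its name, bases and __dict__, while C2 below falls back to a
        # by-reference StockPickler.save_global. The matching message that
        # starts with '#' marks the point where the object has been written.)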
- log.info("# C1") + _log_trace(pickler, "# C1") else: - log.info("C2: %s" % obj) + _log_trace(pickler, "C2: %s", obj) name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) StockPickler.save_global(pickler, obj, name=name) - log.info("# C2") + _log_trace(pickler, "# C2") return @register(LockType) def save_lock(pickler, obj): - log.info("Lo: %s" % obj) + _log_trace(pickler, "Lo: %s", obj) pickler.save_reduce(_create_lock, (obj.locked(),), obj=obj) - log.info("# Lo") + _log_trace(pickler, "# Lo") return @register(RLockType) def save_rlock(pickler, obj): - log.info("RL: %s" % obj) + _log_trace(pickler, "RL: %s", obj) r = obj.__repr__() # don't use _release_save as it unlocks the lock count = int(r.split('count=')[1].split()[0].rstrip('>')) owner = int(r.split('owner=')[1].split()[0]) if PY3 else getattr(obj, '_RLock__owner') pickler.save_reduce(_create_rlock, (count,owner,), obj=obj) - log.info("# RL") + _log_trace(pickler, "# RL") return if not IS_PYPY2: #@register(SocketType) #FIXME: causes multiprocess test_pickling FAIL def save_socket(pickler, obj): - log.info("So: %s" % obj) + _log_trace(pickler, "So: %s", obj) pickler.save_reduce(*reduce_socket(obj)) - log.info("# So") + _log_trace(pickler, "# So") return if sys.hexversion <= 0x3050000: @register(ItemGetterType) def save_itemgetter(pickler, obj): - log.info("Ig: %s" % obj) + _log_trace(pickler, "Ig: %s", obj) helper = _itemgetter_helper() obj(helper) pickler.save_reduce(type(obj), tuple(helper.items), obj=obj) - log.info("# Ig") + _log_trace(pickler, "# Ig") return @register(AttrGetterType) def save_attrgetter(pickler, obj): - log.info("Ag: %s" % obj) + _log_trace(pickler, "Ag: %s", obj) attrs = [] helper = _attrgetter_helper(attrs) obj(helper) pickler.save_reduce(type(obj), tuple(attrs), obj=obj) - log.info("# Ag") + _log_trace(pickler, "# Ag") return def _save_file(pickler, obj, open_): @@ -1401,9 +1400,9 @@ def _save_file(pickler, obj, open_): @register(BufferedWriterType) @register(TextWrapperType) def save_file(pickler, obj): - log.info("Fi: %s" % obj) + _log_trace(pickler, "Fi: %s", obj) f = _save_file(pickler, obj, open) - log.info("# Fi") + _log_trace(pickler, "# Fi") return f if PyTextWrapperType: @@ -1412,9 +1411,9 @@ def save_file(pickler, obj): @register(PyBufferedWriterType) @register(PyTextWrapperType) def save_file(pickler, obj): - log.info("Fi: %s" % obj) + _log_trace(pickler, "Fi: %s", obj) f = _save_file(pickler, obj, _open) - log.info("# Fi") + _log_trace(pickler, "# Fi") return f # The following two functions are based on 'saveCStringIoInput' @@ -1424,42 +1423,42 @@ def save_file(pickler, obj): if InputType: @register(InputType) def save_stringi(pickler, obj): - log.info("Io: %s" % obj) + _log_trace(pickler, "Io: %s", obj) if obj.closed: value = ''; position = 0 else: value = obj.getvalue(); position = obj.tell() pickler.save_reduce(_create_stringi, (value, position, \ obj.closed), obj=obj) - log.info("# Io") + _log_trace(pickler, "# Io") return @register(OutputType) def save_stringo(pickler, obj): - log.info("Io: %s" % obj) + _log_trace(pickler, "Io: %s", obj) if obj.closed: value = ''; position = 0 else: value = obj.getvalue(); position = obj.tell() pickler.save_reduce(_create_stringo, (value, position, \ obj.closed), obj=obj) - log.info("# Io") + _log_trace(pickler, "# Io") return if 0x2050000 <= sys.hexversion < 0x3010000: @register(PartialType) def save_functor(pickler, obj): - log.info("Fu: %s" % obj) + _log_trace(pickler, "Fu: %s", obj) pickler.save_reduce(_create_ftype, (type(obj), obj.func, 
obj.args, obj.keywords), obj=obj) - log.info("# Fu") + _log_trace(pickler, "# Fu") return if LRUCacheType is not None: from functools import lru_cache @register(LRUCacheType) def save_lru_cache(pickler, obj): - log.info("LRU: %s" % obj) + _log_trace(pickler, "LRU: %s", obj) if OLD39: kwargs = obj.cache_info() args = (kwargs.maxsize,) @@ -1471,14 +1470,14 @@ def save_lru_cache(pickler, obj): else: wrapper = lru_cache pickler.save_reduce(wrapper, (obj.__wrapped__,), obj=obj) - log.info("# LRU") + _log_trace(pickler, "# LRU") return @register(SuperType) def save_super(pickler, obj): - log.info("Su: %s" % obj) + _log_trace(pickler, "Su: %s", obj) pickler.save_reduce(super, (obj.__thisclass__, obj.__self__), obj=obj) - log.info("# Su") + _log_trace(pickler, "# Su") return if OLDER or not PY3: @@ -1488,34 +1487,34 @@ def save_builtin_method(pickler, obj): if obj.__self__ is __builtin__: module = 'builtins' if PY3 else '__builtin__' _t = "B1" - log.info("%s: %s" % (_t, obj)) + _log_trace(pickler, "%s: %s", _t, obj) else: module = obj.__self__ _t = "B3" - log.info("%s: %s" % (_t, obj)) + _log_trace(pickler, "%s: %s", _t, obj) if is_dill(pickler, child=True): _recurse = pickler._recurse pickler._recurse = False pickler.save_reduce(_get_attr, (module, obj.__name__), obj=obj) if is_dill(pickler, child=True): pickler._recurse = _recurse - log.info("# %s" % _t) + _log_trace(pickler, "# %s", _t) else: - log.info("B2: %s" % obj) + _log_trace(pickler, "B2: %s", obj) name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) StockPickler.save_global(pickler, obj, name=name) - log.info("# B2") + _log_trace(pickler, "# B2") return @register(MethodType) #FIXME: fails for 'hidden' or 'name-mangled' classes def save_instancemethod0(pickler, obj):# example: cStringIO.StringI - log.info("Me: %s" % obj) #XXX: obj.__dict__ handled elsewhere? + _log_trace(pickler, "Me: %s", obj) #XXX: obj.__dict__ handled elsewhere? 
if PY3: pickler.save_reduce(MethodType, (obj.__func__, obj.__self__), obj=obj) else: pickler.save_reduce(MethodType, (obj.im_func, obj.im_self, obj.im_class), obj=obj) - log.info("# Me") + _log_trace(pickler, "# Me") return if sys.hexversion >= 0x20500f0: @@ -1526,40 +1525,40 @@ def save_instancemethod0(pickler, obj):# example: cStringIO.StringI @register(WrapperDescriptorType) @register(ClassMethodDescriptorType) def save_wrapper_descriptor(pickler, obj): - log.info("Wr: %s" % obj) + _log_trace(pickler, "Wr: %s", obj) pickler.save_reduce(_getattr, (obj.__objclass__, obj.__name__, obj.__repr__()), obj=obj) - log.info("# Wr") + _log_trace(pickler, "# Wr") return else: @register(MemberDescriptorType) @register(GetSetDescriptorType) def save_wrapper_descriptor(pickler, obj): - log.info("Wr: %s" % obj) + _log_trace(pickler, "Wr: %s", obj) pickler.save_reduce(_getattr, (obj.__objclass__, obj.__name__, obj.__repr__()), obj=obj) - log.info("# Wr") + _log_trace(pickler, "# Wr") return @register(MethodWrapperType) def save_instancemethod(pickler, obj): - log.info("Mw: %s" % obj) + _log_trace(pickler, "Mw: %s", obj) if IS_PYPY2 and obj.__self__ is None and obj.im_class: # Can be a class method in PYPY2 if __self__ is none pickler.save_reduce(getattr, (obj.im_class, obj.__name__), obj=obj) return pickler.save_reduce(getattr, (obj.__self__, obj.__name__), obj=obj) - log.info("# Mw") + _log_trace(pickler, "# Mw") return elif not IS_PYPY: @register(MethodDescriptorType) @register(WrapperDescriptorType) def save_wrapper_descriptor(pickler, obj): - log.info("Wr: %s" % obj) + _log_trace(pickler, "Wr: %s", obj) pickler.save_reduce(_getattr, (obj.__objclass__, obj.__name__, obj.__repr__()), obj=obj) - log.info("# Wr") + _log_trace(pickler, "# Wr") return @register(CellType) @@ -1567,7 +1566,7 @@ def save_cell(pickler, obj): try: f = obj.cell_contents except: - log.info("Ce3: %s" % obj) + _log_trace(pickler, "Ce3: %s", obj) # _shims._CELL_EMPTY is defined in _shims.py to support PyPy 2.7. # It unpickles to a sentinel object _dill._CELL_EMPTY, also created in # _shims.py. This object is not present in Python 3 because the cell's @@ -1586,7 +1585,7 @@ def save_cell(pickler, obj): pickler.write(bytes('0', 'UTF-8')) else: pickler.write('0') - log.info("# Ce3") + _log_trace(pickler, "# Ce3") return if is_dill(pickler, child=True): if id(f) in pickler._postproc: @@ -1597,34 +1596,34 @@ def save_cell(pickler, obj): # value as late as possible to prevent cycle. postproc = next(iter(pickler._postproc.values()), None) if postproc is not None: - log.info("Ce2: %s" % obj) + _log_trace(pickler, "Ce2: %s", obj) # _CELL_REF is defined in _shims.py to support older versions of # dill. 
When breaking changes are made to dill, (_CELL_REF,) can # be replaced by () pickler.save_reduce(_create_cell, (_CELL_REF,), obj=obj) postproc.append((_shims._setattr, (obj, 'cell_contents', f))) - log.info("# Ce2") + _log_trace(pickler, "# Ce2") return - log.info("Ce1: %s" % obj) + _log_trace(pickler, "Ce1: %s", obj) pickler.save_reduce(_create_cell, (f,), obj=obj) - log.info("# Ce1") + _log_trace(pickler, "# Ce1") return if MAPPING_PROXY_TRICK: @register(DictProxyType) def save_dictproxy(pickler, obj): - log.info("Mp: %s" % obj) + _log_trace(pickler, "Mp: %s", obj) mapping = obj | _dictproxy_helper_instance pickler.save_reduce(DictProxyType, (mapping,), obj=obj) - log.info("# Mp") + _log_trace(pickler, "# Mp") return elif not IS_PYPY: if not OLD33: @register(DictProxyType) def save_dictproxy(pickler, obj): - log.info("Mp: %s" % obj) + _log_trace(pickler, "Mp: %s", obj) pickler.save_reduce(DictProxyType, (obj.copy(),), obj=obj) - log.info("# Mp") + _log_trace(pickler, "# Mp") return else: # The following function is based on 'saveDictProxy' from spickle @@ -1632,31 +1631,31 @@ def save_dictproxy(pickler, obj): # License: http://www.apache.org/licenses/LICENSE-2.0 @register(DictProxyType) def save_dictproxy(pickler, obj): - log.info("Dp: %s" % obj) + _log_trace(pickler, "Dp: %s", obj) attr = obj.get('__dict__') #pickler.save_reduce(_create_dictproxy, (attr,'nested'), obj=obj) if type(attr) == GetSetDescriptorType and attr.__name__ == "__dict__" \ and getattr(attr.__objclass__, "__dict__", None) == obj: pickler.save_reduce(getattr, (attr.__objclass__,"__dict__"),obj=obj) - log.info("# Dp") + _log_trace(pickler, "# Dp") return # all bad below... so throw ReferenceError or TypeError raise ReferenceError("%s does not reference a class __dict__" % obj) @register(SliceType) def save_slice(pickler, obj): - log.info("Sl: %s" % obj) + _log_trace(pickler, "Sl: %s", obj) pickler.save_reduce(slice, (obj.start, obj.stop, obj.step), obj=obj) - log.info("# Sl") + _log_trace(pickler, "# Sl") return @register(XRangeType) @register(EllipsisType) @register(NotImplementedType) def save_singleton(pickler, obj): - log.info("Si: %s" % obj) + _log_trace(pickler, "Si: %s", obj) pickler.save_reduce(_eval_repr, (obj.__repr__(),), obj=obj) - log.info("# Si") + _log_trace(pickler, "# Si") return def _proxy_helper(obj): # a dead proxy returns a reference to None @@ -1702,10 +1701,10 @@ def _locate_object(address, module=None): @register(ReferenceType) def save_weakref(pickler, obj): refobj = obj() - log.info("R1: %s" % obj) + _log_trace(pickler, "R1: %s", obj) #refobj = ctypes.pythonapi.PyWeakref_GetObject(obj) # dead returns "None" pickler.save_reduce(_create_weakref, (refobj,), obj=obj) - log.info("# R1") + _log_trace(pickler, "# R1") return @register(ProxyType) @@ -1714,15 +1713,15 @@ def save_weakproxy(pickler, obj): refobj = _locate_object(_proxy_helper(obj)) try: _t = "R2" - log.info("%s: %s" % (_t, obj)) + _log_trace(pickler, "%s: %s", _t, obj) except ReferenceError: _t = "R3" - log.info("%s: %s" % (_t, sys.exc_info()[1])) + _log_trace(pickler, "%s: %s", _t, sys.exc_info()[1]) #callable = bool(getattr(refobj, '__call__', None)) if type(obj) is CallableProxyType: callable = True else: callable = False pickler.save_reduce(_create_weakproxy, (refobj, callable), obj=obj) - log.info("# %s" % _t) + _log_trace(pickler, "# %s", _t) return def _is_builtin_module(module): @@ -1744,72 +1743,72 @@ def save_module(pickler, obj): except RuntimeError: # not memorised module, probably part of dill pass else: - log.info("M2: %s 
with diff" % obj) - log.info("Diff: %s", changed.keys()) + _log_trace(pickler, "M2: %s with diff", obj) + _log_trace(pickler, "Diff: %s", changed.keys()) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj, state=changed) - log.info("# M2") + _log_trace(pickler, "# M2") return - log.info("M1: %s" % obj) + _log_trace(pickler, "M1: %s", obj) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj) - log.info("# M1") + _log_trace(pickler, "# M1") else: builtin_mod = _is_builtin_module(obj) if obj.__name__ not in ("builtins", "dill", "dill._dill") and not builtin_mod or \ is_dill(pickler, child=True) and obj is pickler._main: - log.info("M1: %s" % obj) + _log_trace(pickler, "M1: %s", obj) _main_dict = obj.__dict__.copy() #XXX: better no copy? option to copy? [_main_dict.pop(item, None) for item in singletontypes + ["__builtins__", "__loader__"]] pickler.save_reduce(_import_module, (obj.__name__,), obj=obj, state=_main_dict) - log.info("# M1") + _log_trace(pickler, "# M1") elif PY3 and obj.__name__ == "dill._dill": - log.info("M2: %s" % obj) + _log_trace(pickler, "M2: %s", obj) pickler.save_global(obj, name="_dill") - log.info("# M2") + _log_trace(pickler, "# M2") else: - log.info("M2: %s" % obj) + _log_trace(pickler, "M2: %s", obj) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj) - log.info("# M2") + _log_trace(pickler, "# M2") return return @register(TypeType) def save_type(pickler, obj, postproc_list=None): if obj in _typemap: - log.info("T1: %s" % obj) + _log_trace(pickler, "T1: %s", obj) pickler.save_reduce(_load_type, (_typemap[obj],), obj=obj) - log.info("# T1") + _log_trace(pickler, "# T1") elif obj.__bases__ == (tuple,) and all([hasattr(obj, attr) for attr in ('_fields','_asdict','_make','_replace')]): # special case: namedtuples - log.info("T6: %s" % obj) + _log_trace(pickler, "T6: %s", obj) if OLD37 or (not obj._field_defaults): pickler.save_reduce(_create_namedtuple, (obj.__name__, obj._fields, obj.__module__), obj=obj) else: defaults = [obj._field_defaults[field] for field in obj._fields] pickler.save_reduce(_create_namedtuple, (obj.__name__, obj._fields, obj.__module__, defaults), obj=obj) - log.info("# T6") + _log_trace(pickler, "# T6") return # special cases: NoneType, NotImplementedType, EllipsisType elif obj is type(None): - log.info("T7: %s" % obj) + _log_trace(pickler, "T7: %s", obj) #XXX: pickler.save_reduce(type, (None,), obj=obj) if PY3: pickler.write(bytes('c__builtin__\nNoneType\n', 'UTF-8')) else: pickler.write('c__builtin__\nNoneType\n') - log.info("# T7") + _log_trace(pickler, "# T7") elif obj is NotImplementedType: - log.info("T7: %s" % obj) + _log_trace(pickler, "T7: %s", obj) pickler.save_reduce(type, (NotImplemented,), obj=obj) - log.info("# T7") + _log_trace(pickler, "# T7") elif obj is EllipsisType: - log.info("T7: %s" % obj) + _log_trace(pickler, "T7: %s", obj) pickler.save_reduce(type, (Ellipsis,), obj=obj) - log.info("# T7") + _log_trace(pickler, "# T7") else: obj_name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) @@ -1820,11 +1819,11 @@ def save_type(pickler, obj, postproc_list=None): if issubclass(type(obj), type): # thanks to Tom Stepleton pointing out pickler._session unneeded _t = 'T2' - log.info("%s: %s" % (_t, obj)) + _log_trace(pickler, "%s: %s", _t, obj) _dict = _dict_from_dictproxy(obj.__dict__) else: _t = 'T3' - log.info("%s: %s" % (_t, obj)) + _log_trace(pickler, "%s: %s", _t, obj) _dict = obj.__dict__ #print (_dict) #print ("%s\n%s" % (type(obj), obj.__name__)) @@ -1838,9 +1837,9 @@ def 
save_type(pickler, obj, postproc_list=None): _save_with_postproc(pickler, (_create_type, ( type(obj), obj.__name__, obj.__bases__, _dict )), obj=obj, postproc_list=postproc_list) - log.info("# %s" % _t) + _log_trace(pickler, "# %s", _t) else: - log.info("T4: %s" % obj) + _log_trace(pickler, "T4: %s", obj) if incorrectly_named: warnings.warn('Cannot locate reference to %r.' % (obj,), PicklingWarning) if obj_recursive: @@ -1849,7 +1848,7 @@ def save_type(pickler, obj, postproc_list=None): #print ("%s\n%s" % (type(obj), obj.__name__)) #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) StockPickler.save_global(pickler, obj, name=obj_name) - log.info("# T4") + _log_trace(pickler, "# T4") return # Error in PyPy 2.7 when adding ABC support @@ -1860,15 +1859,15 @@ def save_frame(pickler, obj): @register(property) def save_property(pickler, obj): - log.info("Pr: %s" % obj) + _log_trace(pickler, "Pr: %s", obj) pickler.save_reduce(property, (obj.fget, obj.fset, obj.fdel, obj.__doc__), obj=obj) - log.info("# Pr") + _log_trace(pickler, "# Pr") @register(staticmethod) @register(classmethod) def save_classmethod(pickler, obj): - log.info("Cm: %s" % obj) + _log_trace(pickler, "Cm: %s", obj) im_func = '__func__' if PY3 else 'im_func' try: orig_func = getattr(obj, im_func) @@ -1889,12 +1888,12 @@ def save_classmethod(pickler, obj): # state = None pickler.save_reduce(type(obj), (orig_func,), obj=obj) - log.info("# Cm") + _log_trace(pickler, "# Cm") @register(FunctionType) def save_function(pickler, obj): if not _locate_function(obj, pickler): - log.info("F1: %s" % obj) + _log_trace(pickler, "F1: %s", obj) _recurse = getattr(pickler, '_recurse', None) _byref = getattr(pickler, '_byref', None) _postproc = getattr(pickler, '_postproc', None) @@ -1997,12 +1996,12 @@ def save_function(pickler, obj): else: pickler.write('0') - log.info("# F1") + _log_trace(pickler, "# F1") else: - log.info("F2: %s" % obj) + _log_trace(pickler, "F2: %s", obj) name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) StockPickler.save_global(pickler, obj, name=name) - log.info("# F2") + _log_trace(pickler, "# F2") return # quick sanity checking @@ -2098,10 +2097,16 @@ def _extend(): try: StockPickler.dispatch[t] = func except: #TypeError, PicklingError, UnpicklingError - log.info("skip: %s" % t) + _log_trace(pickler, "skip: %s", t) else: pass return +def _log_trace(pickler, *args, **kwargs): + if is_dill(pickler, child=False): + logger.log_trace(pickler, *args, **kwargs) + else: + logger.info(*args, **kwargs) + del diff, _use_diff, use_diff # EOF diff --git a/dill/detect.py b/dill/detect.py index 41575205..26d5b219 100644 --- a/dill/detect.py +++ b/dill/detect.py @@ -13,8 +13,8 @@ from inspect import ismethod, isfunction, istraceback, isframe, iscode from .pointers import parent, reference, at, parents, children -from ._dill import _trace as trace from ._dill import PY3 +from .logger import trace __all__ = ['baditems','badobjects','badtypes','code','errors','freevars', 'getmodule','globalvars','nestedcode','nestedglobals','outermost', diff --git a/dill/logger.py b/dill/logger.py new file mode 100644 index 00000000..c452348e --- /dev/null +++ b/dill/logger.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +# +# Author: Leonardo Gama (@leogama) +# Copyright (c) 2022 The Uncertainty Quantification Foundation. +# License: 3-clause BSD. The full license text is available at: +# - https://github.com/uqfoundation/dill/blob/master/LICENSE +r""" +Logging utilities for dill. 
+ +Python's logging functionality can be conceptually divided into five steps: + 0. Check logging level -> abort if call level is greater than logger level + 1. Gather information -> construct a LogRecord from passed arguments and context + 2. Filter (optional) -> discard message if the record matches a filter + 3. Format -> format message with args, then format output string with message plus record + 4. Handle -> write the formatted string to output as defined in the handler + + +Basic calling sequence +====================== + +dill.logging.logger.log -> # or logger.info, etc. + Logger.log -> \ + Logger._log -> }- accept 'extra' parameter for custom record entries + Logger.makeRecord -> / + LogRecord.__init__ + Logger.handle -> + Logger.callHandlers -> + Handler.handle -> + Filterer.filter -> + Filter.filter + StreamHandler.emit -> + Handler.format -> + Formatter.format -> + LogRecord.getMessage # does: record.message = msg % args + Formatter.formatMessage -> + PercentStyle.format # does: self._fmt % record.__dict__ + +NOTE: All methods from the second line on are from logging.__init__.py + + +Logger customizations +===================== + +Step 1 - Information +-------------------- +We use a LoggerAdapter subclass to wrap the module's logger, as the logging API +doesn't allow setting it directly with a custom Logger subclass. The added +'log_trace()' method receives a pickle instance as the first argument and +creates extra values to be added in the LogRecord from it, then calls 'log()'. + +Step 3 - Formatting +------------------- +We use a Formatter subclass to add prefix and suffix strings to the log message +in trace mode (an also provide empty defaults for normal logs). The user may +substitute the formatter to customize the extra information display. +""" + +__all__ = ['INFO_DETAIL', 'adapter', 'logger', 'trace'] + +import logging, math + +INFO_DETAIL = (logging.INFO + logging.DEBUG) // 2 + + +class TraceAdapter(logging.LoggerAdapter): + """tracks object graph depth and calculates pickled object size""" + def __init__(self, logger): + self.logger = logger + def process(self, msg, kwargs): + # A no-op override, as we don't have self.extra + return msg, kwargs + def log_trace(self, pickler, msg=None, *args, **kwargs): + """log_trace(self, pickler, msg=None, *args, reset=False, **kwargs)""" + reset = kwargs.pop('reset', False) + if msg is None: + # Initialization and clean-up. + if reset: + pickler._trace_depth = 0 + pickler._size_stack = None + elif self.isEnabledFor(INFO_DETAIL): + pickler._trace_depth = 1 + pickler._size_stack = [] + return + + elif getattr(pickler, '_trace_depth', None): + extra = kwargs.get('extra', {}) + pushed = msg.startswith('#') + size = None + try: + size = pickler._file.tell() + size += pickler.framer.current_frame.tell() + except AttributeError: + pass + if size: + if not pushed: + pickler._size_stack.append(size) + else: + size -= pickler._size_stack.pop() + extra['size'] = size + if pushed: pickler._trace_depth -= 1 + extra['depth'] = pickler._trace_depth + kwargs['extra'] = extra + self.log(INFO_DETAIL, msg, *args, **kwargs) + if not pushed: pickler._trace_depth += 1 + + else: + self.info(msg, *args, **kwargs) + + +class TraceFormatter(logging.Formatter): + """generates message prefix and suffix from record""" + def format(self, record): + fields = {'prefix': "", 'suffix': ""} + if hasattr(record, 'depth'): + fields['prefix'] = record.depth*">" + " " + if hasattr(record, 'size'): + # Show object size in human-redable form. 
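            # (A quick sketch of the computation below, for reference: 'power'
            # counts how many factors of 1024 fit in the size (0 for plain
            # bytes, 1 for the KiB range, 2 for MiB, and so on), and
            # 'size >> power*10' integer-divides by 1024**power, so e.g. a
            # record.size of 5000 gives power == 1 and size == 4.)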
+ power = int(math.log(record.size, 2)) // 10 + size = record.size >> power*10 + fields['suffix'] = " (%d %sB)" % (size, "KMGTP"[power] + "i" if power else "") + record.__dict__.update(fields) + return super(TraceFormatter, self).format(record) + + +logger = logging.getLogger('dill') +adapter = TraceAdapter(logger) + +handler = logging.StreamHandler() +logger.addHandler(handler) + +formatter = TraceFormatter("%(prefix)s%(message)s%(suffix)s") +handler.setFormatter(formatter) + + +def trace(boolean, detail=False): + """print a trace through the stack when pickling; useful for debugging""" + if boolean: + logger.setLevel(INFO_DETAIL if detail else logging.INFO) + else: + logger.setLevel(logging.WARNING) From 1885df12fbef56ef7ae09e45486256f5e7a9f56b Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Mon, 9 May 2022 15:42:48 -0300 Subject: [PATCH 02/14] Tests: logging code coverage --- tests/test_logger.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tests/test_logger.py diff --git a/tests/test_logger.py b/tests/test_logger.py new file mode 100644 index 00000000..4541681a --- /dev/null +++ b/tests/test_logger.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +# Author: Leonardo Gama (@leogama) +# Copyright (c) 2022 The Uncertainty Quantification Foundation. +# License: 3-clause BSD. The full license text is available at: +# - https://github.com/uqfoundation/dill/blob/master/LICENSE + +import dill, logging, re +from dill.logger import formatter, handler, logger + +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +def test_logging(regex=None): + buffer = StringIO() + handler = logging.StreamHandler(buffer) + handler.setFormatter(formatter) + logger.addHandler(handler) + try: + dill.dumps({'a': (1, 2), 'b': object(), 'big': list(range(10000))}) + if regex is None: + assert buffer.getvalue() == "" + else: + regex = re.compile(regex + '$') + for line in buffer.getvalue().splitlines(): + assert regex.match(line) + finally: + logger.removeHandler(handler) + buffer.close() + +if __name__ == '__main__': + logger.removeHandler(handler) + test_logging() + dill.detect.trace(True) + test_logging(r'(# )?\w.*[^)]') + dill.detect.trace(True, detail=True) + test_logging(r'>+ (# )?\w.* \(\d+ (\wi)?B\)') + dill.detect.trace(False) + test_logging() From 5452dd76fe5d0a059b5169252e5b9fb563b6fd7b Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Mon, 9 May 2022 16:09:31 -0300 Subject: [PATCH 03/14] logger: just show size after writing object --- dill/logger.py | 2 +- tests/test_logger.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dill/logger.py b/dill/logger.py index c452348e..bc188905 100644 --- a/dill/logger.py +++ b/dill/logger.py @@ -96,7 +96,7 @@ def log_trace(self, pickler, msg=None, *args, **kwargs): pickler._size_stack.append(size) else: size -= pickler._size_stack.pop() - extra['size'] = size + extra['size'] = size if pushed: pickler._trace_depth -= 1 extra['depth'] = pickler._trace_depth kwargs['extra'] = extra diff --git a/tests/test_logger.py b/tests/test_logger.py index 4541681a..78daa081 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -36,6 +36,9 @@ def test_logging(regex=None): dill.detect.trace(True) test_logging(r'(# )?\w.*[^)]') dill.detect.trace(True, detail=True) - test_logging(r'>+ (# )?\w.* \(\d+ (\wi)?B\)') + test_logging(r'>+ ' # prefix indicating depth + r'(\w.*[^)]' # begin pickling object + r'|# \w.* \(\d+ (\wi)?B\))' # object written (with size) + ) 
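    # (For reference, lines accepted by the detail-mode pattern above look
    # roughly like the following; illustrative only, not verbatim output:
    #     > D2: <dict object at 0x7f...>
    #     >> T4: <class 'object'>
    #     >> # T4 (35 B)
    #     > # D2 (1234 B)
    # that is, one '>' per depth level, the begin/end trace message, and the
    # pickled size appended to the closing '#' lines.)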
dill.detect.trace(False) test_logging() From df05fc24bfb1409dfa6f69a5b6e9f28455427b7e Mon Sep 17 00:00:00 2001 From: anivegesana Date: Mon, 9 May 2022 19:55:17 -0700 Subject: [PATCH 04/14] Box drawing formatting --- dill/_dill.py | 2 +- dill/logger.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index a0bd37f7..afa3023f 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -21,7 +21,7 @@ 'UnpicklingError','HANDLE_FMODE','CONTENTS_FMODE','FILE_FMODE', 'PickleError','PickleWarning','PicklingWarning','UnpicklingWarning'] -from .logger import adapter as logger +from .logger import adapter as logger, trace as _trace import warnings import os diff --git a/dill/logger.py b/dill/logger.py index bc188905..1140a11a 100644 --- a/dill/logger.py +++ b/dill/logger.py @@ -112,7 +112,14 @@ class TraceFormatter(logging.Formatter): def format(self, record): fields = {'prefix': "", 'suffix': ""} if hasattr(record, 'depth'): - fields['prefix'] = record.depth*">" + " " + if record.depth <= 0: + fields['prefix'] = "" # ??? + elif record.msg.startswith("#"): + fields['prefix'] = (record.depth-1)*"│" + "└ " + elif record.depth == 1: + fields['prefix'] = "┬ " + else: + fields['prefix'] = (record.depth-2)*"│" + "├┬ " if hasattr(record, 'size'): # Show object size in human-redable form. power = int(math.log(record.size, 2)) // 10 From c141398be7a10f5d257008509de72809a89ef394 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Tue, 10 May 2022 04:03:52 -0300 Subject: [PATCH 05/14] fallback for non-UTF-8 terminals --- dill/logger.py | 26 ++++++++++++++++---------- tests/test_logger.py | 5 ++--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/dill/logger.py b/dill/logger.py index 1140a11a..4ee53c50 100644 --- a/dill/logger.py +++ b/dill/logger.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # # Author: Leonardo Gama (@leogama) # Copyright (c) 2022 The Uncertainty Quantification Foundation. @@ -57,10 +58,12 @@ __all__ = ['INFO_DETAIL', 'adapter', 'logger', 'trace'] -import logging, math +import locale, logging, math INFO_DETAIL = (logging.INFO + logging.DEBUG) // 2 +TRANS_TABLE = {ord(k): ord(v) for k, v in zip(u"│├┬└", "||+`")} + class TraceAdapter(logging.LoggerAdapter): """tracks object graph depth and calculates pickled object size""" @@ -106,25 +109,28 @@ def log_trace(self, pickler, msg=None, *args, **kwargs): else: self.info(msg, *args, **kwargs) - class TraceFormatter(logging.Formatter): """generates message prefix and suffix from record""" + def __init__(self, *args, **kwargs): + super(TraceFormatter, self).__init__(*args, **kwargs) + self.is_utf8 = locale.getpreferredencoding() == 'UTF-8' def format(self, record): fields = {'prefix': "", 'suffix': ""} - if hasattr(record, 'depth'): - if record.depth <= 0: - fields['prefix'] = "" # ??? - elif record.msg.startswith("#"): - fields['prefix'] = (record.depth-1)*"│" + "└ " + if getattr(record, 'depth', 0) > 0: + if record.msg.startswith("#"): + prefix = (record.depth-1)*u"│" + u"└ " elif record.depth == 1: - fields['prefix'] = "┬ " + prefix = u"┬ " else: - fields['prefix'] = (record.depth-2)*"│" + "├┬ " + prefix = (record.depth-2)*u"│" + u"├┬ " + if not self.is_utf8: + prefix = prefix[:-1].translate(TRANS_TABLE) + "- " + fields['prefix'] = prefix if hasattr(record, 'size'): # Show object size in human-redable form. 
power = int(math.log(record.size, 2)) // 10 size = record.size >> power*10 - fields['suffix'] = " (%d %sB)" % (size, "KMGTP"[power] + "i" if power else "") + fields['suffix'] = " [%d %sB]" % (size, "KMGTP"[power] + "i" if power else "") record.__dict__.update(fields) return super(TraceFormatter, self).format(record) diff --git a/tests/test_logger.py b/tests/test_logger.py index 78daa081..b468cb50 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -36,9 +36,8 @@ def test_logging(regex=None): dill.detect.trace(True) test_logging(r'(# )?\w.*[^)]') dill.detect.trace(True, detail=True) - test_logging(r'>+ ' # prefix indicating depth - r'(\w.*[^)]' # begin pickling object - r'|# \w.* \(\d+ (\wi)?B\))' # object written (with size) + test_logging(r'(\S*┬ \w.*[^)]' # begin pickling object + r'|│*└ # \w.* \[\d+ (\wi)?B])' # object written (with size) ) dill.detect.trace(False) test_logging() From e757769bd42ec6824cc5efc18cbbea4f2315f9e1 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Wed, 22 Jun 2022 11:30:17 -0300 Subject: [PATCH 06/14] review: corrections and improvements --- dill/_dill.py | 267 +++++++++++++++++++++---------------------- dill/logger.py | 265 ++++++++++++++++++++++++------------------ tests/test_logger.py | 9 +- 3 files changed, 286 insertions(+), 255 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index afa3023f..e0f02a61 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -21,7 +21,7 @@ 'UnpicklingError','HANDLE_FMODE','CONTENTS_FMODE','FILE_FMODE', 'PickleError','PickleWarning','PicklingWarning','UnpicklingWarning'] -from .logger import adapter as logger, trace as _trace +from .logger import adapter as logger import warnings import os @@ -574,10 +574,10 @@ def dump(self, obj): #NOTE: if settings change, need to update attributes if NumpyUfuncType and numpyufunc(obj): @register(type(obj)) def save_numpy_ufunc(pickler, obj): - _log_trace(pickler, "Nu: %s", obj) + logger.trace(pickler, "Nu: %s", obj) name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) StockPickler.save_global(pickler, obj, name=name) - _log_trace(pickler, "# Nu") + logger.trace(pickler, "# Nu") return # NOTE: the above 'save' performs like: # import copy_reg @@ -588,9 +588,9 @@ def save_numpy_ufunc(pickler, obj): if NumpyDType and numpydtype(obj): @register(type(obj)) def save_numpy_dtype(pickler, obj): - _log_trace(pickler, "Dt: %s", obj) + logger.trace(pickler, "Dt: %s", obj) pickler.save_reduce(_create_dtypemeta, (obj.type,), obj=obj) - _log_trace(pickler, "# Dt") + logger.trace(pickler, "# Dt") return # NOTE: the above 'save' performs like: # import copy_reg @@ -601,23 +601,18 @@ def save_numpy_dtype(pickler, obj): if NumpyArrayType and ndarraysubclassinstance(obj): @register(type(obj)) def save_numpy_array(pickler, obj): - _log_trace(pickler, "Nu: (%s, %s)", obj.shape, obj.dtype) + logger.trace(pickler, "Nu: (%s, %s)", obj.shape, obj.dtype) npdict = getattr(obj, '__dict__', None) f, args, state = obj.__reduce__() pickler.save_reduce(_create_array, (f,args,state,npdict), obj=obj) - _log_trace(pickler, "# Nu") + logger.trace(pickler, "# Nu") return # end hack if GENERATOR_FAIL and type(obj) == GeneratorType: msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType raise PicklingError(msg) - else: - _log_trace(self) - try: - StockPickler.dump(self, obj) - finally: - _log_trace(self, reset=True) - return + logger.trace_setup(self) + StockPickler.dump(self, obj) dump.__doc__ = StockPickler.dump.__doc__ @@ -1164,9 +1159,9 @@ def 
_save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO #@register(CodeType) #def save_code(pickler, obj): -# _log_trace(pickler, "Co: %s", obj) +# logger.trace(pickler, "Co: %s", obj) # pickler.save_reduce(_unmarshal, (marshal.dumps(obj),), obj=obj) -# _log_trace(pickler, "# Co") +# logger.trace(pickler, "# Co") # return # The following function is based on 'save_codeobject' from 'cloudpickle' @@ -1175,7 +1170,7 @@ def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO # License: https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE @register(CodeType) def save_code(pickler, obj): - _log_trace(pickler, "Co: %s", obj) + logger.trace(pickler, "Co: %s", obj) if PY3: if hasattr(obj, "co_exceptiontable"): args = ( @@ -1213,42 +1208,42 @@ def save_code(pickler, obj): ) pickler.save_reduce(_create_code, args, obj=obj) - _log_trace(pickler, "# Co") + logger.trace(pickler, "# Co") return @register(dict) def save_module_dict(pickler, obj): if is_dill(pickler, child=False) and obj == pickler._main.__dict__ and \ not (pickler._session and pickler._first_pass): - _log_trace(pickler, "D1: ", obj) + logger.trace(pickler, "Dkvi: <%s>", obj) mapping = obj.mapping | _dictproxy_helper_instance pickler.save_reduce(func, (mapping,), obj=obj) - _log_trace(pickler, "# Dkvi") + logger.trace(pickler, "# Dkvi") return _save_dict_view return [ (funcname, save_dict_view_for_function(getattr(dicttype, funcname))) @@ -1273,21 +1268,21 @@ def _save_dict_view(pickler, obj): # License: https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE def save_dict_view(dicttype): def save_dict_keys(pickler, obj): - _log_trace(pickler, "Dk: <%s>", obj) + logger.trace(pickler, "Dk: <%s>", obj) dict_constructor = _shims.Reduce(dicttype.fromkeys, (list(obj),)) pickler.save_reduce(dicttype.keys, (dict_constructor,), obj=obj) - _log_trace(pickler, "# Dk") + logger.trace(pickler, "# Dk") def save_dict_values(pickler, obj): - _log_trace(pickler, "Dv: <%s>", obj) + logger.trace(pickler, "Dv: <%s>", obj) dict_constructor = _shims.Reduce(dicttype, (enumerate(obj),)) pickler.save_reduce(dicttype.values, (dict_constructor,), obj=obj) - _log_trace(pickler, "# Dv") + logger.trace(pickler, "# Dv") def save_dict_items(pickler, obj): - _log_trace(pickler, "Di: <%s>", obj) + logger.trace(pickler, "Di: <%s>", obj) pickler.save_reduce(dicttype.items, (dicttype(obj),), obj=obj) - _log_trace(pickler, "# Di") + logger.trace(pickler, "# Di") return ( ('keys', save_dict_keys), @@ -1310,61 +1305,61 @@ def save_dict_items(pickler, obj): @register(ClassType) def save_classobj(pickler, obj): #FIXME: enable pickler._byref if not _locate_function(obj, pickler): - _log_trace(pickler, "C1: %s", obj) + logger.trace(pickler, "C1: %s", obj) pickler.save_reduce(ClassType, (obj.__name__, obj.__bases__, obj.__dict__), obj=obj) #XXX: or obj.__dict__.copy()), obj=obj) ? 
- _log_trace(pickler, "# C1") + logger.trace(pickler, "# C1") else: - _log_trace(pickler, "C2: %s", obj) + logger.trace(pickler, "C2: %s", obj) name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) StockPickler.save_global(pickler, obj, name=name) - _log_trace(pickler, "# C2") + logger.trace(pickler, "# C2") return @register(LockType) def save_lock(pickler, obj): - _log_trace(pickler, "Lo: %s", obj) + logger.trace(pickler, "Lo: %s", obj) pickler.save_reduce(_create_lock, (obj.locked(),), obj=obj) - _log_trace(pickler, "# Lo") + logger.trace(pickler, "# Lo") return @register(RLockType) def save_rlock(pickler, obj): - _log_trace(pickler, "RL: %s", obj) + logger.trace(pickler, "RL: %s", obj) r = obj.__repr__() # don't use _release_save as it unlocks the lock count = int(r.split('count=')[1].split()[0].rstrip('>')) owner = int(r.split('owner=')[1].split()[0]) if PY3 else getattr(obj, '_RLock__owner') pickler.save_reduce(_create_rlock, (count,owner,), obj=obj) - _log_trace(pickler, "# RL") + logger.trace(pickler, "# RL") return if not IS_PYPY2: #@register(SocketType) #FIXME: causes multiprocess test_pickling FAIL def save_socket(pickler, obj): - _log_trace(pickler, "So: %s", obj) + logger.trace(pickler, "So: %s", obj) pickler.save_reduce(*reduce_socket(obj)) - _log_trace(pickler, "# So") + logger.trace(pickler, "# So") return if sys.hexversion <= 0x3050000: @register(ItemGetterType) def save_itemgetter(pickler, obj): - _log_trace(pickler, "Ig: %s", obj) + logger.trace(pickler, "Ig: %s", obj) helper = _itemgetter_helper() obj(helper) pickler.save_reduce(type(obj), tuple(helper.items), obj=obj) - _log_trace(pickler, "# Ig") + logger.trace(pickler, "# Ig") return @register(AttrGetterType) def save_attrgetter(pickler, obj): - _log_trace(pickler, "Ag: %s", obj) + logger.trace(pickler, "Ag: %s", obj) attrs = [] helper = _attrgetter_helper(attrs) obj(helper) pickler.save_reduce(type(obj), tuple(attrs), obj=obj) - _log_trace(pickler, "# Ag") + logger.trace(pickler, "# Ag") return def _save_file(pickler, obj, open_): @@ -1400,9 +1395,9 @@ def _save_file(pickler, obj, open_): @register(BufferedWriterType) @register(TextWrapperType) def save_file(pickler, obj): - _log_trace(pickler, "Fi: %s", obj) + logger.trace(pickler, "Fi: %s", obj) f = _save_file(pickler, obj, open) - _log_trace(pickler, "# Fi") + logger.trace(pickler, "# Fi") return f if PyTextWrapperType: @@ -1411,9 +1406,9 @@ def save_file(pickler, obj): @register(PyBufferedWriterType) @register(PyTextWrapperType) def save_file(pickler, obj): - _log_trace(pickler, "Fi: %s", obj) + logger.trace(pickler, "Fi: %s", obj) f = _save_file(pickler, obj, _open) - _log_trace(pickler, "# Fi") + logger.trace(pickler, "# Fi") return f # The following two functions are based on 'saveCStringIoInput' @@ -1423,42 +1418,42 @@ def save_file(pickler, obj): if InputType: @register(InputType) def save_stringi(pickler, obj): - _log_trace(pickler, "Io: %s", obj) + logger.trace(pickler, "Io: %s", obj) if obj.closed: value = ''; position = 0 else: value = obj.getvalue(); position = obj.tell() pickler.save_reduce(_create_stringi, (value, position, \ obj.closed), obj=obj) - _log_trace(pickler, "# Io") + logger.trace(pickler, "# Io") return @register(OutputType) def save_stringo(pickler, obj): - _log_trace(pickler, "Io: %s", obj) + logger.trace(pickler, "Io: %s", obj) if obj.closed: value = ''; position = 0 else: value = obj.getvalue(); position = obj.tell() pickler.save_reduce(_create_stringo, (value, position, \ obj.closed), obj=obj) - _log_trace(pickler, "# 
Io") + logger.trace(pickler, "# Io") return if 0x2050000 <= sys.hexversion < 0x3010000: @register(PartialType) def save_functor(pickler, obj): - _log_trace(pickler, "Fu: %s", obj) + logger.trace(pickler, "Fu: %s", obj) pickler.save_reduce(_create_ftype, (type(obj), obj.func, obj.args, obj.keywords), obj=obj) - _log_trace(pickler, "# Fu") + logger.trace(pickler, "# Fu") return if LRUCacheType is not None: from functools import lru_cache @register(LRUCacheType) def save_lru_cache(pickler, obj): - _log_trace(pickler, "LRU: %s", obj) + logger.trace(pickler, "LRU: %s", obj) if OLD39: kwargs = obj.cache_info() args = (kwargs.maxsize,) @@ -1470,14 +1465,14 @@ def save_lru_cache(pickler, obj): else: wrapper = lru_cache pickler.save_reduce(wrapper, (obj.__wrapped__,), obj=obj) - _log_trace(pickler, "# LRU") + logger.trace(pickler, "# LRU") return @register(SuperType) def save_super(pickler, obj): - _log_trace(pickler, "Su: %s", obj) + logger.trace(pickler, "Su: %s", obj) pickler.save_reduce(super, (obj.__thisclass__, obj.__self__), obj=obj) - _log_trace(pickler, "# Su") + logger.trace(pickler, "# Su") return if OLDER or not PY3: @@ -1487,34 +1482,34 @@ def save_builtin_method(pickler, obj): if obj.__self__ is __builtin__: module = 'builtins' if PY3 else '__builtin__' _t = "B1" - _log_trace(pickler, "%s: %s", _t, obj) + logger.trace(pickler, "%s: %s", _t, obj) else: module = obj.__self__ _t = "B3" - _log_trace(pickler, "%s: %s", _t, obj) + logger.trace(pickler, "%s: %s", _t, obj) if is_dill(pickler, child=True): _recurse = pickler._recurse pickler._recurse = False pickler.save_reduce(_get_attr, (module, obj.__name__), obj=obj) if is_dill(pickler, child=True): pickler._recurse = _recurse - _log_trace(pickler, "# %s", _t) + logger.trace(pickler, "# %s", _t) else: - _log_trace(pickler, "B2: %s", obj) + logger.trace(pickler, "B2: %s", obj) name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) StockPickler.save_global(pickler, obj, name=name) - _log_trace(pickler, "# B2") + logger.trace(pickler, "# B2") return @register(MethodType) #FIXME: fails for 'hidden' or 'name-mangled' classes def save_instancemethod0(pickler, obj):# example: cStringIO.StringI - _log_trace(pickler, "Me: %s", obj) #XXX: obj.__dict__ handled elsewhere? + logger.trace(pickler, "Me: %s", obj) #XXX: obj.__dict__ handled elsewhere? 
if PY3: pickler.save_reduce(MethodType, (obj.__func__, obj.__self__), obj=obj) else: pickler.save_reduce(MethodType, (obj.im_func, obj.im_self, obj.im_class), obj=obj) - _log_trace(pickler, "# Me") + logger.trace(pickler, "# Me") return if sys.hexversion >= 0x20500f0: @@ -1525,40 +1520,40 @@ def save_instancemethod0(pickler, obj):# example: cStringIO.StringI @register(WrapperDescriptorType) @register(ClassMethodDescriptorType) def save_wrapper_descriptor(pickler, obj): - _log_trace(pickler, "Wr: %s", obj) + logger.trace(pickler, "Wr: %s", obj) pickler.save_reduce(_getattr, (obj.__objclass__, obj.__name__, obj.__repr__()), obj=obj) - _log_trace(pickler, "# Wr") + logger.trace(pickler, "# Wr") return else: @register(MemberDescriptorType) @register(GetSetDescriptorType) def save_wrapper_descriptor(pickler, obj): - _log_trace(pickler, "Wr: %s", obj) + logger.trace(pickler, "Wr: %s", obj) pickler.save_reduce(_getattr, (obj.__objclass__, obj.__name__, obj.__repr__()), obj=obj) - _log_trace(pickler, "# Wr") + logger.trace(pickler, "# Wr") return @register(MethodWrapperType) def save_instancemethod(pickler, obj): - _log_trace(pickler, "Mw: %s", obj) + logger.trace(pickler, "Mw: %s", obj) if IS_PYPY2 and obj.__self__ is None and obj.im_class: # Can be a class method in PYPY2 if __self__ is none pickler.save_reduce(getattr, (obj.im_class, obj.__name__), obj=obj) return pickler.save_reduce(getattr, (obj.__self__, obj.__name__), obj=obj) - _log_trace(pickler, "# Mw") + logger.trace(pickler, "# Mw") return elif not IS_PYPY: @register(MethodDescriptorType) @register(WrapperDescriptorType) def save_wrapper_descriptor(pickler, obj): - _log_trace(pickler, "Wr: %s", obj) + logger.trace(pickler, "Wr: %s", obj) pickler.save_reduce(_getattr, (obj.__objclass__, obj.__name__, obj.__repr__()), obj=obj) - _log_trace(pickler, "# Wr") + logger.trace(pickler, "# Wr") return @register(CellType) @@ -1566,7 +1561,7 @@ def save_cell(pickler, obj): try: f = obj.cell_contents except: - _log_trace(pickler, "Ce3: %s", obj) + logger.trace(pickler, "Ce3: %s", obj) # _shims._CELL_EMPTY is defined in _shims.py to support PyPy 2.7. # It unpickles to a sentinel object _dill._CELL_EMPTY, also created in # _shims.py. This object is not present in Python 3 because the cell's @@ -1585,7 +1580,7 @@ def save_cell(pickler, obj): pickler.write(bytes('0', 'UTF-8')) else: pickler.write('0') - _log_trace(pickler, "# Ce3") + logger.trace(pickler, "# Ce3") return if is_dill(pickler, child=True): if id(f) in pickler._postproc: @@ -1596,34 +1591,34 @@ def save_cell(pickler, obj): # value as late as possible to prevent cycle. postproc = next(iter(pickler._postproc.values()), None) if postproc is not None: - _log_trace(pickler, "Ce2: %s", obj) + logger.trace(pickler, "Ce2: %s", obj) # _CELL_REF is defined in _shims.py to support older versions of # dill. 
When breaking changes are made to dill, (_CELL_REF,) can # be replaced by () pickler.save_reduce(_create_cell, (_CELL_REF,), obj=obj) postproc.append((_shims._setattr, (obj, 'cell_contents', f))) - _log_trace(pickler, "# Ce2") + logger.trace(pickler, "# Ce2") return - _log_trace(pickler, "Ce1: %s", obj) + logger.trace(pickler, "Ce1: %s", obj) pickler.save_reduce(_create_cell, (f,), obj=obj) - _log_trace(pickler, "# Ce1") + logger.trace(pickler, "# Ce1") return if MAPPING_PROXY_TRICK: @register(DictProxyType) def save_dictproxy(pickler, obj): - _log_trace(pickler, "Mp: %s", obj) + logger.trace(pickler, "Mp: %s", obj) mapping = obj | _dictproxy_helper_instance pickler.save_reduce(DictProxyType, (mapping,), obj=obj) - _log_trace(pickler, "# Mp") + logger.trace(pickler, "# Mp") return elif not IS_PYPY: if not OLD33: @register(DictProxyType) def save_dictproxy(pickler, obj): - _log_trace(pickler, "Mp: %s", obj) + logger.trace(pickler, "Mp: %s", obj) pickler.save_reduce(DictProxyType, (obj.copy(),), obj=obj) - _log_trace(pickler, "# Mp") + logger.trace(pickler, "# Mp") return else: # The following function is based on 'saveDictProxy' from spickle @@ -1631,31 +1626,31 @@ def save_dictproxy(pickler, obj): # License: http://www.apache.org/licenses/LICENSE-2.0 @register(DictProxyType) def save_dictproxy(pickler, obj): - _log_trace(pickler, "Dp: %s", obj) + logger.trace(pickler, "Dp: %s", obj) attr = obj.get('__dict__') #pickler.save_reduce(_create_dictproxy, (attr,'nested'), obj=obj) if type(attr) == GetSetDescriptorType and attr.__name__ == "__dict__" \ and getattr(attr.__objclass__, "__dict__", None) == obj: pickler.save_reduce(getattr, (attr.__objclass__,"__dict__"),obj=obj) - _log_trace(pickler, "# Dp") + logger.trace(pickler, "# Dp") return # all bad below... 
so throw ReferenceError or TypeError raise ReferenceError("%s does not reference a class __dict__" % obj) @register(SliceType) def save_slice(pickler, obj): - _log_trace(pickler, "Sl: %s", obj) + logger.trace(pickler, "Sl: %s", obj) pickler.save_reduce(slice, (obj.start, obj.stop, obj.step), obj=obj) - _log_trace(pickler, "# Sl") + logger.trace(pickler, "# Sl") return @register(XRangeType) @register(EllipsisType) @register(NotImplementedType) def save_singleton(pickler, obj): - _log_trace(pickler, "Si: %s", obj) + logger.trace(pickler, "Si: %s", obj) pickler.save_reduce(_eval_repr, (obj.__repr__(),), obj=obj) - _log_trace(pickler, "# Si") + logger.trace(pickler, "# Si") return def _proxy_helper(obj): # a dead proxy returns a reference to None @@ -1701,10 +1696,10 @@ def _locate_object(address, module=None): @register(ReferenceType) def save_weakref(pickler, obj): refobj = obj() - _log_trace(pickler, "R1: %s", obj) + logger.trace(pickler, "R1: %s", obj) #refobj = ctypes.pythonapi.PyWeakref_GetObject(obj) # dead returns "None" pickler.save_reduce(_create_weakref, (refobj,), obj=obj) - _log_trace(pickler, "# R1") + logger.trace(pickler, "# R1") return @register(ProxyType) @@ -1713,15 +1708,15 @@ def save_weakproxy(pickler, obj): refobj = _locate_object(_proxy_helper(obj)) try: _t = "R2" - _log_trace(pickler, "%s: %s", _t, obj) + logger.trace(pickler, "%s: %s", _t, obj) except ReferenceError: _t = "R3" - _log_trace(pickler, "%s: %s", _t, sys.exc_info()[1]) + logger.trace(pickler, "%s: %s", _t, sys.exc_info()[1]) #callable = bool(getattr(refobj, '__call__', None)) if type(obj) is CallableProxyType: callable = True else: callable = False pickler.save_reduce(_create_weakproxy, (refobj, callable), obj=obj) - _log_trace(pickler, "# %s", _t) + logger.trace(pickler, "# %s", _t) return def _is_builtin_module(module): @@ -1743,72 +1738,72 @@ def save_module(pickler, obj): except RuntimeError: # not memorised module, probably part of dill pass else: - _log_trace(pickler, "M2: %s with diff", obj) - _log_trace(pickler, "Diff: %s", changed.keys()) + logger.trace(pickler, "M2: %s with diff", obj) + logger.trace(pickler, "Diff: %s", changed.keys()) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj, state=changed) - _log_trace(pickler, "# M2") + logger.trace(pickler, "# M2") return - _log_trace(pickler, "M1: %s", obj) + logger.trace(pickler, "M1: %s", obj) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj) - _log_trace(pickler, "# M1") + logger.trace(pickler, "# M1") else: builtin_mod = _is_builtin_module(obj) if obj.__name__ not in ("builtins", "dill", "dill._dill") and not builtin_mod or \ is_dill(pickler, child=True) and obj is pickler._main: - _log_trace(pickler, "M1: %s", obj) + logger.trace(pickler, "M1: %s", obj) _main_dict = obj.__dict__.copy() #XXX: better no copy? option to copy? 
[_main_dict.pop(item, None) for item in singletontypes + ["__builtins__", "__loader__"]] pickler.save_reduce(_import_module, (obj.__name__,), obj=obj, state=_main_dict) - _log_trace(pickler, "# M1") + logger.trace(pickler, "# M1") elif PY3 and obj.__name__ == "dill._dill": - _log_trace(pickler, "M2: %s", obj) + logger.trace(pickler, "M2: %s", obj) pickler.save_global(obj, name="_dill") - _log_trace(pickler, "# M2") + logger.trace(pickler, "# M2") else: - _log_trace(pickler, "M2: %s", obj) + logger.trace(pickler, "M2: %s", obj) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj) - _log_trace(pickler, "# M2") + logger.trace(pickler, "# M2") return return @register(TypeType) def save_type(pickler, obj, postproc_list=None): if obj in _typemap: - _log_trace(pickler, "T1: %s", obj) + logger.trace(pickler, "T1: %s", obj) pickler.save_reduce(_load_type, (_typemap[obj],), obj=obj) - _log_trace(pickler, "# T1") + logger.trace(pickler, "# T1") elif obj.__bases__ == (tuple,) and all([hasattr(obj, attr) for attr in ('_fields','_asdict','_make','_replace')]): # special case: namedtuples - _log_trace(pickler, "T6: %s", obj) + logger.trace(pickler, "T6: %s", obj) if OLD37 or (not obj._field_defaults): pickler.save_reduce(_create_namedtuple, (obj.__name__, obj._fields, obj.__module__), obj=obj) else: defaults = [obj._field_defaults[field] for field in obj._fields] pickler.save_reduce(_create_namedtuple, (obj.__name__, obj._fields, obj.__module__, defaults), obj=obj) - _log_trace(pickler, "# T6") + logger.trace(pickler, "# T6") return # special cases: NoneType, NotImplementedType, EllipsisType elif obj is type(None): - _log_trace(pickler, "T7: %s", obj) + logger.trace(pickler, "T7: %s", obj) #XXX: pickler.save_reduce(type, (None,), obj=obj) if PY3: pickler.write(bytes('c__builtin__\nNoneType\n', 'UTF-8')) else: pickler.write('c__builtin__\nNoneType\n') - _log_trace(pickler, "# T7") + logger.trace(pickler, "# T7") elif obj is NotImplementedType: - _log_trace(pickler, "T7: %s", obj) + logger.trace(pickler, "T7: %s", obj) pickler.save_reduce(type, (NotImplemented,), obj=obj) - _log_trace(pickler, "# T7") + logger.trace(pickler, "# T7") elif obj is EllipsisType: - _log_trace(pickler, "T7: %s", obj) + logger.trace(pickler, "T7: %s", obj) pickler.save_reduce(type, (Ellipsis,), obj=obj) - _log_trace(pickler, "# T7") + logger.trace(pickler, "# T7") else: obj_name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) @@ -1819,11 +1814,11 @@ def save_type(pickler, obj, postproc_list=None): if issubclass(type(obj), type): # thanks to Tom Stepleton pointing out pickler._session unneeded _t = 'T2' - _log_trace(pickler, "%s: %s", _t, obj) + logger.trace(pickler, "%s: %s", _t, obj) _dict = _dict_from_dictproxy(obj.__dict__) else: _t = 'T3' - _log_trace(pickler, "%s: %s", _t, obj) + logger.trace(pickler, "%s: %s", _t, obj) _dict = obj.__dict__ #print (_dict) #print ("%s\n%s" % (type(obj), obj.__name__)) @@ -1837,9 +1832,9 @@ def save_type(pickler, obj, postproc_list=None): _save_with_postproc(pickler, (_create_type, ( type(obj), obj.__name__, obj.__bases__, _dict )), obj=obj, postproc_list=postproc_list) - _log_trace(pickler, "# %s", _t) + logger.trace(pickler, "# %s", _t) else: - _log_trace(pickler, "T4: %s", obj) + logger.trace(pickler, "T4: %s", obj) if incorrectly_named: warnings.warn('Cannot locate reference to %r.' 
% (obj,), PicklingWarning) if obj_recursive: @@ -1848,7 +1843,7 @@ def save_type(pickler, obj, postproc_list=None): #print ("%s\n%s" % (type(obj), obj.__name__)) #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) StockPickler.save_global(pickler, obj, name=obj_name) - _log_trace(pickler, "# T4") + logger.trace(pickler, "# T4") return # Error in PyPy 2.7 when adding ABC support @@ -1859,15 +1854,15 @@ def save_frame(pickler, obj): @register(property) def save_property(pickler, obj): - _log_trace(pickler, "Pr: %s", obj) + logger.trace(pickler, "Pr: %s", obj) pickler.save_reduce(property, (obj.fget, obj.fset, obj.fdel, obj.__doc__), obj=obj) - _log_trace(pickler, "# Pr") + logger.trace(pickler, "# Pr") @register(staticmethod) @register(classmethod) def save_classmethod(pickler, obj): - _log_trace(pickler, "Cm: %s", obj) + logger.trace(pickler, "Cm: %s", obj) im_func = '__func__' if PY3 else 'im_func' try: orig_func = getattr(obj, im_func) @@ -1888,12 +1883,12 @@ def save_classmethod(pickler, obj): # state = None pickler.save_reduce(type(obj), (orig_func,), obj=obj) - _log_trace(pickler, "# Cm") + logger.trace(pickler, "# Cm") @register(FunctionType) def save_function(pickler, obj): if not _locate_function(obj, pickler): - _log_trace(pickler, "F1: %s", obj) + logger.trace(pickler, "F1: %s", obj) _recurse = getattr(pickler, '_recurse', None) _byref = getattr(pickler, '_byref', None) _postproc = getattr(pickler, '_postproc', None) @@ -1996,12 +1991,12 @@ def save_function(pickler, obj): else: pickler.write('0') - _log_trace(pickler, "# F1") + logger.trace(pickler, "# F1") else: - _log_trace(pickler, "F2: %s", obj) + logger.trace(pickler, "F2: %s", obj) name = getattr(obj, '__qualname__', getattr(obj, '__name__', None)) StockPickler.save_global(pickler, obj, name=name) - _log_trace(pickler, "# F2") + logger.trace(pickler, "# F2") return # quick sanity checking @@ -2097,16 +2092,10 @@ def _extend(): try: StockPickler.dispatch[t] = func except: #TypeError, PicklingError, UnpicklingError - _log_trace(pickler, "skip: %s", t) + logger.trace(pickler, "skip: %s", t) else: pass return -def _log_trace(pickler, *args, **kwargs): - if is_dill(pickler, child=False): - logger.log_trace(pickler, *args, **kwargs) - else: - logger.info(*args, **kwargs) - del diff, _use_diff, use_diff # EOF diff --git a/dill/logger.py b/dill/logger.py index 4ee53c50..e1895b94 100644 --- a/dill/logger.py +++ b/dill/logger.py @@ -5,149 +5,194 @@ # Copyright (c) 2022 The Uncertainty Quantification Foundation. # License: 3-clause BSD. The full license text is available at: # - https://github.com/uqfoundation/dill/blob/master/LICENSE -r""" +""" Logging utilities for dill. -Python's logging functionality can be conceptually divided into five steps: - 0. Check logging level -> abort if call level is greater than logger level - 1. Gather information -> construct a LogRecord from passed arguments and context - 2. Filter (optional) -> discard message if the record matches a filter - 3. Format -> format message with args, then format output string with message plus record - 4. Handle -> write the formatted string to output as defined in the handler - - -Basic calling sequence -====================== - -dill.logging.logger.log -> # or logger.info, etc. 
- Logger.log -> \ - Logger._log -> }- accept 'extra' parameter for custom record entries - Logger.makeRecord -> / - LogRecord.__init__ - Logger.handle -> - Logger.callHandlers -> - Handler.handle -> - Filterer.filter -> - Filter.filter - StreamHandler.emit -> - Handler.format -> - Formatter.format -> - LogRecord.getMessage # does: record.message = msg % args - Formatter.formatMessage -> - PercentStyle.format # does: self._fmt % record.__dict__ - -NOTE: All methods from the second line on are from logging.__init__.py - - -Logger customizations -===================== - -Step 1 - Information --------------------- -We use a LoggerAdapter subclass to wrap the module's logger, as the logging API -doesn't allow setting it directly with a custom Logger subclass. The added -'log_trace()' method receives a pickle instance as the first argument and -creates extra values to be added in the LogRecord from it, then calls 'log()'. - -Step 3 - Formatting -------------------- -We use a Formatter subclass to add prefix and suffix strings to the log message -in trace mode (an also provide empty defaults for normal logs). The user may -substitute the formatter to customize the extra information display. +The 'logger' object is the dill's top-level logger. + +The 'adapter' object wraps the logger and implements a 'trace()' method that +generates a detailed tree-style trace for the pickling call at log level INFO. + +The 'trace()' function sets and resets dill's logger log level, enabling and +disabling the pickling trace. + +The trace shows a tree structure depicting the depth of each object serialized +*with dill save functions*, but not the ones that use save functions from +'pickle._Pickler.dispatch'. If the information is available, it also displays +the size in bytes that the object contributed to the pickle stream (including +its child objects). Example trace output: + + >>> import dill, dill.tests + >>> dill.dump_session(main=dill.tests) + ┬ M1: + ├┬ F2: + │└ # F2 [32 B] + ├┬ D2: + │├┬ T4: + ││└ # T4 [35 B] + │├┬ D2: + ││├┬ T4: + │││└ # T4 [50 B] + ││├┬ D2: + │││└ # D2 [84 B] + ││└ # D2 [413 B] + │└ # D2 [763 B] + └ # M1 [813 B] """ -__all__ = ['INFO_DETAIL', 'adapter', 'logger', 'trace'] +__all__ = ['adapter', 'logger', 'trace'] -import locale, logging, math +import dill, locale, logging, math -INFO_DETAIL = (logging.INFO + logging.DEBUG) // 2 +# Tree drawing characters: Unicode to ASCII map. +ASCII_MAP = str.maketrans({"│": "|", "├": "|", "┬": "+", "└": "`"}) -TRANS_TABLE = {ord(k): ord(v) for k, v in zip(u"│├┬└", "||+`")} +## Notes about the design choices ## +# Here is some domumentation of the Standard Library's logging internals that +# can't be found completely in the official documentation. dill's logger is +# obtained by calling logging.getLogger('dill') and therefore is an instance of +# logging.getLoggerClass() at the call time. As this is controlled by the user, +# in order to add some functionality to it it's necessary to use a LoggerAdapter +# to wrap it, overriding some of the adapter's methods and creating new ones. +# +# Basic calling sequence +# ====================== +# +# Python's logging functionality can be conceptually divided into five steps: +# 0. Check logging level -> abort if call level is greater than logger level +# 1. Gather information -> construct a LogRecord from passed arguments and context +# 2. Filter (optional) -> discard message if the record matches a filter +# 3. Format -> format message with args, then format output string with message plus record +# 4. 
Handle -> write the formatted string to output as defined in the handler +# +# dill.logging.logger.log -> # or logger.info, etc. +# Logger.log -> \ +# Logger._log -> }- accept 'extra' parameter for custom record entries +# Logger.makeRecord -> / +# LogRecord.__init__ +# Logger.handle -> +# Logger.callHandlers -> +# Handler.handle -> +# Filterer.filter -> +# Filter.filter +# StreamHandler.emit -> +# Handler.format -> +# Formatter.format -> +# LogRecord.getMessage # does: record.message = msg % args +# Formatter.formatMessage -> +# PercentStyle.format # does: self._fmt % vars(record) +# +# NOTE: All methods from the second line on are from logging.__init__.py class TraceAdapter(logging.LoggerAdapter): - """tracks object graph depth and calculates pickled object size""" + """ + Tracks object tree depth and calculates pickled object size. + + A single instance of this wraps the module's logger, as the logging API + doesn't allow setting it directly with a custom Logger subclass. The added + 'trace()' method receives a pickle instance as the first argument and + creates extra values to be added in the LogRecord from it, then calls + 'info()'. + + Usage of logger with 'trace()' method: + + >>> from .logger import adapter as logger # instead of 'from .logger import logger' + >>> ... + >>> def save_atype(pickler, obj): + >>> logger.trace(pickler, "Message with %s and %r etc. placeholders", 'text', obj) + >>> ... + """ def __init__(self, logger): self.logger = logger + def addHandler(self, handler): + formatter = TraceFormatter("%(prefix)s%(message)s%(suffix)s", handler=handler) + handler.setFormatter(formatter) + self.logger.addHandler(handler) + def removeHandler(self, handler): + self.logger.removeHandler(handler) def process(self, msg, kwargs): - # A no-op override, as we don't have self.extra + # A no-op override, as we don't have self.extra. return msg, kwargs - def log_trace(self, pickler, msg=None, *args, **kwargs): - """log_trace(self, pickler, msg=None, *args, reset=False, **kwargs)""" - reset = kwargs.pop('reset', False) - if msg is None: - # Initialization and clean-up. - if reset: - pickler._trace_depth = 0 - pickler._size_stack = None - elif self.isEnabledFor(INFO_DETAIL): - pickler._trace_depth = 1 - pickler._size_stack = [] + def trace_setup(self, pickler): + # Called by Pickler.dump(). + if not dill._dill.is_dill(pickler, child=False): return - - elif getattr(pickler, '_trace_depth', None): - extra = kwargs.get('extra', {}) - pushed = msg.startswith('#') - size = None - try: - size = pickler._file.tell() - size += pickler.framer.current_frame.tell() - except AttributeError: - pass - if size: - if not pushed: - pickler._size_stack.append(size) - else: - size -= pickler._size_stack.pop() - extra['size'] = size - if pushed: pickler._trace_depth -= 1 - extra['depth'] = pickler._trace_depth - kwargs['extra'] = extra - self.log(INFO_DETAIL, msg, *args, **kwargs) - if not pushed: pickler._trace_depth += 1 - + if self.isEnabledFor(logging.INFO): + pickler._trace_depth = 1 + pickler._size_stack = [] else: - self.info(msg, *args, **kwargs) + pickler._trace_depth = None + def trace(self, pickler, msg, *args, **kwargs): + if not hasattr(pickler, '_trace_depth'): + logger.info(msg, *args, **kwargs) + return + if pickler._trace_depth is None: + return + extra = kwargs.get('extra', {}) + pushed_obj = msg.startswith('#') + size = None + try: + # Streams are not required to be tellable. 
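+            # tell() gives the absolute position in the pickle stream (file
+            # position plus the offset inside the current frame, if any).
+            # Opening messages push this position onto pickler._size_stack;
+            # the matching closing "# ..." message pops it and subtracts,
+            # yielding the bytes written for the object and its children.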
+ size = pickler._file.tell() + size += pickler.framer.current_frame.tell() + except AttributeError: + pass + if size is not None: + if not pushed_obj: + pickler._size_stack.append(size) + else: + size -= pickler._size_stack.pop() + extra['size'] = size + if pushed_obj: + pickler._trace_depth -= 1 + extra['depth'] = pickler._trace_depth + kwargs['extra'] = extra + self.info(msg, *args, **kwargs) + if not pushed_obj: + pickler._trace_depth += 1 class TraceFormatter(logging.Formatter): - """generates message prefix and suffix from record""" - def __init__(self, *args, **kwargs): - super(TraceFormatter, self).__init__(*args, **kwargs) - self.is_utf8 = locale.getpreferredencoding() == 'UTF-8' + """ + Generates message prefix and suffix from record. + + This Formatter adds prefix and suffix strings to the log message in trace + mode (an also provides empty string defaults for normal logs). + """ + def __init__(self, *args, handler=None, **kwargs): + super().__init__(*args, **kwargs) + try: + encoding = handler.stream.encoding + if encoding is None: + raise AttributeError + except AttributeError: + encoding = locale.getpreferredencoding() + self.is_utf8 = (encoding == 'UTF-8') def format(self, record): fields = {'prefix': "", 'suffix': ""} if getattr(record, 'depth', 0) > 0: if record.msg.startswith("#"): - prefix = (record.depth-1)*u"│" + u"└ " + prefix = (record.depth - 1)*"│" + "└" elif record.depth == 1: - prefix = u"┬ " + prefix = "┬" else: - prefix = (record.depth-2)*u"│" + u"├┬ " + prefix = (record.depth - 2)*"│" + "├┬" if not self.is_utf8: - prefix = prefix[:-1].translate(TRANS_TABLE) + "- " - fields['prefix'] = prefix + prefix = prefix.translate(ASCII_MAP) + "-" + fields['prefix'] = prefix + " " if hasattr(record, 'size'): # Show object size in human-redable form. 
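+            # 'power' is the size's order of magnitude in base 1024 (0 for
+            # plain bytes) and 'power*10' the equivalent bit shift, e.g.
+            # record.size == 84000 gives power == 1 and 84000 >> 10 == 82.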
power = int(math.log(record.size, 2)) // 10 size = record.size >> power*10 fields['suffix'] = " [%d %sB]" % (size, "KMGTP"[power] + "i" if power else "") - record.__dict__.update(fields) - return super(TraceFormatter, self).format(record) - + vars(record).update(fields) + return super().format(record) logger = logging.getLogger('dill') adapter = TraceAdapter(logger) - handler = logging.StreamHandler() -logger.addHandler(handler) - -formatter = TraceFormatter("%(prefix)s%(message)s%(suffix)s") -handler.setFormatter(formatter) - +adapter.addHandler(handler) -def trace(boolean, detail=False): +def trace(boolean): """print a trace through the stack when pickling; useful for debugging""" - if boolean: - logger.setLevel(INFO_DETAIL if detail else logging.INFO) - else: - logger.setLevel(logging.WARNING) + logger.setLevel(logging.INFO if boolean else logging.WARNING) diff --git a/tests/test_logger.py b/tests/test_logger.py index b468cb50..b7be2e51 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -6,7 +6,7 @@ # - https://github.com/uqfoundation/dill/blob/master/LICENSE import dill, logging, re -from dill.logger import formatter, handler, logger +from dill.logger import handler, adapter as logger try: from StringIO import StringIO @@ -16,16 +16,15 @@ def test_logging(regex=None): buffer = StringIO() handler = logging.StreamHandler(buffer) - handler.setFormatter(formatter) logger.addHandler(handler) try: dill.dumps({'a': (1, 2), 'b': object(), 'big': list(range(10000))}) if regex is None: assert buffer.getvalue() == "" else: - regex = re.compile(regex + '$') + regex = re.compile(regex) for line in buffer.getvalue().splitlines(): - assert regex.match(line) + assert regex.fullmatch(line) finally: logger.removeHandler(handler) buffer.close() @@ -34,8 +33,6 @@ def test_logging(regex=None): logger.removeHandler(handler) test_logging() dill.detect.trace(True) - test_logging(r'(# )?\w.*[^)]') - dill.detect.trace(True, detail=True) test_logging(r'(\S*┬ \w.*[^)]' # begin pickling object r'|│*└ # \w.* \[\d+ (\wi)?B])' # object written (with size) ) From 4ed84791ba5e963cd375bc15416989606bf6bd96 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Thu, 23 Jun 2022 23:05:42 -0300 Subject: [PATCH 07/14] review: adjustments --- dill/_dill.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dill/_dill.py b/dill/_dill.py index 1763d353..2821262f 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -23,8 +23,9 @@ __module__ = 'dill' -from .logger import adapter as logger import warnings +from .logger import adapter as logger +from .logger import trace as _trace import os import sys From 70b67f64bb98bb3bb2ca5503916e3a828e4f934d Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Thu, 23 Jun 2022 23:14:47 -0300 Subject: [PATCH 08/14] split module imports into separated lines --- dill/logger.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dill/logger.py b/dill/logger.py index e1895b94..818e029f 100644 --- a/dill/logger.py +++ b/dill/logger.py @@ -8,7 +8,7 @@ """ Logging utilities for dill. -The 'logger' object is the dill's top-level logger. +The 'logger' object is dill's top-level logger. The 'adapter' object wraps the logger and implements a 'trace()' method that generates a detailed tree-style trace for the pickling call at log level INFO. @@ -20,7 +20,7 @@ *with dill save functions*, but not the ones that use save functions from 'pickle._Pickler.dispatch'. 
If the information is available, it also displays the size in bytes that the object contributed to the pickle stream (including -its child objects). Example trace output: +its child objects). Sample trace output: >>> import dill, dill.tests >>> dill.dump_session(main=dill.tests) @@ -42,7 +42,11 @@ __all__ = ['adapter', 'logger', 'trace'] -import dill, locale, logging, math +import locale +import logging +import math + +import dill # Tree drawing characters: Unicode to ASCII map. ASCII_MAP = str.maketrans({"│": "|", "├": "|", "┬": "+", "└": "`"}) From ebd3f54bcdbe735baac59fb40e7c4f176bdf0994 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 24 Jun 2022 10:18:47 -0300 Subject: [PATCH 09/14] deal with non-standardized UTF-8 encoding name --- dill/logger.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dill/logger.py b/dill/logger.py index 818e029f..e144f8a5 100644 --- a/dill/logger.py +++ b/dill/logger.py @@ -42,6 +42,7 @@ __all__ = ['adapter', 'logger', 'trace'] +import codecs import locale import logging import math @@ -171,7 +172,12 @@ def __init__(self, *args, handler=None, **kwargs): raise AttributeError except AttributeError: encoding = locale.getpreferredencoding() - self.is_utf8 = (encoding == 'UTF-8') + try: + encoding = codecs.lookup(encoding).name + except LookupError: + self.is_utf8 = False + else: + self.is_utf8 = (encoding == codecs.lookup('utf-8').name) def format(self, record): fields = {'prefix': "", 'suffix': ""} if getattr(record, 'depth', 0) > 0: From cf25de0f60c2b0883214d86362f26b1dfcc1bb12 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 24 Jun 2022 10:19:05 -0300 Subject: [PATCH 10/14] update sample trace in module docs --- dill/__init__.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/dill/__init__.py b/dill/__init__.py index ac93ff6a..8249d2f8 100644 --- a/dill/__init__.py +++ b/dill/__init__.py @@ -187,18 +187,22 @@ >>> import dill.detect >>> dill.detect.trace(True) >>> f = dumps(squared) - F1: at 0x108899e18> - F2: - # F2 - Co: at 0x10866a270, file "", line 1> - F2: - # F2 - # Co - D1: - # D1 - D2: - # D2 - # F1 + ┬ F1: at 0x7fe074f8c280> + ├┬ F2: + │└ # F2 [34 B] + ├┬ Co: at 0x7fe07501eb30, file "", line 1> + │├┬ F2: + ││└ # F2 [19 B] + │└ # Co [87 B] + ├┬ D1: + │└ # D1 [22 B] + ├┬ D2: + │└ # D2 [2 B] + ├┬ D2: + │├┬ D2: + ││└ # D2 [2 B] + │└ # D2 [23 B] + └ # F1 [180 B] >>> dill.detect.trace(False) With trace, we see how ``dill`` stored the lambda (``F1``) by first storing From ffa59d7c67e43bfdf31d23402960524933afafbf Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 24 Jun 2022 11:50:26 -0300 Subject: [PATCH 11/14] use trace() as a context manager --- README.md | 36 ++++++++++++---------- dill/__init__.py | 8 ++--- dill/logger.py | 73 +++++++++++++++++++++++++++++++++++++++++--- tests/test_logger.py | 58 +++++++++++++++++++++++++---------- 4 files changed, 134 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 2b3688e6..d94e1b4a 100644 --- a/README.md +++ b/README.md @@ -170,26 +170,30 @@ To aid in debugging pickling issues, use *dill.detect* which provides tools like pickle tracing:: >>> import dill.detect - >>> dill.detect.trace(True) - >>> f = dumps(squared) - F1: at 0x108899e18> - F2: - # F2 - Co: at 0x10866a270, file "", line 1> - F2: - # F2 - # Co - D1: - # D1 - D2: - # D2 - # F1 - >>> dill.detect.trace(False) + >>> with dill.detect.trace(): + >>> dumps(squared) + ┬ F1: at 0x7fe074f8c280> + ├┬ F2: + │└ # F2 [34 B] + ├┬ Co: at 0x7fe07501eb30, 
file "", line 1> + │├┬ F2: + ││└ # F2 [19 B] + │└ # Co [87 B] + ├┬ D1: + │└ # D1 [22 B] + ├┬ D2: + │└ # D2 [2 B] + ├┬ D2: + │├┬ D2: + ││└ # D2 [2 B] + │└ # D2 [23 B] + └ # F1 [180 B] With trace, we see how ``dill`` stored the lambda (``F1``) by first storing ``_create_function``, the underlying code object (``Co``) and ``_create_code`` (which is used to handle code objects), then we handle the reference to -the global dict (``D2``). A ``#`` marks when the object is actually stored. +the global dict (``D2``) plus other dictionaries (``D1`` and ``D2``) that +save the lambda object's state. A ``#`` marks when the object is actually stored. More Information diff --git a/dill/__init__.py b/dill/__init__.py index 8249d2f8..39ac4df2 100644 --- a/dill/__init__.py +++ b/dill/__init__.py @@ -185,8 +185,8 @@ tools like pickle tracing:: >>> import dill.detect - >>> dill.detect.trace(True) - >>> f = dumps(squared) + >>> with dill.detect.trace(): + >>> dumps(squared) ┬ F1: at 0x7fe074f8c280> ├┬ F2: │└ # F2 [34 B] @@ -203,12 +203,12 @@ ││└ # D2 [2 B] │└ # D2 [23 B] └ # F1 [180 B] - >>> dill.detect.trace(False) With trace, we see how ``dill`` stored the lambda (``F1``) by first storing ``_create_function``, the underlying code object (``Co``) and ``_create_code`` (which is used to handle code objects), then we handle the reference to -the global dict (``D2``). A ``#`` marks when the object is actually stored. +the global dict (``D2``) plus other dictionaries (``D1`` and ``D2``) that +save the lambda object's state. A ``#`` marks when the object is actually stored. More Information diff --git a/dill/logger.py b/dill/logger.py index e144f8a5..c1b26e9b 100644 --- a/dill/logger.py +++ b/dill/logger.py @@ -23,6 +23,7 @@ its child objects). Sample trace output: >>> import dill, dill.tests + >>> dill.detect.trace(True) >>> dill.dump_session(main=dill.tests) ┬ M1: ├┬ F2: @@ -46,6 +47,10 @@ import locale import logging import math +import os +from contextlib import contextmanager +from functools import partial +from typing import NoReturn, Union import dill @@ -200,9 +205,67 @@ def format(self, record): logger = logging.getLogger('dill') adapter = TraceAdapter(logger) -handler = logging.StreamHandler() -adapter.addHandler(handler) +stderr_handler = logging.StreamHandler() +adapter.addHandler(stderr_handler) -def trace(boolean): - """print a trace through the stack when pickling; useful for debugging""" - logger.setLevel(logging.INFO if boolean else logging.WARNING) +def trace(arg: Union[bool, str, os.PathLike] = None, *, mode: str = 'a') -> NoReturn: + """print a trace through the stack when pickling; useful for debugging + + With a single boolean argument, enable or disable the tracing. + + Example usage: + + >>> import dill + >>> dill.detect.trace(True) + >>> dill.dump_session() + + Alternatively, ``trace()`` can be used as a context manager. With no + arguments, it just takes care of restoring the tracing state on exit. + A file name and a file mode may be specitfied to redirect the tracing + output in the ``with`` block context. A log function is yielded by the + manager so the user can write extra information to the file. 
+ + Example usage: + + >>> from dill import detect + >>> D = {'a': 42, 'b': {'x': None}} + >>> with detect.trace(): + >>> dumps(D) + ┬ D2: + ├┬ D2: + │└ # D2 [8 B] + └ # D2 [22 B] + >>> squared = lambda x: x**2 + >>> with detect.trace('output.txt', mode='w') as log: + >>> log("> D = %r", D) + >>> dumps(D) + >>> log("> squared = %r", squared) + >>> dumps(squared) + + Arguments: + arg: a boolean value or an optional ``PathLike`` object for the context manager + mode: mode string for ``open()`` if a file name is passed as the first argument + """ + if not isinstance(arg, bool): + return _trace(filename=arg, mode=mode) + logger.setLevel(logging.INFO if arg else logging.WARNING) + +@contextmanager +def _trace(filename, mode): + """context manager version of trace(); can redirect the trace to a file""" + previous_level = adapter.getEffectiveLevel() + adapter.setLevel(logging.INFO) + redirect = filename is not None + if redirect: + handler = logging.FileHandler(filename, mode) + try: + if redirect: + adapter.removeHandler(stderr_handler) + adapter.addHandler(handler) + yield adapter.info + finally: + adapter.setLevel(previous_level) + if redirect: + handler.close() + adapter.removeHandler(handler) + adapter.addHandler(stderr_handler) diff --git a/tests/test_logger.py b/tests/test_logger.py index b7be2e51..ccba3d90 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -5,36 +5,62 @@ # License: 3-clause BSD. The full license text is available at: # - https://github.com/uqfoundation/dill/blob/master/LICENSE -import dill, logging, re -from dill.logger import handler, adapter as logger +import logging +import re +import tempfile + +import dill +from dill import detect +from dill.logger import stderr_handler, adapter as logger try: from StringIO import StringIO except ImportError: from io import StringIO -def test_logging(regex=None): +test_obj = {'a': (1, 2), 'b': object(), 'f': lambda x: x**2, 'big': list(range(10000))} + +def test_logging(should_trace): buffer = StringIO() handler = logging.StreamHandler(buffer) logger.addHandler(handler) try: - dill.dumps({'a': (1, 2), 'b': object(), 'big': list(range(10000))}) - if regex is None: - assert buffer.getvalue() == "" - else: - regex = re.compile(regex) + dill.dumps(test_obj) + if should_trace: + regex = re.compile(r'(\S*┬ \w.*[^)]' # begin pickling object + r'|│*└ # \w.* \[\d+ (\wi)?B])' # object written (with size) + ) for line in buffer.getvalue().splitlines(): assert regex.fullmatch(line) + return buffer.getvalue() + else: + assert buffer.getvalue() == "" finally: logger.removeHandler(handler) buffer.close() +def test_trace_to_file(stream_trace): + file = tempfile.NamedTemporaryFile(mode='r') + with detect.trace(file.name, mode='w'): + dill.dumps(test_obj) + file_trace = file.read() + file.close() + # Apparently, objects can change location in memory... 
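+    # strip the '0x...' addresses from both traces so the comparison does not
+    # depend on where the traced objects were allocated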
+ reghex = re.compile(r'0x[0-9A-Za-z]+') + assert reghex.sub('', file_trace) == reghex.sub('', stream_trace) + if __name__ == '__main__': - logger.removeHandler(handler) - test_logging() - dill.detect.trace(True) - test_logging(r'(\S*┬ \w.*[^)]' # begin pickling object - r'|│*└ # \w.* \[\d+ (\wi)?B])' # object written (with size) - ) - dill.detect.trace(False) - test_logging() + logger.removeHandler(stderr_handler) + test_logging(should_trace=False) + detect.trace(True) + test_logging(should_trace=True) + detect.trace(False) + test_logging(should_trace=False) + + loglevel = logging.ERROR + logger.setLevel(loglevel) + with detect.trace(): + stream_trace = test_logging(should_trace=True) + test_logging(should_trace=False) + assert logger.getEffectiveLevel() == loglevel + test_trace_to_file(stream_trace) From d3b05b5192ae2d1c81097816c52a19746b5c57a8 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Sat, 25 Jun 2022 19:19:51 -0300 Subject: [PATCH 12/14] pypy special cases --- dill/logger.py | 9 +++++++-- tests/test_logger.py | 8 ++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/dill/logger.py b/dill/logger.py index c1b26e9b..8019488a 100644 --- a/dill/logger.py +++ b/dill/logger.py @@ -145,8 +145,13 @@ def trace(self, pickler, msg, *args, **kwargs): try: # Streams are not required to be tellable. size = pickler._file.tell() - size += pickler.framer.current_frame.tell() - except AttributeError: + frame = pickler.framer.current_frame + try: + size += frame.tell() + except AttributeError: + # PyPy may use a BytesBuilder as frame + size += len(frame) + except (AttributeError, TypeError): pass if size is not None: if not pushed_obj: diff --git a/tests/test_logger.py b/tests/test_logger.py index ccba3d90..b4e4881a 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -18,7 +18,7 @@ except ImportError: from io import StringIO -test_obj = {'a': (1, 2), 'b': object(), 'f': lambda x: x**2, 'big': list(range(10000))} +test_obj = {'a': (1, 2), 'b': object(), 'f': lambda x: x**2, 'big': list(range(10))} def test_logging(should_trace): buffer = StringIO() @@ -47,7 +47,11 @@ def test_trace_to_file(stream_trace): file.close() # Apparently, objects can change location in memory... reghex = re.compile(r'0x[0-9A-Za-z]+') - assert reghex.sub('', file_trace) == reghex.sub('', stream_trace) + file_trace, stream_trace = reghex.sub('0x', file_trace), reghex.sub('0x', stream_trace) + # PyPy prints dictionary contents with repr(dict)... 
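+    # collapse everything after 'dict.__repr__ of ' so both traces show the
+    # same placeholder regardless of the printed dictionary contents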
+ regdict = re.compile(r'(dict\.__repr__ of ).*') + file_trace, stream_trace = regdict.sub(r'\1{}>', file_trace), regdict.sub(r'\1{}>', stream_trace) + assert file_trace == stream_trace if __name__ == '__main__': logger.removeHandler(stderr_handler) From a5b9a399c1eb567737cfcb4ceebdec304015ac54 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Sat, 25 Jun 2022 23:48:13 -0300 Subject: [PATCH 13/14] accept file handle in trace context manager --- dill/logger.py | 56 ++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/dill/logger.py b/dill/logger.py index 8019488a..7fa2856c 100644 --- a/dill/logger.py +++ b/dill/logger.py @@ -44,13 +44,13 @@ __all__ = ['adapter', 'logger', 'trace'] import codecs +import contextlib import locale import logging import math import os -from contextlib import contextmanager from functools import partial -from typing import NoReturn, Union +from typing import NoReturn, TextIO, Union import dill @@ -213,7 +213,7 @@ def format(self, record): stderr_handler = logging.StreamHandler() adapter.addHandler(stderr_handler) -def trace(arg: Union[bool, str, os.PathLike] = None, *, mode: str = 'a') -> NoReturn: +def trace(arg: Union[bool, TextIO, str, os.PathLike] = None, *, mode: str = 'a') -> NoReturn: """print a trace through the stack when pickling; useful for debugging With a single boolean argument, enable or disable the tracing. @@ -226,9 +226,10 @@ def trace(arg: Union[bool, str, os.PathLike] = None, *, mode: str = 'a') -> NoRe Alternatively, ``trace()`` can be used as a context manager. With no arguments, it just takes care of restoring the tracing state on exit. - A file name and a file mode may be specitfied to redirect the tracing - output in the ``with`` block context. A log function is yielded by the - manager so the user can write extra information to the file. + Either a file handle, or a file name and (optionally) a file mode may be + specitfied to redirect the tracing output in the ``with`` block context. A + log function is yielded by the manager so the user can write extra + information to the file. 
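+
+    An already opened text stream can be passed in place of a file name (an
+    illustrative sketch; any object with a ``write()`` method is treated as a
+    stream by this version):
+
+    >>> import dill
+    >>> with open('output.txt', mode='w') as file:
+    >>>     with dill.detect.trace(file) as log:
+    >>>         log("> pickling a list")
+    >>>         dill.dumps([1, 2, 3])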
Example usage: @@ -248,29 +249,36 @@ def trace(arg: Union[bool, str, os.PathLike] = None, *, mode: str = 'a') -> NoRe >>> dumps(squared) Arguments: - arg: a boolean value or an optional ``PathLike`` object for the context manager + arg: a boolean value, or an optional file-like or path-like object for the context manager mode: mode string for ``open()`` if a file name is passed as the first argument """ if not isinstance(arg, bool): - return _trace(filename=arg, mode=mode) + return TraceManager(file=arg, mode=mode) logger.setLevel(logging.INFO if arg else logging.WARNING) -@contextmanager -def _trace(filename, mode): +class TraceManager(contextlib.AbstractContextManager): """context manager version of trace(); can redirect the trace to a file""" - previous_level = adapter.getEffectiveLevel() - adapter.setLevel(logging.INFO) - redirect = filename is not None - if redirect: - handler = logging.FileHandler(filename, mode) - try: - if redirect: + def __init__(self, file, mode): + self.file = file + self.mode = mode + self.redirect = file is not None + self.file_is_stream = hasattr(file, 'write') + def __enter__(self): + if self.redirect: + stderr_handler.flush() + if self.file_is_stream: + self.handler = logging.StreamHandler(self.file) + else: + self.handler = logging.FileHandler(self.file, self.mode) adapter.removeHandler(stderr_handler) - adapter.addHandler(handler) - yield adapter.info - finally: - adapter.setLevel(previous_level) - if redirect: - handler.close() - adapter.removeHandler(handler) + adapter.addHandler(self.handler) + self.old_level = adapter.getEffectiveLevel() + adapter.setLevel(logging.INFO) + return adapter.info + def __exit__(self, *exc_info): + adapter.setLevel(self.old_level) + if self.redirect: + adapter.removeHandler(self.handler) adapter.addHandler(stderr_handler) + if not self.file_is_stream: + self.handler.close() From 60c154cb58673c1f81d5b06a9b854772ab959334 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Mon, 27 Jun 2022 12:00:15 -0300 Subject: [PATCH 14/14] fix dict (PyPy) and mappingproxy (CPython) trace --- dill/_dill.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/dill/_dill.py b/dill/_dill.py index 2821262f..de4cb358 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -1472,18 +1472,22 @@ def save_code(pickler, obj): logger.trace(pickler, "# Co") return +def _repr_dict(obj): + """make a short string representation of a dictionary""" + return "<%s object at %#012x>" % (type(obj).__name__, id(obj)) + @register(dict) def save_module_dict(pickler, obj): if is_dill(pickler, child=False) and obj == pickler._main.__dict__ and \ not (pickler._session and pickler._first_pass): - logger.trace(pickler, "D1: