Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some refactoring of string encoding/decoding utilities #1342

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions psutil/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
enum = None

# can't take it from _common.py as this script is imported by setup.py
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3

__all__ = [
Expand All @@ -61,6 +62,7 @@
'pthread', 'puids', 'sconn', 'scpustats', 'sdiskio', 'sdiskpart',
'sdiskusage', 'snetio', 'snicaddr', 'snicstats', 'sswap', 'suser',
# utility functions
'str2bytes', 'bytes2str', 'unicode2str', 'open_binary', 'open_text',
'conn_tmap', 'deprecated_method', 'isfile_strict', 'memoize',
'parse_environ_block', 'path_exists_strict', 'usage_percent',
'supports_ipv6', 'sockfam_to_enum', 'socktype_to_enum', "wrap_numbers",
Expand Down Expand Up @@ -263,6 +265,61 @@ class BatteryTime(enum.IntEnum):
# --- utils
# ===================================================================

if PY2:
    def bytes2str(s, encoding=ENCODING, errors=ENCODING_ERRS):
        """Given bytes, return a str.

        Python 2 variant: bytes and str are the same type, so `s` is
        returned untouched. `encoding` and `errors` are accepted only
        to keep the signature identical to the Python 3 variant.
        """
        return s

    def str2bytes(s, encoding=ENCODING, errors=ENCODING_ERRS):
        """Given a str, return bytes.

        Python 2 variant: str and bytes are the same type, so `s` is
        returned untouched. `encoding` and `errors` are accepted only
        to keep the signature identical to the Python 3 variant.
        """
        return s

    def unicode2str(s, encoding=ENCODING, errors=ENCODING_ERRS):
        """Given a Python 2 unicode object, return a (byte) str.

        Python 2 variant: unicode is encoded using `encoding` and
        `errors`, which default to the ENCODING / ENCODING_ERRS
        constants. A value which already is a str is returned as-is.
        """
        # Calling .encode() on a Python 2 byte str makes the interpreter
        # decode it as ASCII first, which raises UnicodeDecodeError as
        # soon as it contains a non-ASCII byte. Such a value needs no
        # conversion anyway, so hand it back unchanged.
        if isinstance(s, str):
            return s
        return s.encode(encoding, errors)
else:
    def bytes2str(s, encoding=ENCODING, errors=ENCODING_ERRS):
        """Given bytes, return a str.

        Python 3 variant: the bytes are decoded using `encoding` and
        `errors`, which default to the ENCODING / ENCODING_ERRS
        constants.
        """
        return s.decode(encoding, errors)

    def str2bytes(s, encoding=ENCODING, errors=ENCODING_ERRS):
        """Given a str, return bytes.

        Python 3 variant: the str is encoded using `encoding` and
        `errors`, which default to the ENCODING / ENCODING_ERRS
        constants.
        """
        return s.encode(encoding, errors)

    def unicode2str(s, encoding=ENCODING, errors=ENCODING_ERRS):
        """Given a Python 3 str, return a str.

        Python 3 variant: str already is unicode, so `s` is returned
        untouched. `encoding` and `errors` are accepted only to keep
        the signature identical to the Python 2 variant.
        """
        return s


def usage_percent(used, total, round_=None):
"""Calculate percentage usage of 'used' against 'total'."""
Expand Down
32 changes: 9 additions & 23 deletions psutil/_pslinux.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
from . import _psposix
from . import _psutil_linux as cext
from . import _psutil_posix as cext_posix
from ._common import ENCODING
from ._common import ENCODING_ERRS
from ._common import isfile_strict
from ._common import memoize
from ._common import memoize_when_activated
Expand All @@ -39,6 +37,8 @@
from ._common import path_exists_strict
from ._common import supports_ipv6
from ._common import usage_percent
from ._common import str2bytes
from ._common import bytes2str
from ._compat import b
from ._compat import basestring
from ._compat import long
Expand Down Expand Up @@ -203,14 +203,6 @@ class IOPriority(enum.IntEnum):
# =====================================================================


if PY3:
def decode(s):
return s.decode(encoding=ENCODING, errors=ENCODING_ERRS)
else:
def decode(s):
return s


def get_procfs_path():
"""Return updated psutil.PROCFS_PATH constant."""
return sys.modules['psutil'].PROCFS_PATH
Expand Down Expand Up @@ -814,8 +806,9 @@ def decode_address(addr, family):
# no end-points connected
if not port:
return ()
if PY3:
ip = ip.encode('ascii')

ip = str2bytes(ip, 'ascii')

if family == socket.AF_INET:
# see: https://github.com/giampaolo/psutil/issues/201
if LITTLE_ENDIAN:
Expand Down Expand Up @@ -1516,11 +1509,7 @@ def oneshot_exit(self):

@wrap_exceptions
def name(self):
name = self._parse_stat_file()[0]
if PY3:
name = decode(name)
# XXX - gets changed later and probably needs refactoring
return name
return bytes2str(self._parse_stat_file()[0])

def exe(self):
try:
Expand Down Expand Up @@ -1730,14 +1719,13 @@ def get_blocks(lines, current_block):
if not path:
path = '[anon]'
else:
if PY3:
path = decode(path)
path = bytes2str(path)
path = path.strip()
if (path.endswith(' (deleted)') and not
path_exists_strict(path)):
path = path[:-10]
ls.append((
decode(addr), decode(perms), path,
bytes2str(addr), bytes2str(perms), path,
data[b'Rss:'],
data.get(b'Size:', 0),
data.get(b'Pss:', 0),
Expand Down Expand Up @@ -1943,9 +1931,7 @@ def rlimit(self, resource, limits=None):

@wrap_exceptions
def status(self):
letter = self._parse_stat_file()[1]
if PY3:
letter = letter.decode()
letter = bytes2str(self._parse_stat_file()[1])
# XXX is '?' legit? (we're not supposed to return it anyway)
return PROC_STATUSES.get(letter, '?')

Expand Down
61 changes: 23 additions & 38 deletions psutil/_pswindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
raise

from ._common import conn_tmap
from ._common import ENCODING
from ._common import ENCODING_ERRS
from ._common import bytes2str
from ._common import unicode2str
from ._common import isfile_strict
from ._common import memoize_when_activated
from ._common import parse_environ_block
Expand Down Expand Up @@ -190,19 +190,6 @@ def convert_dos_path(s):
return os.path.join(driveletter, s[len(rawdrive):])


def py2_strencode(s):
"""Encode a unicode string to a byte string by using the default fs
encoding + "replace" error handler.
"""
if PY3:
return s
else:
if isinstance(s, str):
return s
else:
return s.encode(ENCODING, ENCODING_ERRS)


# =====================================================================
# --- memory
# =====================================================================
Expand Down Expand Up @@ -241,10 +228,10 @@ def swap_memory():

def disk_usage(path):
"""Return disk usage associated with path."""
if PY3 and isinstance(path, bytes):
if isinstance(path, bytes):
# XXX: do we want to use "strict"? Probably yes, in order
# to fail immediately. After all we are accepting input here...
path = path.decode(ENCODING, errors="strict")
path = bytes2str(path, errors='strict')
total, free = cext.disk_usage(path)
used = total - free
percent = usage_percent(used, total, round_=1)
Expand Down Expand Up @@ -346,9 +333,7 @@ def net_if_stats():
ret = {}
rawdict = cext.net_if_stats()
for name, items in rawdict.items():
if not PY3:
assert isinstance(name, unicode), type(name)
name = py2_strencode(name)
name = unicode2str(name)
isup, duplex, speed, mtu = items
if hasattr(_common, 'NicDuplex'):
duplex = _common.NicDuplex(duplex)
Expand All @@ -361,15 +346,15 @@ def net_io_counters():
installed on the system as a dict of raw tuples.
"""
ret = cext.net_io_counters()
return dict([(py2_strencode(k), v) for k, v in ret.items()])
return dict([(unicode2str(k), v) for k, v in ret.items()])


def net_if_addrs():
"""Return the addresses associated to each NIC."""
ret = []
for items in cext.net_if_addrs():
items = list(items)
items[0] = py2_strencode(items[0])
items[0] = unicode2str(items[0])
ret.append(items)
return ret

Expand Down Expand Up @@ -427,7 +412,7 @@ def users():
rawlist = cext.users()
for item in rawlist:
user, hostname, tstamp = item
user = py2_strencode(user)
user = unicode2str(user)
nt = _common.suser(user, None, hostname, tstamp, None)
retlist.append(nt)
return retlist
Expand All @@ -441,7 +426,7 @@ def users():
def win_service_iter():
"""Yields a list of WindowsService instances."""
for name, display_name in cext.winservice_enumerate():
yield WindowsService(py2_strencode(name), py2_strencode(display_name))
yield WindowsService(unicode2str(name), unicode2str(display_name))


def win_service_get(name):
Expand Down Expand Up @@ -482,10 +467,10 @@ def _query_config(self):
cext.winservice_query_config(self._name)
# XXX - update _self.display_name?
return dict(
display_name=py2_strencode(display_name),
binpath=py2_strencode(binpath),
username=py2_strencode(username),
start_type=py2_strencode(start_type))
display_name=unicode2str(display_name),
binpath=unicode2str(binpath),
username=unicode2str(username),
start_type=unicode2str(start_type))

def _query_status(self):
with self._wrap_exceptions():
Expand Down Expand Up @@ -560,7 +545,7 @@ def status(self):

def description(self):
"""Service long description."""
return py2_strencode(cext.winservice_query_descr(self.name()))
return unicode2str(cext.winservice_query_descr(self.name()))

# utils

Expand Down Expand Up @@ -684,9 +669,9 @@ def name(self):
try:
# Note: this will fail with AD for most PIDs owned
# by another user but it's faster.
return py2_strencode(os.path.basename(self.exe()))
return unicode2str(os.path.basename(self.exe()))
except AccessDenied:
return py2_strencode(cext.proc_name(self.pid))
return unicode2str(cext.proc_name(self.pid))

@wrap_exceptions
def exe(self):
Expand All @@ -698,22 +683,22 @@ def exe(self):
# see https://github.com/giampaolo/psutil/issues/528
if self.pid in (0, 4):
raise AccessDenied(self.pid, self._name)
return py2_strencode(convert_dos_path(cext.proc_exe(self.pid)))
return unicode2str(convert_dos_path(cext.proc_exe(self.pid)))

@wrap_exceptions
def cmdline(self):
ret = cext.proc_cmdline(self.pid)
if PY3:
return ret
else:
return [py2_strencode(s) for s in ret]
return [unicode2str(s) for s in ret]

@wrap_exceptions
def environ(self):
ustr = cext.proc_environ(self.pid)
if ustr and not PY3:
assert isinstance(ustr, unicode), type(ustr)
return parse_environ_block(py2_strencode(ustr))
return parse_environ_block(unicode2str(ustr))

def ppid(self):
try:
Expand Down Expand Up @@ -775,7 +760,7 @@ def memory_maps(self):
path = convert_dos_path(path)
if not PY3:
assert isinstance(path, unicode), type(path)
path = py2_strencode(path)
path = unicode2str(path)
addr = hex(addr)
yield (addr, perm, path, rss)

Expand Down Expand Up @@ -835,7 +820,7 @@ def username(self):
if self.pid in (0, 4):
return 'NT AUTHORITY\\SYSTEM'
domain, user = cext.proc_username(self.pid)
return py2_strencode(domain) + '\\' + py2_strencode(user)
return unicode2str(domain) + '\\' + unicode2str(user)

@wrap_exceptions
def create_time(self):
Expand Down Expand Up @@ -891,7 +876,7 @@ def cwd(self):
# return a normalized pathname since the native C function appends
# "\\" at the end of the path
path = cext.proc_cwd(self.pid)
return py2_strencode(os.path.normpath(path))
return unicode2str(os.path.normpath(path))

@wrap_exceptions
def open_files(self):
Expand All @@ -907,7 +892,7 @@ def open_files(self):
_file = convert_dos_path(_file)
if isfile_strict(_file):
if not PY3:
_file = py2_strencode(_file)
_file = unicode2str(_file)
ntuple = _common.popenfile(_file, -1)
ret.add(ntuple)
return list(ret)
Expand Down