From 35bf9aafb390877b014951384843b9f604094868 Mon Sep 17 00:00:00 2001 From: Timo Wilken Date: Mon, 4 Apr 2022 09:54:06 +0000 Subject: [PATCH] Work around non-UTF-8 output from commands (#752) If commands we call output something other than UTF-8, then try decoding as ISO-8859-1, which should cover everything. This way, we shouldn't get any encoding errors any more. --- alibuild_helpers/cmd.py | 49 +++++++++++++++++++++++------------ alibuild_helpers/utilities.py | 22 +++------------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/alibuild_helpers/cmd.py b/alibuild_helpers/cmd.py index bf950b0c..f13b8e1b 100644 --- a/alibuild_helpers/cmd.py +++ b/alibuild_helpers/cmd.py @@ -13,6 +13,7 @@ # Keep the linter happy if sys.version_info[0] >= 3: basestring = str + unicode = None def is_string(s): @@ -21,23 +22,41 @@ def is_string(s): return isinstance(s, basestring) +def decode_with_fallback(data): + """Try to decode DATA as utf-8; if that doesn't work, fall back to latin-1. + + This combination should cover every possible byte string, as latin-1 covers + every possible single byte. 
+ """ + if sys.version_info[0] >= 3: + if isinstance(data, bytes): + try: + return data.decode("utf-8") + except UnicodeDecodeError: + return data.decode("latin-1") + else: + return str(data) + elif isinstance(data, str): + return unicode(data, "utf-8") # utf-8 is a safe assumption + elif not isinstance(data, unicode): + return unicode(str(data)) + return data + + def getoutput(command): """Run command, check it succeeded, and return its stdout as a string.""" - kwargs = {} if sys.version_info.major < 3 else {"encoding": "utf-8"} - proc = Popen(command, shell=is_string(command), stdout=PIPE, stderr=PIPE, - universal_newlines=True, **kwargs) + proc = Popen(command, shell=is_string(command), stdout=PIPE, stderr=PIPE) stdout, stderr = proc.communicate() dieOnError(proc.returncode, "Command %s failed with code %d: %s" % - (command, proc.returncode, stderr)) - return stdout + (command, proc.returncode, decode_with_fallback(stderr))) + return decode_with_fallback(stdout) def getstatusoutput(command): """Run command and return its return code and output (stdout and stderr).""" - kwargs = {} if sys.version_info.major < 3 else {"encoding": "utf-8"} - proc = Popen(command, shell=is_string(command), stdout=PIPE, stderr=STDOUT, - universal_newlines=True, **kwargs) + proc = Popen(command, shell=is_string(command), stdout=PIPE, stderr=STDOUT) merged_output, _ = proc.communicate() + merged_output = decode_with_fallback(merged_output) # Strip a single trailing newline, if one exists, to match the behaviour of # subprocess.getstatusoutput. 
if merged_output.endswith("\n"): @@ -46,16 +65,12 @@ def getstatusoutput(command): def execute(command, printer=debug): - kwargs = {} if sys.version_info.major < 3 else {"encoding": "utf-8"} - popen = Popen(command, shell=is_string(command), stdout=PIPE, - universal_newlines=True, **kwargs) - lines_iterator = iter(popen.stdout.readline, "") - for line in lines_iterator: - if not line: break - printer("%s", line.strip("\n")) - out = popen.communicate()[0].strip("\n") + popen = Popen(command, shell=is_string(command), stdout=PIPE, stderr=STDOUT) + for line in iter(popen.stdout.readline, b""): + printer("%s", decode_with_fallback(line).strip("\n")) + out = decode_with_fallback(popen.communicate()[0]).strip("\n") if out: - printer(out) + printer("%s", out) return popen.returncode diff --git a/alibuild_helpers/utilities.py b/alibuild_helpers/utilities.py index 29f2e8e7..4b287030 100644 --- a/alibuild_helpers/utilities.py +++ b/alibuild_helpers/utilities.py @@ -13,7 +13,7 @@ except ImportError: from ordereddict import OrderedDict -from alibuild_helpers.cmd import getoutput +from alibuild_helpers.cmd import decode_with_fallback, getoutput from alibuild_helpers.git import git from alibuild_helpers.log import dieOnError @@ -23,10 +23,6 @@ class SpecError(Exception): asList = lambda x : x if type(x) == list else [x] -# Keep the linter happy -if sys.version_info[0] >= 3: - unicode = None - def star(): return re.sub("build.*$", "", basename(sys.argv[0]).lower()) @@ -129,20 +125,10 @@ def validateDefaults(finalPkgSpec, defaults): defaults, "\n".join([" - " + x for x in validDefaults])), validDefaults) + def format(s, **kwds): - if sys.version_info[0] >= 3: - if isinstance(s, bytes): - try: - s = s.decode("utf-8") # to get newlines as such and not as escaped \n - except: - s = s.decode("latin-1") # Workaround issue with some special characters of latin-1 which are not understood by unicode - else: - s = str(s) - elif isinstance(s, str): - s = unicode(s, "utf-8") # utf-8 is a 
safe assumption - elif not isinstance(s, unicode): - s = unicode(str(s)) - return s % kwds + return decode_with_fallback(s) % kwds + def doDetectArch(hasOsRelease, osReleaseLines, platformTuple, platformSystem, platformProcessor): if platformSystem == "Darwin":