Skip to content

Commit

Permalink
Work around non-UTF-8 output from commands (#752)
Browse files Browse the repository at this point in the history
If the commands we call output something other than UTF-8, then try decoding
as ISO-8859-1, which should cover everything. This way, we shouldn't get any
encoding errors any more.
  • Loading branch information
TimoWilken committed Apr 4, 2022
1 parent 8622569 commit 35bf9aa
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 35 deletions.
49 changes: 32 additions & 17 deletions alibuild_helpers/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# Keep the linter happy
if sys.version_info[0] >= 3:
basestring = str
unicode = None


def is_string(s):
Expand All @@ -21,23 +22,41 @@ def is_string(s):
return isinstance(s, basestring)


def decode_with_fallback(data):
    """Return DATA as text, decoding bytes as utf-8 with a latin-1 fallback.

    Byte strings are decoded as utf-8 first; if that fails, they are decoded
    as latin-1 instead. This combination covers every possible byte string,
    as latin-1 maps every possible single byte to a character.

    Anything that is not a byte string is converted with str() on Python 3
    (text is returned unchanged) or with unicode(str()) on Python 2.
    """
    # On Python 2, bytes is an alias of str, so this single branch gives both
    # interpreter versions the same utf-8-then-latin-1 behaviour. Previously,
    # the Python 2 path had no fallback and could still raise on non-utf-8.
    if isinstance(data, bytes):
        try:
            return data.decode("utf-8")
        except UnicodeDecodeError:
            return data.decode("latin-1")
    if sys.version_info[0] >= 3:
        return str(data)
    # Python 2 only from here on: unicode text passes through untouched.
    if isinstance(data, unicode):
        return data
    return unicode(str(data))


def getoutput(command):
    """Run command, check it succeeded, and return its stdout as a string.

    COMMAND is run through the shell if it is a string, and executed directly
    otherwise. Output is captured as raw bytes and decoded afterwards with
    decode_with_fallback, so that commands printing something other than
    UTF-8 do not cause decoding errors.

    The return code and an error message containing the command's decoded
    stderr are handed to dieOnError, which decides whether to abort.
    """
    # No encoding/universal_newlines here: we want bytes from the pipes, so
    # that decoding cannot fail inside communicate().
    proc = Popen(command, shell=is_string(command), stdout=PIPE, stderr=PIPE)
    stdout, stderr = proc.communicate()
    dieOnError(proc.returncode, "Command %s failed with code %d: %s" %
               (command, proc.returncode, decode_with_fallback(stderr)))
    return decode_with_fallback(stdout)


def getstatusoutput(command):
"""Run command and return its return code and output (stdout and stderr)."""
kwargs = {} if sys.version_info.major < 3 else {"encoding": "utf-8"}
proc = Popen(command, shell=is_string(command), stdout=PIPE, stderr=STDOUT,
universal_newlines=True, **kwargs)
proc = Popen(command, shell=is_string(command), stdout=PIPE, stderr=STDOUT)
merged_output, _ = proc.communicate()
merged_output = decode_with_fallback(merged_output)
# Strip a single trailing newline, if one exists, to match the behaviour of
# subprocess.getstatusoutput.
if merged_output.endswith("\n"):
Expand All @@ -46,16 +65,12 @@ def getstatusoutput(command):


def execute(command, printer=debug):
    """Run command, passing its output line by line to PRINTER.

    COMMAND is run through the shell if it is a string, and executed directly
    otherwise. Its stderr is merged into stdout. Each line is decoded with
    decode_with_fallback, stripped of newlines and passed to PRINTER as the
    argument of a "%s" format string, so that "%" characters in the output
    are never interpreted as format directives.

    Returns the command's exit code.
    """
    popen = Popen(command, shell=is_string(command), stdout=PIPE, stderr=STDOUT)
    # The pipe yields bytes, so the end-of-file sentinel is b"", not "".
    for line in iter(popen.stdout.readline, b""):
        printer("%s", decode_with_fallback(line).strip("\n"))
    # Wait for the process to exit and pick up anything still unread.
    out = decode_with_fallback(popen.communicate()[0]).strip("\n")
    if out:
        printer("%s", out)
    return popen.returncode


Expand Down
22 changes: 4 additions & 18 deletions alibuild_helpers/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
except ImportError:
from ordereddict import OrderedDict

from alibuild_helpers.cmd import getoutput
from alibuild_helpers.cmd import decode_with_fallback, getoutput
from alibuild_helpers.git import git
from alibuild_helpers.log import dieOnError

Expand All @@ -23,10 +23,6 @@ class SpecError(Exception):

# Return x unchanged if it is a list; otherwise wrap it in a one-element list.
# The check is on the exact type, so instances of list subclasses get wrapped.
asList = lambda x : x if type(x) == list else [x]

# Keep the linter happy
if sys.version_info[0] >= 3:
unicode = None

def star():
    """Return the lowercased name of the running script, up to "build".

    Takes the basename of sys.argv[0], lowercases it and removes the first
    occurrence of "build" together with everything after it. For example,
    a script invoked as ".../aliBuild" gives "ali".
    """
    return re.sub("build.*$", "", basename(sys.argv[0]).lower())

Expand Down Expand Up @@ -129,20 +125,10 @@ def validateDefaults(finalPkgSpec, defaults):
defaults,
"\n".join([" - " + x for x in validDefaults])), validDefaults)


def format(s, **kwds):
    """Convert S to text and %-format it with the given keyword arguments.

    S is first passed through decode_with_fallback, so it may be a byte
    string (decoded as utf-8, falling back to latin-1), text, or any other
    object. The result is then interpolated with KWDS using the % operator,
    so S should use "%(name)s"-style placeholders.
    """
    return decode_with_fallback(s) % kwds


def doDetectArch(hasOsRelease, osReleaseLines, platformTuple, platformSystem, platformProcessor):
if platformSystem == "Darwin":
Expand Down

0 comments on commit 35bf9aa

Please sign in to comment.