Skip to content

Commit

Permalink
Decode kernel backtraces in seastar-addr2line
Browse files Browse the repository at this point in the history
This change implements best-effort kernel backtrace
decoding using /proc/kallsyms. We look up the kernel
address against the addresses from kallsyms and report
the symbol with the largest-but-not-greater address.

This file is only available to non-root users if:

/proc/sys/kernel/kptr_restrict

is set to 0; it defaults to 1 for security. If the
file is not accessible or the above tunable isn't
set to 0, we do not decode kernel addresses.

That /proc/kallsyms file lists the true
addresses of all kernel symbols in the running kernel.
"True" here refers to the fact that under KASLR the
address in the kernel image will in general not be the
same as the address in the running kernel: they will
vary by a random offset that changes each boot.

This means that decoding only works on the machine
that generated the backtrace and only until the next
reboot. If these conditions are violated, we don't detect
it directly, but in the case of KASLR we will probably at
least fail to decode everything as the entire block of
symbols moves enough that addresses will be invalid.

Without KASLR and when decoding against the "wrong"
kallsyms file you will probably just get a bogus backtrace,
i.e., one where the callers don't actually call the callees shown, etc.
  • Loading branch information
travisdowns authored and BenPope committed Jun 9, 2022
1 parent baaccf1 commit 0af8ee3
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 12 deletions.
131 changes: 120 additions & 11 deletions scripts/addr2line.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@
#
# Copyright (C) 2017 ScyllaDB

import argparse
import bisect
import collections
import re
import sys
import subprocess
from enum import Enum
from typing import Any

# Special binary path/module name indicating that an address comes from the
# kernel rather than from a regular binary path/module.
KERNEL_MODULE = '<kernel>'

class Addr2Line:

Expand Down Expand Up @@ -83,6 +87,89 @@ def __call__(self, address):
self._input.stdin.flush()
return self._read_resolved_address()

class KernelResolver:
    """A resolver for kernel addresses which tries to read from /proc/kallsyms.

    On construction the symbol table is loaded from /proc/kallsyms. If the
    file cannot be opened, is empty, or only contains zeroed addresses, the
    reason is stored in ``self.error`` and printed; in that state calling the
    resolver simply echoes the address back undecoded.

    Calling the instance with a hexadecimal address string returns one
    newline-terminated line: ``symbol+0xoffset`` on success, or the original
    address with a short explanation when it falls outside the symbol table.
    """

    # kallsyms does not include symbol sizes, so an address is only attributed
    # to the very last symbol if it lies at most this many bytes past its start.
    LAST_SYMBOL_MAX_SIZE = 1024

    # Upper bound on the number of unparseable kallsyms lines that are reported.
    MAX_PARSE_WARNINGS = 10

    _KSYM_RE = re.compile(r'(?P<addr>[0-9a-f]+) (?P<type>.+) (?P<name>\S+)')

    def __init__(self) -> None:
        try:
            f = open('/proc/kallsyms', 'r')
        except OSError as e:
            self.error = f'Cannot open /proc/kallsyms: {e}'
            print(self.error)
            return

        with f:
            syms = self._parse_kallsyms(f)

        if not syms:
            # make empty kallsyms (?) an error so we can assume len >= 1 below
            self.error = 'kallsyms was empty'
            print(self.error)
            return

        if syms[-1][0] == 0:
            # zero values for all symbols means that kptr_restrict blocked you
            # from seeing the kernel symbol addresses
            print('kallsyms is restricted, set /proc/sys/kernel/kptr_restrict to 0 to decode')
            self.error = 'kallsyms is restricted'
            return

        self.error = None
        # split because bisect can't take a key func before 3.10
        self.sym_addrs : tuple[int, ...]
        self.sym_names : tuple[str, ...]
        self.sym_addrs, self.sym_names = zip(*syms) # type: ignore

    @classmethod
    def _parse_kallsyms(cls, lines):
        """Parse kallsyms-formatted lines into a sorted list of (address, name).

        Lines that do not match the expected ``<hex addr> <type> <name>``
        layout are skipped; at most MAX_PARSE_WARNINGS of them are reported.
        """
        syms : list[tuple[int, str]] = []
        warnings_left = cls.MAX_PARSE_WARNINGS
        for line in lines:
            m = cls._KSYM_RE.match(line)
            if m:
                syms.append((int(m.group('addr'), 16), m.group('name')))
            elif warnings_left > 0: # don't spam too much
                print(f'WARNING: /proc/kallsyms regex match failure: {line.strip()}', file=sys.stdout)
                warnings_left -= 1
        syms.sort()
        return syms

    def __call__(self, addrstr: str) -> str:
        """Resolve the hexadecimal address string `addrstr` to ``symbol+0xoffset``.

        Returns a newline-terminated string. If the symbol table could not be
        loaded, the address is returned unchanged (plus newline).
        """
        if self.error:
            return addrstr + '\n'

        sa = self.sym_addrs
        sn = self.sym_names
        address = int(addrstr, 16)
        # index of the symbol with the largest address that is <= address,
        # or -1 if the address precedes every known symbol
        idx = bisect.bisect_right(sa, address) - 1
        if idx == -1:
            return f'{addrstr} ({sa[0] - address} bytes before first symbol)\n'
        if idx == len(sa) - 1:
            # We can easily detect symbol addresses which are too small: they fall before
            # the first symbol in kallsyms, but for too large it is harder: we can't really
            # distinguish between an address that is in the *very last* function in the symbol map
            # and one which is beyond that, since kallsyms doesn't include symbol size. Instead
            # we use a bit of a quick and dirty heuristic: if the address is *far enough* beyond
            # the last symbol we assume it is not valid. Most likely, the overwhelming majority
            # of such cases are invalid (e.g., due to KASLR) as the final symbol in the map is
            # usually something obscure.
            lastsym = sa[-1]
            if address - lastsym > self.LAST_SYMBOL_MAX_SIZE:
                return f'{addrstr} ({address - lastsym} bytes after last symbol)\n'
        saddr = sa[idx]
        return '{}+0x{:x}\n'.format(sn[idx], address - saddr)


class BacktraceResolver(object):

class BacktraceParser(object):
Expand All @@ -100,26 +187,44 @@ def __init__(self):
self.asan_re = re.compile(fr"^(?:.*\s+)\({full_addr_match}\)\s*$", flags=re.IGNORECASE)
self.asan_ignore_re = re.compile(fr"^=.*$", flags=re.IGNORECASE)
self.syslog_re = re.compile(fr"^(?:.*\s+){full_addr_match}\s*$", flags=re.IGNORECASE)
self.kernel_re = re.compile(fr'^kernel callstack: (?P<addrs>(?:{addr}\s*)+)$')
self.separator_re = re.compile(r'^\W*-+\W*$')


def split_addresses(self, addrstring: str, default_path=None):
    """Split a whitespace-separated string of addresses into address records.

    Each token is matched against ``self.address_re``. The result is a list
    of ``{'path': ..., 'addr': ...}`` dicts, in input order, where 'path' is
    the first regex group, or `default_path` when that group is empty or
    missing, and 'addr' is the second regex group.

    A token that does not match the address regex trips an assertion.
    """
    parsed : list[dict[str, Any]] = []
    for token in addrstring.split():
        match = re.match(self.address_re, token)
        assert match, f'addr did not match address regex: {token}'
        path = match.group(1) or default_path
        parsed.append({'path': path, 'addr': match.group(2)})
    return parsed

def __call__(self, line):
def get_prefix(s):
if s is not None:
s = s.strip()
return s or None

# order here is important: the kernel callstack regex
# needs to come first since it is more specific and would
# otherwise be matched by the online regex which comes next
m = self.kernel_re.match(line)
if m:
return {
'type': self.Type.ADDRESS,
'prefix': 'kernel callstack: ',
'addresses' : self.split_addresses(m.group('addrs'), KERNEL_MODULE)
}

m = re.match(self.oneline_re, line)
if m:
#print(f">>> '{line}': oneline {m.groups()}")
ret = {'type': self.Type.ADDRESS}
ret['prefix'] = get_prefix(m.group(1))
addresses = []
for obj in m.group(2).split():
m = re.match(self.address_re, obj)
#print(f" >>> '{obj}': address {m.groups()}")
addresses.append({'path': m.group('path'), 'addr': m.group('addr')})
ret['addresses'] = addresses
return ret
return {
'type': self.Type.ADDRESS,
'prefix': get_prefix(m.group(1)),
'addresses': self.split_addresses(m.group(2))
}

m = re.match(self.asan_ignore_re, line)
if m:
Expand Down Expand Up @@ -170,7 +275,11 @@ def __init__(self, executable, before_lines=1, context_re='', verbose=False, con

def _get_resolver_for_module(self, module):
if not module in self._known_modules:
self._known_modules[module] = Addr2Line(module, self._concise, self._cmd_path)
if module == KERNEL_MODULE:
resolver = KernelResolver()
else:
resolver = Addr2Line(module, self._concise, self._cmd_path)
self._known_modules[module] = resolver
return self._known_modules[module]

def __enter__(self):
Expand Down
19 changes: 18 additions & 1 deletion scripts/seastar-addr2line
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,24 @@ if args.test:
('[2022-04-19T23:09:28.311Z] 0x461bbb8',
{'type': BacktraceResolver.BacktraceParser.Type.ADDRESS, 'prefix': None, 'addresses': [{'path': None, 'addr': '0x461bbb8'}]}),
('[2022-04-19T23:09:28.311Z] /lib64/libpthread.so.0+0x92a4',
{'type': BacktraceResolver.BacktraceParser.Type.ADDRESS, 'prefix': None, 'addresses': [{'path': '/lib64/libpthread.so.0', 'addr': '0x92a4'}]})
{'type': BacktraceResolver.BacktraceParser.Type.ADDRESS, 'prefix': None, 'addresses': [{'path': '/lib64/libpthread.so.0', 'addr': '0x92a4'}]}),

('kernel callstack: ', None),
('kernel callstack: 0xffffffffffffff80',
{
'type': BacktraceResolver.BacktraceParser.Type.ADDRESS,
'prefix': 'kernel callstack: ',
'addresses': [{'path': '<kernel>', 'addr': '0xffffffffffffff80'}]
}
),
('kernel callstack: 0xffffffffffffff80 0xffffffffaf15ccca',
{
'type': BacktraceResolver.BacktraceParser.Type.ADDRESS,
'prefix': 'kernel callstack: ',
'addresses': [{'path': '<kernel>', 'addr': '0xffffffffffffff80'}, {'path': '<kernel>', 'addr': '0xffffffffaf15ccca'}]
}
)

]
parser = BacktraceResolver.BacktraceParser()
for line, expected in data:
Expand Down

0 comments on commit 0af8ee3

Please sign in to comment.