aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Thornburgh <dthorn@google.com>2022-08-01 14:35:25 -0700
committerTobias Hieta <tobias@hieta.se>2022-09-07 08:37:30 +0200
commit4cc81e378a857ffcfdca3d654d3345b9ea9ab05f (patch)
tree675af5d1ecb8454bd2b09113f9f36ee088e0efff
parent[Symbolizer] Implement data symbolizer markup element. (diff)
downloadllvm-project-4cc81e378a857ffcfdca3d654d3345b9ea9ab05f.tar.gz
llvm-project-4cc81e378a857ffcfdca3d654d3345b9ea9ab05f.tar.bz2
llvm-project-4cc81e378a857ffcfdca3d654d3345b9ea9ab05f.zip
[Symbolizer] Implement pc element in symbolizing filter.
Implements the pc element for the symbolizing filter, including its "ra" and "pc" modes. Return addresses ("ra") are adjusted by decrementing them by one. By default, {{{pc}}} elements are assumed to point to precise code ("pc") locations. Backtrace elements will adopt the opposite convention. Along the way, this makes some minor refactors of value printing and colorization. Reviewed By: peter.smith Differential Revision: https://reviews.llvm.org/D131115 (cherry picked from commit bf48b128b02813e53e0c8f6585db837d14c9358f)
-rw-r--r--llvm/docs/CommandGuide/llvm-symbolizer.rst1
-rw-r--r--llvm/docs/SymbolizerMarkupFormat.rst2
-rw-r--r--llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h17
-rw-r--r--llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp145
-rw-r--r--llvm/test/DebugInfo/symbolize-filter-markup-data.test2
-rw-r--r--llvm/test/DebugInfo/symbolize-filter-markup-pc.test188
6 files changed, 327 insertions, 28 deletions
diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst
index 3fff88a7f165..5870e0e9e95e 100644
--- a/llvm/docs/CommandGuide/llvm-symbolizer.rst
+++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst
@@ -254,7 +254,6 @@ OPTIONS
and prints the results to standard output. The following markup elements are
not yet supported:
- * ``{{pc}}``
* ``{{bt}}``
* ``{{hexdict}}``
* ``{{dumpfile}}``
diff --git a/llvm/docs/SymbolizerMarkupFormat.rst b/llvm/docs/SymbolizerMarkupFormat.rst
index 319a33021950..b06cc20f41ef 100644
--- a/llvm/docs/SymbolizerMarkupFormat.rst
+++ b/llvm/docs/SymbolizerMarkupFormat.rst
@@ -184,7 +184,7 @@ human-readable symbolic form.
{{{symbol:_ZN7Mangled4NameEv}}}
{{{symbol:foobar}}}
-``{{{pc:%p}}}``, ``{{{pc:%p:ra}}}``, ``{{{pc:%p:pc}}}`` [#not_yet_implemented]_
+``{{{pc:%p}}}``, ``{{{pc:%p:ra}}}``, ``{{{pc:%p:pc}}}``
Here ``%p`` is the memory address of a code location. It might be presented as a
function name and source location. The second two forms distinguish the kind of
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
index 3a2c2bf49041..b597e9ba7ba2 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
@@ -71,6 +71,15 @@ private:
SmallVector<const MMap *> MMaps = {};
};
+ // The semantics of a possible program counter value.
+ enum class PCType { // Selected by the optional second field of a pc element.
+ // The address is a return address and must be adjusted to point to the call
+ // itself.
+ ReturnAddress, // Spelled "ra" in markup; adjustAddr() subtracts one from it.
+ // The address is the precise location in the code and needs no adjustment.
+ PreciseCode, // Spelled "pc" in markup; tryPC() uses it when no type is given.
+ };
+
bool tryContextualElement(const MarkupNode &Node,
const SmallVector<MarkupNode> &DeferredNodes);
bool tryMMap(const MarkupNode &Element,
@@ -87,6 +96,7 @@ private:
bool tryPresentation(const MarkupNode &Node);
bool trySymbol(const MarkupNode &Node);
+ bool tryPC(const MarkupNode &Node);
bool tryData(const MarkupNode &Node);
bool trySGR(const MarkupNode &Node);
@@ -96,6 +106,9 @@ private:
void restoreColor();
void resetColor();
+ void printRawElement(const MarkupNode &Element);
+ void printValue(Twine Value);
+
Optional<Module> parseModule(const MarkupNode &Element) const;
Optional<MMap> parseMMap(const MarkupNode &Element) const;
@@ -104,10 +117,12 @@ private:
Optional<uint64_t> parseSize(StringRef Str) const;
Optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const;
Optional<std::string> parseMode(StringRef Str) const;
+ Optional<PCType> parsePCType(StringRef Str) const;
bool checkTag(const MarkupNode &Node) const;
bool checkNumFields(const MarkupNode &Element, size_t Size) const;
bool checkNumFieldsAtLeast(const MarkupNode &Element, size_t Size) const;
+ bool checkNumFieldsAtMost(const MarkupNode &Element, size_t Size) const;
void reportTypeError(StringRef Str, StringRef TypeName) const;
void reportLocation(StringRef::iterator Loc) const;
@@ -115,6 +130,8 @@ private:
const MMap *getOverlappingMMap(const MMap &Map) const;
const MMap *getContainingMMap(uint64_t Addr) const;
+ uint64_t adjustAddr(uint64_t Addr, PCType Type) const;
+
StringRef lineEnding() const;
raw_ostream &OS;
diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
index 2bf2e17514e1..70fb49d74b3a 100644
--- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/Symbolize/Markup.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Debuginfod/Debuginfod.h"
@@ -163,18 +164,17 @@ bool MarkupFilter::tryModule(const MarkupNode &Node,
filterNode(Node);
beginModuleInfoLine(&Module);
OS << "; BuildID=";
- highlightValue();
- OS << toHex(Module.BuildID, /*LowerCase=*/true);
- highlight();
+ printValue(toHex(Module.BuildID, /*LowerCase=*/true));
return true;
}
void MarkupFilter::beginModuleInfoLine(const Module *M) {
highlight();
OS << "[[[ELF module";
- highlightValue();
- OS << formatv(" #{0:x} \"{1}\"", M->ID, M->Name);
- highlight();
+ printValue(formatv(" #{0:x} ", M->ID));
+ OS << '"';
+ printValue(M->Name);
+ OS << '"';
MIL = ModuleInfoLine{M};
}
@@ -186,13 +186,12 @@ void MarkupFilter::endAnyModuleInfoLine() {
});
for (const MMap *M : MIL->MMaps) {
OS << (M == MIL->MMaps.front() ? ' ' : ',');
- highlightValue();
- OS << formatv("[{0:x}-{1:x}]", M->Addr, M->Addr + M->Size - 1);
- highlight();
- OS << '(';
- highlightValue();
- OS << M->Mode;
- highlight();
+ OS << '[';
+ printValue(formatv("{0:x}", M->Addr));
+ OS << '-';
+ printValue(formatv("{0:x}", M->Addr + M->Size - 1));
+ OS << "](";
+ printValue(M->Mode);
OS << ')';
}
OS << "]]]" << lineEnding();
@@ -215,6 +214,8 @@ void MarkupFilter::filterNode(const MarkupNode &Node) {
bool MarkupFilter::tryPresentation(const MarkupNode &Node) { // Offers Node to each presentation handler in turn.
if (trySymbol(Node))
return true; // Claimed by the symbol handler.
+ if (tryPC(Node))
+ return true; // Claimed by the pc handler.
return tryData(Node); // Last handler tried; its result is returned unchanged.
}
@@ -230,6 +231,61 @@ bool MarkupFilter::trySymbol(const MarkupNode &Node) {
return true;
}
+bool MarkupFilter::tryPC(const MarkupNode &Node) { // Handles a "pc" element; returns true whenever the tag is "pc", even on error.
+ if (Node.Tag != "pc")
+ return false; // Not a pc element: leave it for another handler.
+ if (!checkNumFieldsAtLeast(Node, 1))
+ return true; // Tag matched but the address field is missing; element is consumed.
+ if (!checkNumFieldsAtMost(Node, 2))
+ return true; // More than address + type; element is consumed without printing here.
+
+ Optional<uint64_t> Addr = parseAddr(Node.Fields[0]); // Field 0 is the address.
+ if (!Addr)
+ return true; // Unparseable address; element is consumed without printing here.
+
+ // PC addresses that aren't part of a backtrace are assumed to be precise code
+ // locations.
+ PCType Type = PCType::PreciseCode;
+ if (Node.Fields.size() == 2) { // Optional field 1 overrides the default type.
+ Optional<PCType> ParsedType = parsePCType(Node.Fields[1]);
+ if (!ParsedType)
+ return true; // parsePCType() already reported the bad type string.
+ Type = *ParsedType;
+ }
+ *Addr = adjustAddr(*Addr, Type); // Everything below uses the adjusted address.
+
+ const MMap *MMap = getContainingMMap(*Addr);
+ if (!MMap) {
+ WithColor::error() << "no mmap covers address\n";
+ reportLocation(Node.Fields[0].begin()); // Location passed is the start of the address field.
+ printRawElement(Node); // Fall back to echoing the element in bracketed form.
+ return true;
+ }
+
+ Expected<DILineInfo> LI = Symbolizer.symbolizeCode(
+ MMap->Mod->BuildID, {MMap->getModuleRelativeAddr(*Addr)}); // Looked up by build ID and module-relative address.
+ if (!LI) {
+ WithColor::defaultErrorHandler(LI.takeError()); // Consumes the error held by LI.
+ printRawElement(Node);
+ return true;
+ }
+ if (LI->FileName == DILineInfo::BadString &&
+ LI->FunctionName == DILineInfo::BadString && LI->Line == 0) { // File, function and line are all placeholders.
+ printRawElement(Node);
+ return true;
+ }
+
+ highlight(); // From here the output has the shape: function[file:line]
+ printValue(LI->FunctionName);
+ OS << '[';
+ printValue(LI->FileName);
+ OS << ':';
+ printValue(Twine(LI->Line));
+ OS << ']';
+ restoreColor();
+ return true;
+}
+
bool MarkupFilter::tryData(const MarkupNode &Node) {
if (Node.Tag != "data")
return false;
@@ -239,21 +295,11 @@ bool MarkupFilter::tryData(const MarkupNode &Node) {
if (!Addr)
return true;
- const auto PrintRaw = [&]() {
- highlight();
- OS << "[[[data:";
- highlightValue();
- OS << "0x" << toHex(*Addr, /*LowerCase=*/true);
- highlight();
- OS << "]]]\n";
- restoreColor();
- };
-
const MMap *MMap = getContainingMMap(*Addr);
if (!MMap) {
WithColor::error() << "no mmap covers address\n";
reportLocation(Node.Fields[0].begin());
- PrintRaw();
+ printRawElement(Node);
return true;
}
@@ -261,7 +307,7 @@ bool MarkupFilter::tryData(const MarkupNode &Node) {
MMap->Mod->BuildID, {MMap->getModuleRelativeAddr(*Addr)});
if (!Symbol) {
WithColor::defaultErrorHandler(Symbol.takeError());
- PrintRaw();
+ printRawElement(Node);
return true;
}
@@ -343,6 +389,24 @@ void MarkupFilter::resetColor() {
OS.resetColor();
}
+void MarkupFilter::printRawElement(const MarkupNode &Element) { // Echoes Element to OS as [[[tag:field:...]]].
+ highlight();
+ OS << "[[["; // Square brackets are written here, not the braces of the input markup.
+ printValue(Element.Tag);
+ for (StringRef Field : Element.Fields) { // Fields are written in order, each preceded by ':'.
+ OS << ':';
+ printValue(Field); // Field text is written as-is, with no reformatting.
+ }
+ OS << "]]]"; // No line ending is written by this function.
+ restoreColor();
+}
+
+void MarkupFilter::printValue(Twine Value) { // Writes Value to OS bracketed by highlightValue() and highlight().
+ highlightValue(); // Switch to the value highlight before writing.
+ OS << Value;
+ highlight(); // Then call highlight() again, so callers need not re-apply it.
+}
+
// This macro helps reduce the amount of indirection done through Optional
// below, since the usual case upon returning a None Optional is to return None.
#define ASSIGN_OR_RETURN_NONE(TYPE, NAME, EXPR) \
@@ -476,6 +540,17 @@ Optional<std::string> MarkupFilter::parseMode(StringRef Str) const {
return Str.lower();
}
+Optional<MarkupFilter::PCType> MarkupFilter::parsePCType(StringRef Str) const { // Maps "ra"/"pc" to a PCType; None for anything else.
+ Optional<MarkupFilter::PCType> Type =
+ StringSwitch<Optional<MarkupFilter::PCType>>(Str)
+ .Case("ra", MarkupFilter::PCType::ReturnAddress)
+ .Case("pc", MarkupFilter::PCType::PreciseCode)
+ .Default(None); // Any other string, including the empty string, is rejected.
+ if (!Type)
+ reportTypeError(Str, "PC type"); // Report the failure here so callers only need to test the result.
+ return Type;
+}
+
bool MarkupFilter::checkTag(const MarkupNode &Node) const {
if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) {
WithColor::error(errs()) << "tags must be all lowercase characters\n";
@@ -508,6 +583,18 @@ bool MarkupFilter::checkNumFieldsAtLeast(const MarkupNode &Element,
return true;
}
+bool MarkupFilter::checkNumFieldsAtMost(const MarkupNode &Element, // Returns true iff Element has no more than Size fields.
+ size_t Size) const {
+ if (Element.Fields.size() > Size) { // Exactly Size fields is still accepted.
+ WithColor::error(errs())
+ << "expected at most " << Size << " field(s); found "
+ << Element.Fields.size() << "\n";
+ reportLocation(Element.Tag.end()); // Location passed is the end of the tag.
+ return false;
+ }
+ return true;
+}
+
void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const {
WithColor::error(errs()) << "expected " << TypeName << "; found '" << Str
<< "'\n";
@@ -556,6 +643,14 @@ const MarkupFilter::MMap *MarkupFilter::getContainingMMap(uint64_t Addr) const {
return I->second.contains(Addr) ? &I->second : nullptr;
}
+uint64_t MarkupFilter::adjustAddr(uint64_t Addr, PCType Type) const { // Returns Addr - 1 for return addresses, Addr otherwise.
+ // Decrementing return addresses by one moves them into the call instruction.
+ // The address doesn't have to be the start of the call instruction, just some
+ // byte on the inside. Subtracting one avoids needing detailed instruction
+ // length information here.
+ return Type == MarkupFilter::PCType::ReturnAddress ? Addr - 1 : Addr; // Unsigned arithmetic: a return address of 0 wraps to UINT64_MAX.
+}
+
StringRef MarkupFilter::lineEnding() const { // Picks the line terminator to emit, based on how Line ends.
return Line.endswith("\r\n") ? "\r\n" : "\n"; // CRLF only if Line ends with CRLF; otherwise a bare LF.
}
diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-data.test b/llvm/test/DebugInfo/symbolize-filter-markup-data.test
index 3ce1baea4d6c..ed7066c7ca78 100644
--- a/llvm/test/DebugInfo/symbolize-filter-markup-data.test
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-data.test
@@ -12,7 +12,7 @@ RUN: FileCheck %s --check-prefix=ERR --input-file=%t.err --match-full-lines
CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abcdef [0x0-0x4](r),[0x10-0x11](r)[[END:\]{3}]]
CHECK: long long byte
CHECK: long byte
-CHECK: [[BEGIN]]data:0x05[[END]]
+CHECK: [[BEGIN]]data:0x5[[END]]
ERR: error: expected 1 field(s); found 0
ERR: error: no mmap covers address
diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-pc.test b/llvm/test/DebugInfo/symbolize-filter-markup-pc.test
new file mode 100644
index 000000000000..5d77a5c72411
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-pc.test
@@ -0,0 +1,188 @@
+REQUIRES: x86-registered-target
+RUN: split-file %s %t
+RUN: mkdir -p %t/.build-id/ab
+RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t/asm.s \
+RUN: -o %t/.build-id/ab/cdef.debug
+RUN: llvm-symbolizer --debug-file-directory=%t --filter-markup < %t/input \
+RUN: > %t.output 2> %t.err
+RUN: FileCheck %s --input-file=%t.output --match-full-lines \
+RUN: --implicit-check-not {{.}}
+RUN: FileCheck %s --check-prefix=ERR --input-file=%t.err --match-full-lines
+
+CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abcdef [0x0-0xff](r)[[END:\]{3}]]
+CHECK: first[/dir/tmp.c:3]
+CHECK: first[/dir/tmp.c:5]
+CHECK: first[/dir/tmp.c:4]
+CHECK: first[/dir/tmp.c:5]
+CHECK: [[BEGIN]]pc:0xff[[END]]
+CHECK: [[BEGIN]]pc:0x100[[END]]
+
+ERR: error: expected at least 1 field(s); found 0
+ERR: error: no mmap covers address
+ERR: error: expected PC type; found ''
+ERR: error: expected at most 2 field(s); found 3
+
+;--- input
+{{{module:0:a.o:elf:abcdef}}}
+{{{mmap:0:256:load:0:r:0}}}
+{{{pc:0}}}
+{{{pc:0x9}}}
+{{{pc:0x9:ra}}}
+{{{pc:0x9:pc}}}
+{{{pc:0xff}}}
+
+{{{pc}}}
+{{{pc:0x100}}}
+{{{pc:0x9:}}}
+{{{pc:0x9:pc:}}}
+;--- asm.s
+ .text
+ .file "tmp.c"
+ .globl first # -- Begin function first
+ .p2align 4, 0x90
+ .type first,@function
+first: # @first
+.Lfunc_begin0:
+ .file 1 "/dir" "tmp.c"
+ .loc 1 3 0 # tmp.c:3:0
+ .cfi_startproc
+# %bb.0:
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp0:
+ .loc 1 4 3 prologue_end # tmp.c:4:3
+ callq second
+ .loc 1 5 1 # tmp.c:5:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp1:
+.Lfunc_end0:
+ .size first, .Lfunc_end0-first
+ .cfi_endproc
+ # -- End function
+ .globl second # -- Begin function second
+ .p2align 4, 0x90
+ .type second,@function
+second: # @second
+.Lfunc_begin1:
+ .loc 1 7 0 # tmp.c:7:0
+ .cfi_startproc
+# %bb.0:
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp2:
+ .loc 1 8 3 prologue_end # tmp.c:8:3
+ callq first
+ .loc 1 9 1 # tmp.c:9:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp3:
+.Lfunc_end1:
+ .size second, .Lfunc_end1-second
+ .cfi_endproc
+ # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 14 # DW_FORM_strp
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 14 # DW_FORM_strp
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 39 # DW_AT_prototyped
+ .byte 25 # DW_FORM_flag_present
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x4a DW_TAG_compile_unit
+ .long .Linfo_string0 # DW_AT_producer
+ .short 12 # DW_AT_language
+ .long .Linfo_string1 # DW_AT_name
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Linfo_string2 # DW_AT_comp_dir
+ .quad .Lfunc_begin0 # DW_AT_low_pc
+ .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 2 # Abbrev [2] 0x2a:0x15 DW_TAG_subprogram
+ .quad .Lfunc_begin0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string3 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 3 # DW_AT_decl_line
+ # DW_AT_prototyped
+ # DW_AT_external
+ .byte 2 # Abbrev [2] 0x3f:0x15 DW_TAG_subprogram
+ .quad .Lfunc_begin1 # DW_AT_low_pc
+ .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string4 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 7 # DW_AT_decl_line
+ # DW_AT_prototyped
+ # DW_AT_external
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "clang" # string offset=0
+.Linfo_string1:
+ .asciz "tmp.c" # string offset=30
+.Linfo_string2:
+ .asciz "/dir" # string offset=36
+.Linfo_string3:
+ .asciz "first" # string offset=85
+.Linfo_string4:
+ .asciz "second" # string offset=91
+ .ident "clang"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .addrsig_sym first
+ .addrsig_sym second
+ .section .debug_line,"",@progbits
+.Lline_table_start0: