llvm: Fix line number information to prevent spurious breakpoint hits #20543

tau-dev · 2024-07-08T21:29:08Z

Fixes #19443.
Reset the line number to its previous state after genBodyDebugScope, so that instructions between its end and the next dbg_stmt do not leak inside the block. This core fix is very simple, but surfaces a secondary issue: the LLVM codegen generates blocks in a different order than Clang would for the same CFG in C, which ultimately results in GDB skipping the loop header altogether (see comments for some more details). So we reorder some basic blocks too.

andrewrk · 2024-07-21T08:38:13Z

Is patching Builder really the correct solution here? I think the usage code, if anything, should be updated. Also I'm not convinced the order of basic blocks is the problem.

tau-dev · 2024-07-21T16:23:20Z

Also I'm not convinced the order of basic blocks is the problem.

If you move the placeBlock so that we get the same order of blocks again as we have in status quo:

--- /tmp/llvm.zig       2024-07-21 18:02:01.645028639 +0200
+++ src/codegen/llvm.zig        2024-07-21 18:01:12.664540800 +0200
@@ -5962,10 +5962,10 @@
             .breaks = &breaks,
         });
         defer assert(self.blocks.remove(inst));
+        try self.wip.placeBlock(parent_bb);

         try self.genBodyDebugScope(maybe_inline_func, body);

-        try self.wip.placeBlock(parent_bb);
         self.wip.cursor = .{ .block = parent_bb };

         // Create a phi node only if the block returns a value.

an then, for example, try stepping through a for loop—this is my test code:

export fn foo() u8 {
    const s = [3]u32{4,5,6};

    var x: u8 = 1;
    for (s) |c| {
        if (c == 5)
            x = 2;
        x = 3;
    }
    return x;
}

—then you can see that GDB doesn't stop on the for (s) |c| { line anymore after the first iteration. You can see the reason for this if you compare the LLVM and objdump of this function

with correct block order

define dso_local zeroext i8 @foo() #0 !dbg !6 {
Entry:
  %0 = alloca i32, align 4
  %1 = alloca [8 x i8], align 8
  %2 = alloca [1 x i8], align 1
  call void @llvm.dbg.declare(metadata ptr @0, metadata !8, metadata !DIExpression()), !dbg !7
  store i8 1, ptr %2, align 1, !dbg !9
  call void @llvm.dbg.declare(metadata ptr %2, metadata !10, metadata !DIExpression()), !dbg !9
  store i64 0, ptr %1, align 8, !dbg !9
  br label %Loop, !dbg !11

Loop:
  %3 = load i64, ptr %1, align 8, !dbg !12
  %4 = icmp ult i64 %3, 3, !dbg !13
  br i1 %4, label %Then, label %Else, !dbg !13

Then:
  %5 = getelementptr inbounds [3 x i32], ptr @0, i64 0, i64 %3, !dbg !14
  %6 = load i32, ptr %5, !dbg !14
  store i32 %6, ptr %0, align 4, !dbg !14
  call void @llvm.dbg.declare(metadata ptr %0, metadata !15, metadata !DIExpression()), !dbg !14
  %7 = icmp eq i32 %6, 5, !dbg !16
  br i1 %7, label %Then1, label %Else1, !dbg !16

Else:
  br label %Block2, !dbg !14

Then1:
  store i8 2, ptr %2, align 1, !dbg !17
  br label %Block, !dbg !17

Else1:
  br label %Block, !dbg !18

Block:
  store i8 3, ptr %2, align 1, !dbg !19
  br label %Block1, !dbg !19

Block1:
  %8 = add nuw i64 %3, 1, !dbg !12
  store i64 %8, ptr %1, align 8, !dbg !12
  br label %Loop, !dbg !11

Block2:
  %9 = load i8, ptr %2, align 1, !dbg !20
  ret i8 %9, !dbg !20
}

export fn foo() u8 {
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   48 83 ec 20             sub    $0x20,%rsp
    const s = [3]u32{4,5,6};

    var x: u8 = 1;
   8:   c6 45 ef 01             movb   $0x1,-0x11(%rbp)
   c:   48 c7 45 f0 00 00 00    movq   $0x0,-0x10(%rbp)
  13:   00 
    for (s) |c| {
  14:   48 8b 45 f0             mov    -0x10(%rbp),%rax
  18:   48 89 45 e0             mov    %rax,-0x20(%rbp)
  1c:   48 83 f8 03             cmp    $0x3,%rax
  20:   73 18                   jae    3a <foo+0x3a>
  22:   48 8b 4d e0             mov    -0x20(%rbp),%rcx
  26:   48 8d 05 00 00 00 00    lea    0x0(%rip),%rax        # 2d <foo+0x2d>
  2d:   8b 04 88                mov    (%rax,%rcx,4),%eax
  30:   89 45 fc                mov    %eax,-0x4(%rbp)
        if (c == 5)
  33:   83 f8 05                cmp    $0x5,%eax
  36:   74 04                   je     3c <foo+0x3c>
  38:   eb 08                   jmp    42 <foo+0x42>
    for (s) |c| {
  3a:   eb 1a                   jmp    56 <foo+0x56>
            x = 2;
  3c:   c6 45 ef 02             movb   $0x2,-0x11(%rbp)
  40:   eb 02                   jmp    44 <foo+0x44>
        if (c == 5)
  42:   eb 00                   jmp    44 <foo+0x44>
        x = 3;
  44:   c6 45 ef 03             movb   $0x3,-0x11(%rbp)
  48:   48 8b 45 e0             mov    -0x20(%rbp),%rax
    for (s) |c| {
  4c:   48 83 c0 01             add    $0x1,%rax
  50:   48 89 45 f0             mov    %rax,-0x10(%rbp)
  54:   eb be                   jmp    14 <foo+0x14>
    }
    return x;
  56:   0f b6 45 ef             movzbl -0x11(%rbp),%eax
  5a:   48 83 c4 20             add    $0x20,%rsp
  5e:   5d                      pop    %rbp
  5f:   c3                      ret

and without

define dso_local zeroext i8 @foo() #0 !dbg !6 {
Entry:
  %0 = alloca i32, align 4
  %1 = alloca [8 x i8], align 8
  %2 = alloca [1 x i8], align 1
  call void @llvm.dbg.declare(metadata ptr @0, metadata !8, metadata !DIExpression()), !dbg !7
  store i8 1, ptr %2, align 1, !dbg !9
  call void @llvm.dbg.declare(metadata ptr %2, metadata !10, metadata !DIExpression()), !dbg !9
  store i64 0, ptr %1, align 8, !dbg !9
  br label %Loop, !dbg !11

Block:
  %3 = load i8, ptr %2, align 1, !dbg !12
  ret i8 %3, !dbg !12

Loop:
  %4 = load i64, ptr %1, align 8, !dbg !13
  %5 = icmp ult i64 %4, 3, !dbg !14
  br i1 %5, label %Then, label %Else, !dbg !14

Block1:
  %6 = add nuw i64 %4, 1, !dbg !13
  store i64 %6, ptr %1, align 8, !dbg !13
  br label %Loop, !dbg !11

Then:
  %7 = getelementptr inbounds [3 x i32], ptr @0, i64 0, i64 %4, !dbg !15
  %8 = load i32, ptr %7, !dbg !15
  store i32 %8, ptr %0, align 4, !dbg !15
  call void @llvm.dbg.declare(metadata ptr %0, metadata !16, metadata !DIExpression()), !dbg !15
  %9 = icmp eq i32 %8, 5, !dbg !17
  br i1 %9, label %Then1, label %Else1, !dbg !17

Else:
  br label %Block, !dbg !15

Block2:
  store i8 3, ptr %2, align 1, !dbg !18
  br label %Block1, !dbg !18

Then1:
  store i8 2, ptr %2, align 1, !dbg !19
  br label %Block2, !dbg !19

Else1:
  br label %Block2, !dbg !20
}

export fn foo() u8 {
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   48 83 ec 20             sub    $0x20,%rsp
    const s = [3]u32{4,5,6};

    var x: u8 = 1;
   8:   c6 45 ef 01             movb   $0x1,-0x11(%rbp)
   c:   48 c7 45 f0 00 00 00    movq   $0x0,-0x10(%rbp)
  13:   00 
    for (s) |c| {
  14:   eb 0a                   jmp    20 <foo+0x20>
        if (c == 5)
            x = 2;
        x = 3;
    }
    return x;
  16:   0f b6 45 ef             movzbl -0x11(%rbp),%eax
  1a:   48 83 c4 20             add    $0x20,%rsp
  1e:   5d                      pop    %rbp
  1f:   c3                      ret
    for (s) |c| {
  20:   48 8b 45 f0             mov    -0x10(%rbp),%rax
  24:   48 89 45 e0             mov    %rax,-0x20(%rbp)
  28:   48 83 f8 03             cmp    $0x3,%rax
  2c:   72 10                   jb     3e <foo+0x3e>
  2e:   eb 26                   jmp    56 <foo+0x56>
  30:   48 8b 45 e0             mov    -0x20(%rbp),%rax
  34:   48 83 c0 01             add    $0x1,%rax
  38:   48 89 45 f0             mov    %rax,-0x10(%rbp)
  3c:   eb e2                   jmp    20 <foo+0x20>
  3e:   48 8b 4d e0             mov    -0x20(%rbp),%rcx
  42:   48 8d 05 00 00 00 00    lea    0x0(%rip),%rax        # 49 <foo+0x49>
  49:   8b 04 88                mov    (%rax,%rcx,4),%eax
  4c:   89 45 fc                mov    %eax,-0x4(%rbp)
        if (c == 5)
  4f:   83 f8 05                cmp    $0x5,%eax
  52:   74 0a                   je     5e <foo+0x5e>
  54:   eb 0e                   jmp    64 <foo+0x64>
    for (s) |c| {
  56:   eb be                   jmp    16 <foo+0x16>
        x = 3;
  58:   c6 45 ef 03             movb   $0x3,-0x11(%rbp)
  5c:   eb d2                   jmp    30 <foo+0x30>
            x = 2;
  5e:   c6 45 ef 02             movb   $0x2,-0x11(%rbp)
  62:   eb f4                   jmp    58 <foo+0x58>
        if (c == 5)
  64:   eb f2                   jmp    58 <foo+0x58>

to GDB's stepping logic, which only stops on the first instruction of a new line. Took me some head-scratching to figure that one out : )

How could I influence the block ordering without touching the Builder?

andrewrk · 2024-07-21T20:05:45Z

I think it would be informative to look at it in terms of the LLVM IR that is being generated.

tau-dev · 2024-07-21T20:27:25Z

The LLVM is the same, differing only in the order of the blocks. Hang on a sec, I can show you too...

tau-dev · 2024-07-21T21:06:31Z

Updated the details tags in my previous comment.

Reset the line number to its previous state after genBodyDebugScope, so that instructions between its end and the next dbg_stmt do not wander inside the block. This core fix is very simple, but surfaces a secondary issue: the LLVM codegen generates blocks in a different order than Clang would for the same CFG in C, which ultimately results in GDB skipping the loop header altogether. So we need to reorder some basic blocks too.

tau-dev force-pushed the fix-llvm-loop-lineno branch 2 times, most recently from 3ed994c to 28f87ae Compare July 8, 2024 23:34

tau-dev changed the title ~~Fix #19443~~ llvm: Fix line number information to prevent spurious breakpoint hits Jul 9, 2024

tau-dev force-pushed the fix-llvm-loop-lineno branch from 28f87ae to 0ec103b Compare July 21, 2024 17:05

tau-dev force-pushed the fix-llvm-loop-lineno branch from 0ec103b to af84823 Compare July 21, 2024 22:35

tau-dev force-pushed the fix-llvm-loop-lineno branch from af84823 to ca8b0a5 Compare September 4, 2024 12:01

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

llvm: Fix line number information to prevent spurious breakpoint hits #20543

llvm: Fix line number information to prevent spurious breakpoint hits #20543

tau-dev commented Jul 8, 2024 •

edited

Loading

andrewrk commented Jul 21, 2024 •

edited

Loading

tau-dev commented Jul 21, 2024 •

edited

Loading

andrewrk commented Jul 21, 2024

tau-dev commented Jul 21, 2024 •

edited

Loading

tau-dev commented Jul 21, 2024

llvm: Fix line number information to prevent spurious breakpoint hits #20543

Are you sure you want to change the base?

llvm: Fix line number information to prevent spurious breakpoint hits #20543

Conversation

tau-dev commented Jul 8, 2024 • edited Loading

andrewrk commented Jul 21, 2024 • edited Loading

tau-dev commented Jul 21, 2024 • edited Loading

andrewrk commented Jul 21, 2024

tau-dev commented Jul 21, 2024 • edited Loading

tau-dev commented Jul 21, 2024

tau-dev commented Jul 8, 2024 •

edited

Loading

andrewrk commented Jul 21, 2024 •

edited

Loading

tau-dev commented Jul 21, 2024 •

edited

Loading

tau-dev commented Jul 21, 2024 •

edited

Loading