Ticket #4368 (closed bug: fixed)

Opened 3 years ago

Last modified 3 years ago

T4144(dyn) failing on x86/Linux

Reported by: simonmar Owned by: benl
Priority: highest Milestone: 7.0.1
Component: Compiler (NCG) Version: 6.13
Keywords: Cc:
Operating System: Linux Architecture: x86
Type of failure: Runtime crash Difficulty:
Test Case: Blocked By:
Blocking: Related Tickets:

Description

=====> T4144(dyn) 985 of 2596 [0, 13, 0]
cd ./lib/IO && '/playpen/simonmar/nightly/HEAD/i386-unknown-linux/inplace/bin/ghc-stage2' -fforce-recomp -dcore-lint -dcmm-lint -dno-debug-output -no-user-package-conf -rtsopts  -o T4144 T4144.hs -O -dynamic   >T4144.comp.stderr 2>&1
cd ./lib/IO && ./T4144    </dev/null >T4144.run.stdout 2>T4144.run.stderr
Wrong exit code (expected 0 , actual 139 )
Stdout:

Stderr:
Segmentation fault

*** unexpected failure for T4144(dyn)

Change History

Changed 3 years ago by simonmar

  • owner set to simonmar

Changed 3 years ago by simonmar

  • owner changed from simonmar to benl
  • component changed from Compiler to Compiler (NCG)

This is a bug in the -fregs-graph register allocator, which is now on by default with -O2.

This cmm is from the code generated for libraries/base/GHC/IO/Handle/Text.hs when compiled with -dynamic:

==================== Optimised Cmm ====================
s4xo_ret()
        { [const GHC.IO.Handle.Text.$wa1_srt-s4xo_info+28;, const 8388627;,
       const 65568;]
        }
    c6mw:
        if (I32[R1 + 3] != 0) goto c6my;
        R1 = PicBaseReg + lvl5_r3Dc_closure@gotoff + 1;
        Sp = Sp + 80;
        jump (I32[Sp + 0]) ();
    c6my:
        I32[I32[Sp + 64] + 4] = I32[Sp + 72];
        _c6mu::I32 = BaseReg;
        _c6mt::I32 = I32[Sp + 64];
        foreign "ccall"
          I32[PicBaseReg + dirty_MUT_VAR@got]((_c6mu::I32, PtrHint),
                                              (_c6mt::I32, PtrHint))[_unsafe_call_];
        R1 = I32[Sp + 68];
        I32[Sp + 0] = PicBaseReg + s4xq_info@gotoff;
        if (R1 & 3 != 0) goto c6mB;
        jump I32[R1] ();
    c6mB: jump s4xq_info ();
}

The native code is correct:

==================== Native code ====================
.text
	.align 4,0x90
	.long	base_GHCziIOziHandleziText_zdwa1_srt-(s4xo_info)+28
	.long	8388627
	.long	65568
s4xo_info:
.Lc6mw:
	call 1f
1:	popl	%vI_n6mL
	addl	$_GLOBAL_OFFSET_TABLE_+(.-1b), %vI_n6mL
	cmpl $0,3(%esi)
	jne .Lc6my
	movl %vI_n6mL,%vI_n6mM
	addl $r3Dc_closure@gotoff,%vI_n6mM
	leal 1(%vI_n6mM),%esi
	addl $80,%ebp
	jmp *0(%ebp)
.Lc6my:
	movl 72(%ebp),%vI_n6mN
	movl 64(%ebp),%vI_n6mO
	movl %vI_n6mN,4(%vI_n6mO)
	movl %ebx,%vI_c6mu
	movl 64(%ebp),%vI_c6mt
	pushl %vI_c6mt
	pushl %vI_c6mu
	movl dirty_MUT_VAR@got(%vI_n6mL),%vI_n6mP
	call *%vI_n6mP
	addl $8,%esp
	movl 68(%ebp),%esi
	movl %vI_n6mL,%vI_n6mQ
	addl $s4xq_info@gotoff,%vI_n6mQ
	movl %vI_n6mQ,0(%ebp)
	testl $3,%esi
	jne .Lc6mB
	jmp *(%esi)
.Lc6mB:
	jmp s4xq_info

The liveness analysis is correct:

==================== Liveness annotations added ====================
s4xo_ret()
        { const GHC.IO.Handle.Text.$wa1_srt-s4xo_info+28;
          const 8388627;
          const 65568;
      # firstId     = Just c6mw
      # liveOnEntry = Just [(c6mw, []), (c6my, [(n6mL, %vI_n6mL)]),
                            (c6mB, [])]
        }
    [NONREC
        c6mw:
            	call 1f
            1:	popl	%vI_n6mL
            	addl	$_GLOBAL_OFFSET_TABLE_+(.-1b), %vI_n6mL
                    # born:    %vI_n6mL
                     
            	cmpl $0,3(%esi)
                     
            	jne .Lc6my
                     
            	movl %vI_n6mL,%vI_n6mM
                    # born:    %vI_n6mM
                    # r_dying: %vI_n6mL
                     
            	addl $r3Dc_closure@gotoff,%vI_n6mM
                     
            	leal 1(%vI_n6mM),%esi
                    # r_dying: %vI_n6mM
                     
            	addl $80,%ebp
                     
            	jmp *0(%ebp)
                     ,
     NONREC
        c6my:
            	movl 72(%ebp),%vI_n6mN
                    # born:    %vI_n6mN
                     
            	movl 64(%ebp),%vI_n6mO
                    # born:    %vI_n6mO
                     
            	movl %vI_n6mN,4(%vI_n6mO)
                    # r_dying: %vI_n6mN %vI_n6mO
                     
            	movl %ebx,%vI_c6mu
                    # born:    %vI_c6mu
                     
            	movl 64(%ebp),%vI_c6mt
                    # born:    %vI_c6mt
                     
            	pushl %vI_c6mt
                    # r_dying: %vI_c6mt
                     
            	pushl %vI_c6mu
                    # r_dying: %vI_c6mu
                     
            	movl dirty_MUT_VAR@got(%vI_n6mL),%vI_n6mP
                    # born:    %vI_n6mP
                     
            	call *%vI_n6mP
                    # born:    %r0 %r2 %r3 %r16 %r17 %r18 %r19 %r20 %r21 %r24 %r25 %r26 %r27 %r28 %r29 %r30 %r31
                    # r_dying: %vI_n6mP
                    # w_dying: %r0 %r2 %r3 %r16 %r17 %r18 %r19 %r20 %r21 %r24 %r25 %r26 %r27 %r28 %r29 %r30 %r31
                     
            	addl $8,%esp
                     
            	movl 68(%ebp),%esi
                     
            	movl %vI_n6mL,%vI_n6mQ
                    # born:    %vI_n6mQ
                    # r_dying: %vI_n6mL
                     
            	addl $s4xq_info@gotoff,%vI_n6mQ
                     
            	movl %vI_n6mQ,0(%ebp)
                    # r_dying: %vI_n6mQ
                     
            	testl $3,%esi
                     
            	jne .Lc6mB
                     
            	jmp *(%esi)
                     ,
     NONREC
        c6mB:
            	jmp s4xq_info
                     ]
}

But the final register-allocated code is wrong:

==================== Registers allocated ====================
.text
	.align 4,0x90
	.long	base_GHCziIOziHandleziText_zdwa1_srt-(s4xo_info)+28
	.long	8388627
	.long	65568
s4xo_info:
.Lc6mw:
	call 1f
1:	popl	%eax
	addl	$_GLOBAL_OFFSET_TABLE_+(.-1b), %eax
	cmpl $0,3(%esi)
	jne .Lc6my
	addl $r3Dc_closure@gotoff,%eax
	movl %eax,64(%esp)  # ** spill $eax here
	leal 1(%eax),%esi
	addl $80,%ebp
	jmp *0(%ebp)
.Lc6my:
	movl 72(%ebp),%eax  # ** we just clobbered $eax!!
	movl 64(%ebp),%ecx
	movl %eax,4(%ecx)
	movl %ebx,%ecx
	movl 64(%ebp),%eax
	pushl %eax
	pushl %ecx
	movl 72(%esp),%eax   # ** failed attempt to reload $eax (it wasn't spilled)
	movl dirty_MUT_VAR@got(%eax),%eax
	call *%eax
	addl $8,%esp
	movl 68(%ebp),%esi
	movl 64(%esp),%eax
	addl $s4xq_info@gotoff,%eax
	movl %eax,64(%esp)
	movl %eax,0(%ebp)
	testl $3,%esi
	jne .Lc6mB
	jmp *(%esi)
.Lc6mB:
	jmp s4xq_info

Note the lines with comments. It's as if the register allocator got the dependency graph wrong, because $eax is spilled in a basic block that does not occur before the one in which it is reloaded.

Ben, can you shed any light?

Changed 3 years ago by benl

Yes, the spill cleaner has messed up because it's not tracking slot usage across jumps properly. Here is the code before cleaning:

c5Xy:
                call 1f
             1: popl    %eax
                addl    $_GLOBAL_OFFSET_TABLE_+(.-1b), %eax
                     # born:    %r0

                SPILL %r0,SLOT(0)
                     # r_dying: %r0

                cmpl $0,3(%esi)

                jne .Lc5XA

                RELOAD SLOT(0),%r0
                     # born:    %r0

                addl $r3k3_closure@gotoff,%eax

                SPILL %r0,SLOT(0)
                     # r_dying: %r0

                RELOAD SLOT(0),%r0
                     # born:    %r0

                leal 1(%eax),%esi
                     # r_dying: %r0

                addl $80,%ebp

                jmp *0(%ebp)

         c5XA:
....
                RELOAD SLOT(0),%r0
                     # born:    %r0

                movl dirty_MUT_VAR@got(%eax),%eax
                     # born:    %r0
                     # r_dying: %r0


In this code %r0 is really %eax.

The cleaner has erased the first RELOAD instruction because %eax isn't written to between it's initialisation (by popl) and that RELOAD. The cleaner has then erased the first SPILL instruction because it doesn't know that block c5XA also reloads from it. The fact that other programs work with this bug in place shows how regular GHC generated code is.

It should be easy to fix by tracking what slots are live across jumps as well as what vregs are live. I'm working on it..

Changed 3 years ago by benl

  • status changed from new to merge

Fixed by:

Tue Oct 12 18:54:14 PDT 2010  benl@ouroborus.net
 * RegAlloc: Track slot liveness over jumps in spill cleaner

Note that the spill cleaner should really be doing a proper data flow analysis, which would propagate information about what slots are being used across block boundaries. At the moment we're just doing a single "iteration" where we should really be iterating to a fixed point where no more spill/reload instructions can be erased. It still works, but will leave more spill/reloads behind than strictly needed. I'm not sure what the performance penalty is wrt a perfect clean, and my DPH benchmark programs are all currently broken... probably no worse than before though.

Changed 3 years ago by simonmar

Thanks Ben!

Changed 3 years ago by igloo

  • status changed from merge to closed
  • resolution set to fixed

Merged.

Note: See TracTickets for help on using tickets.