This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] stdlib-bsearch: middle element calculation may overflow


On (03/28/17 11:30), Zack Weinberg wrote:
> On Thu, Mar 16, 2017 at 11:32 PM, Sergey Senozhatsky
> <sergey.senozhatsky.work@gmail.com> wrote:
> > so I modified the test I posted earlier. moved array to .bss, made array
> > size reasonable (so there is no overflow). each ./a.out executes bsearch()
> > 10000000 times.
> 
> Thanks for this.  It might also be useful to show the codegen
> difference, which (on x86-64) is
> 
> -bsearch_stock:
> +bsearch_patched:
>  .LFB10:
>      .cfi_startproc
>      pushq    %r15
> @@ -43,9 +43,11 @@
>      cmpq    %r13, %r14
>      jnb    .L6
>  .L9:
> -    leaq    (%r14,%r13), %rbx
> +    movq    %r13, %rbx
>      movq    (%rsp), %rdi
> +    subq    %r14, %rbx
>      shrq    %rbx
> +    addq    %r14, %rbx
>      movq    %rbx, %r15
>      imulq    %r12, %r15
>      addq    8(%rsp), %r15
> 
> This probably hurts by extending the critical dependency chain, as
> well as by issuing more instructions.  I would like to know what the
> codegen difference looks like on more RISCy architectures (maybe MIPS
> and AArch64?) -- I have to wonder whether a CPU that didn't have
> complex LEA would wind up generating essentially the same code either
> way.

sorry fot the delay.

ARM

arm-none-eabi-gcc (Arch Repository) 6.3.0

-O2

	UNPATCHED										PATCHED

    8368:	e92d4ff8 	push	{r3, r4, r5, r6, r7, r8, r9, sl, fp, lr}     	    83e4:	e92d4ff8 	push	{r3, r4, r5, r6, r7, r8, r9, sl, fp, lr}
    836c:	e2526000 	subs	r6, r2, #0                                   	    83e8:	e2526000 	subs	r6, r2, #0
    8370:	e59da028 	ldr	sl, [sp, #40]	; 0x28                         	    83ec:	e59da028 	ldr	sl, [sp, #40]	; 0x28
    8374:	11a07000 	movne	r7, r0                                      	    83f0:	11a07000 	movne	r7, r0
    8378:	11a08001 	movne	r8, r1                                      	    83f4:	11a08001 	movne	r8, r1
    837c:	11a09003 	movne	r9, r3                                      	    83f8:	11a09003 	movne	r9, r3
    8380:	13a05000 	movne	r5, #0                                      	    83fc:	13a05000 	movne	r5, #0
    8384:	1a000004 	bne	839c <____bsearch+0x34>                       	    8400:	1a000004 	bne	8418 <__bsearch+0x34>
    8388:	ea00000f 	b	83cc <____bsearch+0x64>                        	    8404:	ea00000f 	b	8448 <__bsearch+0x64>
    838c:	0a000011 	beq	83d8 <____bsearch+0x70>                       	    8408:	0a000011 	beq	8454 <__bsearch+0x70>
    8390:	e2845001 	add	r5, r4, #1                                    	    840c:	e2845001 	add	r5, r4, #1
    8394:	e1550006 	cmp	r5, r6                                        	    8410:	e1550006 	cmp	r5, r6
    8398:	2a00000b 	bcs	83cc <____bsearch+0x64>                       	    8414:	2a00000b 	bcs	8448 <__bsearch+0x64>
    839c:	e0854006 	add	r4, r5, r6                                    	    8418:	e0464005 	sub	r4, r6, r5
    83a0:	e1a040a4 	lsr	r4, r4, #1                                    	    841c:	e08540a4 	add	r4, r5, r4, lsr #1
    83a4:	e02b8499 	mla	fp, r9, r4, r8                                	    8420:	e02b8499 	mla	fp, r9, r4, r8
    83a8:	e1a00007 	mov	r0, r7                                        	    8424:	e1a00007 	mov	r0, r7
    83ac:	e1a0100b 	mov	r1, fp                                        	    8428:	e1a0100b 	mov	r1, fp
    83b0:	e1a0e00f 	mov	lr, pc                                        	    842c:	e1a0e00f 	mov	lr, pc
    83b4:	e12fff1a 	bx	sl                                             	    8430:	e12fff1a 	bx	sl
    83b8:	e3500000 	cmp	r0, #0                                        	    8434:	e3500000 	cmp	r0, #0
    83bc:	aafffff2 	bge	838c <____bsearch+0x24>                       	    8438:	aafffff2 	bge	8408 <__bsearch+0x24>
    83c0:	e1a06004 	mov	r6, r4                                        	    843c:	e1a06004 	mov	r6, r4
    83c4:	e1550006 	cmp	r5, r6                                        	    8440:	e1550006 	cmp	r5, r6
    83c8:	3afffff3 	bcc	839c <____bsearch+0x34>                       	    8444:	3afffff3 	bcc	8418 <__bsearch+0x34>
    83cc:	e3a00000 	mov	r0, #0                                        	    8448:	e3a00000 	mov	r0, #0
    83d0:	e8bd4ff8 	pop	{r3, r4, r5, r6, r7, r8, r9, sl, fp, lr}      	    844c:	e8bd4ff8 	pop	{r3, r4, r5, r6, r7, r8, r9, sl, fp, lr}
    83d4:	e12fff1e 	bx	lr                                             	    8450:	e12fff1e 	bx	lr
    83d8:	e1a0000b 	mov	r0, fp                                        	    8454:	e1a0000b 	mov	r0, fp
    83dc:	e8bd4ff8 	pop	{r3, r4, r5, r6, r7, r8, r9, sl, fp, lr}      	    8458:	e8bd4ff8 	pop	{r3, r4, r5, r6, r7, r8, r9, sl, fp, lr}
    83e0:	e12fff1e 	bx	lr                                             	    845c:	e12fff1e 	bx	lr

	-ss


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]