This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.12-143-g0959ffc


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  0959ffc97b738c489087bcf45578c1580a87e66d (commit)
      from  ece298407076558531796450af39199aa0b34bef (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0959ffc97b738c489087bcf45578c1580a87e66d

commit 0959ffc97b738c489087bcf45578c1580a87e66d
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Thu Sep 2 23:36:25 2010 -0700

    Update x86-64 mpn routines from GMP 5.0.1.

diff --git a/ChangeLog b/ChangeLog
index fd4b775..fe512db 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,12 +1,22 @@
+2010-09-02  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/x86_64/add_n.S: Update from GMP 5.0.1.
+	* sysdeps/x86_64/addmul_1.S: Likewise.
+	* sysdeps/x86_64/lshift.S: Likewise.
+	* sysdeps/x86_64/mul_1.S: Likewise.
+	* sysdeps/x86_64/rshift.S: Likewise.
+	* sysdeps/x86_64/sub_n.S: Likewise.
+	* sysdeps/x86_64/submul_1.S: Likewise.
+
 2010-09-01  Samuel Thibault  <samuel.thibault@ens-lyon.org>
 
-        This aligns bits/sched.h onto sysdeps/unix/sysv/linux/bits/sched.h:
-        Define __sched_param instead of SCHED_* and sched_param when
+	This aligns bits/sched.h onto sysdeps/unix/sysv/linux/bits/sched.h:
+	Define __sched_param instead of SCHED_* and sched_param when
 	<bits/sched.h> is included with __need_schedparam defined.
-        * bits/sched.h [__need_schedparam]
+	* bits/sched.h [__need_schedparam]
 	(SCHED_OTHER, SCHED_FIFO, SCHED_RR, sched_param): Do not define.
-        [!__defined_schedparam && (__need_schedparam || _SCHED_H)]
-        (__defined_schedparam): Define to 1.
+	[!__defined_schedparam && (__need_schedparam || _SCHED_H)]
+	(__defined_schedparam): Define to 1.
 	(__sched_param): New structure, identical to sched_param.
 	(__need_schedparam): Undefine.
 
diff --git a/sysdeps/x86_64/add_n.S b/sysdeps/x86_64/add_n.S
index 7883f6c..f0b4c3f 100644
--- a/sysdeps/x86_64/add_n.S
+++ b/sysdeps/x86_64/add_n.S
@@ -1,6 +1,6 @@
-/* Add two limb vectors of the same length > 0 and store sum in a third
-   limb vector.
-   Copyright (C) 2004 Free Software Foundation, Inc.
+/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+   sum in a third limb vector.
+   Copyright (C) 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
@@ -21,22 +21,81 @@
 #include "sysdep.h"
 #include "asm-syntax.h"
 
+#define rp	%rdi
+#define up	%rsi
+#define vp	%rdx
+#define n	%rcx
+#define cy	%r8
+
+#ifndef func
+# define func __mpn_add_n
+# define ADCSBB adc
+#endif
+
 	.text
-ENTRY (__mpn_add_n)
-	leaq	(%rsi,%rcx,8), %rsi
-	leaq	(%rdi,%rcx,8), %rdi
-	leaq	(%rdx,%rcx,8), %rdx
-	negq	%rcx
-	xorl	%eax, %eax			# clear cy
-	.p2align 2
-L(loop):
-	movq	(%rsi,%rcx,8), %rax
-	movq	(%rdx,%rcx,8), %r10
-	adcq	%r10, %rax
-	movq	%rax, (%rdi,%rcx,8)
-	incq	%rcx
-	jne	L(loop)
-	movq	%rcx, %rax			# zero %rax
-	adcq	%rax, %rax
+ENTRY (func)
+	xor	%r8, %r8
+	mov	(up), %r10
+	mov	(vp), %r11
+
+	lea	-8(up,n,8), up
+	lea	-8(vp,n,8), vp
+	lea	-16(rp,n,8), rp
+	mov	%ecx, %eax
+	neg	n
+	and	$3, %eax
+	je	L(b00)
+	add	%rax, n		/* clear low rcx bits for jrcxz */
+	cmp	$2, %eax
+	jl	L(b01)
+	je	L(b10)
+
+L(b11):	shr	%r8		/* set cy */
+	jmp	L(e11)
+
+L(b00):	shr	%r8		/* set cy */
+	mov	%r10, %r8
+	mov	%r11, %r9
+	lea	4(n), n
+	jmp	L(e00)
+
+L(b01):	shr	%r8		/* set cy */
+	jmp	L(e01)
+
+L(b10):	shr	%r8		/* set cy */
+	mov	%r10, %r8
+	mov	%r11, %r9
+	jmp	L(e10)
+
+L(end):	ADCSBB	%r11, %r10
+	mov	%r10, 8(rp)
+	mov	%ecx, %eax	/* clear eax, ecx contains 0 */
+	adc	%eax, %eax
 	ret
-END (__mpn_add_n)
+
+	.p2align 4
+L(top):
+	mov	-24(up,n,8), %r8
+	mov	-24(vp,n,8), %r9
+	ADCSBB	%r11, %r10
+	mov	%r10, -24(rp,n,8)
+L(e00):
+	mov	-16(up,n,8), %r10
+	mov	-16(vp,n,8), %r11
+	ADCSBB	%r9, %r8
+	mov	%r8, -16(rp,n,8)
+L(e11):
+	mov	-8(up,n,8), %r8
+	mov	-8(vp,n,8), %r9
+	ADCSBB	%r11, %r10
+	mov	%r10, -8(rp,n,8)
+L(e10):
+	mov	(up,n,8), %r10
+	mov	(vp,n,8), %r11
+	ADCSBB	%r9, %r8
+	mov	%r8, (rp,n,8)
+L(e01):
+	jrcxz	L(end)
+	lea	4(n), n
+	jmp	L(top)
+END (func)
diff --git a/sysdeps/x86_64/addmul_1.S b/sysdeps/x86_64/addmul_1.S
index bdb5226..e997896 100644
--- a/sysdeps/x86_64/addmul_1.S
+++ b/sysdeps/x86_64/addmul_1.S
@@ -1,6 +1,6 @@
-/* AMD64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
    the result to a second limb vector.
-   Copyright (C) 2004 Free Software Foundation, Inc.
+   Copyright (C) 2003,2004,2005,2007,2008,2009 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
@@ -21,26 +21,95 @@
 #include "sysdep.h"
 #include "asm-syntax.h"
 
+#define rp	%rdi
+#define up	%rsi
+#define n	%rdx
+#define v0	%rcx
+
+#ifndef func
+# define func __mpn_addmul_1
+# define ADDSUB add
+#endif
+
 	.text
-ENTRY (__mpn_addmul_1)
-	movq	%rdx, %r11
-	leaq	(%rsi,%rdx,8), %rsi
-	leaq	(%rdi,%rdx,8), %rdi
-	negq	%r11
-	xorl	%r8d, %r8d
-	xorl	%r10d, %r10d
-	.p2align 2
-L(loop):
-	movq	(%rsi,%r11,8), %rax
-	mulq	%rcx
-	addq	(%rdi,%r11,8), %rax
-	adcq	%r10, %rdx
-	addq	%r8, %rax
-	movq	%r10, %r8
-	movq	%rax, (%rdi,%r11,8)
-	adcq	%rdx, %r8
-	incq	%r11
-	jne	L(loop)
-	movq	%r8, %rax
+ENTRY (func)
+	push	%rbx
+	push	%rbp
+	lea	(%rdx), %rbx
+	neg	%rbx
+
+	mov	(up), %rax
+	mov	(rp), %r10
+
+	lea	-16(rp,%rdx,8), rp
+	lea	(up,%rdx,8), up
+	mul	%rcx
+
+	bt	$0, %ebx
+	jc	L(odd)
+
+	lea	(%rax), %r11
+	mov	8(up,%rbx,8), %rax
+	lea	(%rdx), %rbp
+	mul	%rcx
+	add	$2, %rbx
+	jns	L(n2)
+
+	lea	(%rax), %r8
+	mov	(up,%rbx,8), %rax
+	lea	(%rdx), %r9
+	jmp	L(mid)
+
+L(odd):	add	$1, %rbx
+	jns	L(n1)
+
+	lea	(%rax), %r8
+	mov	(up,%rbx,8), %rax
+	lea	(%rdx), %r9
+	mul	%rcx
+	lea	(%rax), %r11
+	mov	8(up,%rbx,8), %rax
+	lea	(%rdx), %rbp
+	jmp	L(e)
+
+	.p2align 4
+L(top):	mul	%rcx
+	ADDSUB	%r8, %r10
+	lea	(%rax), %r8
+	mov	(up,%rbx,8), %rax
+	adc	%r9, %r11
+	mov	%r10, -8(rp,%rbx,8)
+	mov	(rp,%rbx,8), %r10
+	lea	(%rdx), %r9
+	adc	$0, %rbp
+L(mid):	mul	%rcx
+	ADDSUB	%r11, %r10
+	lea	(%rax), %r11
+	mov	8(up,%rbx,8), %rax
+	adc	%rbp, %r8
+	mov	%r10, (rp,%rbx,8)
+	mov	8(rp,%rbx,8), %r10
+	lea	(%rdx), %rbp
+	adc	$0, %r9
+L(e):	add	$2, %rbx
+	js	L(top)
+
+	mul	%rcx
+	ADDSUB	%r8, %r10
+	adc	%r9, %r11
+	mov	%r10, -8(rp)
+	adc	$0, %rbp
+L(n2):	mov	(rp), %r10
+	ADDSUB	%r11, %r10
+	adc	%rbp, %rax
+	mov	%r10, (rp)
+	adc	$0, %rdx
+L(n1):	mov	8(rp), %r10
+	ADDSUB	%rax, %r10
+	mov	%r10, 8(rp)
+	mov	%ebx, %eax	/* zero rax */
+	adc	%rdx, %rax
+	pop	%rbp
+	pop	%rbx
 	ret
-END (__mpn_addmul_1)
+END (func)
diff --git a/sysdeps/x86_64/lshift.S b/sysdeps/x86_64/lshift.S
index 5ac66f0..f89d3e0 100644
--- a/sysdeps/x86_64/lshift.S
+++ b/sysdeps/x86_64/lshift.S
@@ -1,5 +1,5 @@
-/* AMD64 __mpn_lshift --
-   Copyright 2004, 2006 Free Software Foundation, Inc.
+/* x86-64 __mpn_lshift --
+   Copyright (C) 2007, 2009 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
@@ -20,41 +20,98 @@
 #include "sysdep.h"
 #include "asm-syntax.h"
 
+#define rp	%rdi
+#define up	%rsi
+#define n	%rdx
+#define cnt	%cl
 
 	.text
 ENTRY (__mpn_lshift)
-	movq	-8(%rsi,%rdx,8), %mm7
-	movd	%ecx, %mm1
-	movl	$64, %eax
-	subl	%ecx, %eax
-	movd	%eax, %mm0
-	movq	%mm7, %mm3
-	psrlq	%mm0, %mm7
-	movd	%mm7, %rax
-	subq	$2, %rdx
-	jl	L(endo)
-	.p2align 2
-L(loop):
-	movq	(%rsi,%rdx,8), %mm6
-	movq	%mm6, %mm2
-	psrlq	%mm0, %mm6
-	psllq	%mm1, %mm3
-	por	%mm6, %mm3
-	movq	%mm3, 8(%rdi,%rdx,8)
-	je	L(ende)
-	movq	-8(%rsi,%rdx,8), %mm7
-	movq	%mm7, %mm3
-	psrlq	%mm0, %mm7
-	psllq	%mm1, %mm2
-	por	%mm7, %mm2
-	movq	%mm2, (%rdi,%rdx,8)
-	subq	$2, %rdx
-	jge	L(loop)
-L(endo):
-	movq	%mm3, %mm2
-L(ende):
-	psllq	%mm1, %mm2
-	movq	%mm2, (%rdi)
-	emms
+	lea	-8(rp,n,8), rp
+	lea	-8(up,n,8), up
+
+	mov	%edx, %eax
+	and	$3, %eax
+	jne	L(nb00)
+L(b00):	/* n = 4, 8, 12, ... */
+	mov	(up), %r10
+	mov	-8(up), %r11
+	xor	%eax, %eax
+	shld	%cl, %r10, %rax
+	mov	-16(up), %r8
+	lea	24(rp), rp
+	sub	$4, n
+	jmp	L(00)
+
+L(nb00):/* n = 1, 5, 9, ... */
+	cmp	$2, %eax
+	jae	L(nb01)
+L(b01):	mov	(up), %r9
+	xor	%eax, %eax
+	shld	%cl, %r9, %rax
+	sub	$2, n
+	jb	L(le1)
+	mov	-8(up), %r10
+	mov	-16(up), %r11
+	lea	-8(up), up
+	lea	16(rp), rp
+	jmp	L(01)
+L(le1):	shl	%cl, %r9
+	mov	%r9, (rp)
+	ret
+
+L(nb01):/* n = 2, 6, 10, ... */
+	jne	L(b11)
+L(b10):	mov	(up), %r8
+	mov	-8(up), %r9
+	xor	%eax, %eax
+	shld	%cl, %r8, %rax
+	sub	$3, n
+	jb	L(le2)
+	mov	-16(up), %r10
+	lea	-16(up), up
+	lea	8(rp), rp
+	jmp	L(10)
+L(le2):	shld	%cl, %r9, %r8
+	mov	%r8, (rp)
+	shl	%cl, %r9
+	mov	%r9, -8(rp)
+	ret
+
+	.p2align 4		/* performance critical! */
+L(b11):	/* n = 3, 7, 11, ... */
+	mov	(up), %r11
+	mov	-8(up), %r8
+	xor	%eax, %eax
+	shld	%cl, %r11, %rax
+	mov	-16(up), %r9
+	lea	-24(up), up
+	sub	$4, n
+	jb	L(end)
+
+	.p2align 4
+L(top):	shld	%cl, %r8, %r11
+	mov	(up), %r10
+	mov	%r11, (rp)
+L(10):	shld	%cl, %r9, %r8
+	mov	-8(up), %r11
+	mov	%r8, -8(rp)
+L(01):	shld	%cl, %r10, %r9
+	mov	-16(up), %r8
+	mov	%r9, -16(rp)
+L(00):	shld	%cl, %r11, %r10
+	mov	-24(up), %r9
+	mov	%r10, -24(rp)
+	add	$-32, up
+	lea	-32(rp), rp
+	sub	$4, n
+	jnc	L(top)
+
+L(end):	shld	%cl, %r8, %r11
+	mov	%r11, (rp)
+	shld	%cl, %r9, %r8
+	mov	%r8, -8(rp)
+	shl	%cl, %r9
+	mov	%r9, -16(rp)
 	ret
 END (__mpn_lshift)
diff --git a/sysdeps/x86_64/mul_1.S b/sysdeps/x86_64/mul_1.S
index 978916b..676afd1 100644
--- a/sysdeps/x86_64/mul_1.S
+++ b/sysdeps/x86_64/mul_1.S
@@ -1,6 +1,6 @@
 /* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
    the result in a second limb vector.
-   Copyright (C) 2004 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
@@ -21,22 +21,109 @@
 #include <sysdep.h>
 #include "asm-syntax.h"
 
+#define rp	%rdi
+#define up	%rsi
+#define n_param	%rdx
+#define vl	%rcx
+
+#define n	%r11
+
 	.text
 ENTRY (__mpn_mul_1)
-	movq	%rdx, %r11
-	leaq	(%rsi,%rdx,8), %rsi
-	leaq	(%rdi,%rdx,8), %rdi
-	negq	%r11
-	xorl	%r8d, %r8d
-L(loop):
-	movq	(%rsi,%r11,8), %rax
-	mulq	%rcx
-	addq	%r8, %rax
-	movl	$0, %r8d
-	adcq	%rdx, %r8
-	movq	%rax, (%rdi,%r11,8)
-	incq	%r11
-	jne	L(loop)
-	movq	%r8, %rax
+	push	%rbx
+	cfi_adjust_cfa_offset (8)
+	cfi_rel_offset (%rbx, 0)
+	xor	%r10, %r10
+	mov	(up), %rax		/* read first u limb early */
+	mov	n_param, %rbx		/* move away n from rdx, mul uses it */
+	mul	vl
+	mov	%rbx, %r11
+
+	add	%r10, %rax
+	adc	$0, %rdx
+
+	and	$3, %ebx
+	jz	L(b0)
+	cmp	$2, %ebx
+	jz	L(b2)
+	jg	L(b3)
+
+L(b1):	dec	n
+	jne	L(gt1)
+	mov	%rax, (rp)
+	jmp	L(ret)
+L(gt1):	lea	8(up,n,8), up
+	lea	-8(rp,n,8), rp
+	neg	n
+	xor	%r10, %r10
+	xor	%ebx, %ebx
+	mov	%rax, %r9
+	mov	(up,n,8), %rax
+	mov	%rdx, %r8
+	jmp	L(L1)
+
+L(b0):	lea	(up,n,8), up
+	lea	-16(rp,n,8), rp
+	neg	n
+	xor	%r10, %r10
+	mov	%rax, %r8
+	mov	%rdx, %rbx
+	jmp	L(L0)
+
+L(b3):	lea	-8(up,n,8), up
+	lea	-24(rp,n,8), rp
+	neg	n
+	mov	%rax, %rbx
+	mov	%rdx, %r10
+	jmp	L(L3)
+
+L(b2):	lea	-16(up,n,8), up
+	lea	-32(rp,n,8), rp
+	neg	n
+	xor	%r8, %r8
+	xor	%ebx, %ebx
+	mov	%rax, %r10
+	mov	24(up,n,8), %rax
+	mov	%rdx, %r9
+	jmp	L(L2)
+
+	.p2align 4
+L(top): mov	%r10, (rp,n,8)
+	add	%rax, %r9
+	mov	(up,n,8), %rax
+	adc	%rdx, %r8
+	mov	$0, %r10d
+L(L1):	mul	vl
+	mov	%r9, 8(rp,n,8)
+	add	%rax, %r8
+	adc	%rdx, %rbx
+L(L0):	mov	8(up,n,8), %rax
+	mul	vl
+	mov	%r8, 16(rp,n,8)
+	add	%rax, %rbx
+	adc	%rdx, %r10
+L(L3):	mov	16(up,n,8), %rax
+	mul	vl
+	mov	%rbx, 24(rp,n,8)
+	mov	$0, %r8d                # zero
+	mov	%r8, %rbx               # zero
+	add	%rax, %r10
+	mov	24(up,n,8), %rax
+	mov	%r8, %r9                # zero
+	adc	%rdx, %r9
+L(L2):	mul	vl
+	add	$4, n
+	js	L(top)
+
+	mov	%r10, (rp,n,8)
+	add	%rax, %r9
+	adc	%r8, %rdx
+	mov	%r9, 8(rp,n,8)
+	add	%r8, %rdx
+L(ret):	mov	%rdx, %rax
+
+	pop	%rbx
+	cfi_adjust_cfa_offset (-8)
+	cfi_restore (%rbx)
 	ret
 END (__mpn_mul_1)
diff --git a/sysdeps/x86_64/rshift.S b/sysdeps/x86_64/rshift.S
index ee0c8aa..8ff0551 100644
--- a/sysdeps/x86_64/rshift.S
+++ b/sysdeps/x86_64/rshift.S
@@ -1,5 +1,5 @@
-/* AMD64 __mpn_rshift --
-   Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+/* x86-64 __mpn_rshift --
+   Copyright (C) 2007, 2009 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
@@ -20,43 +20,96 @@
 #include "sysdep.h"
 #include "asm-syntax.h"
 
+#define rp	%rdi
+#define up	%rsi
+#define n	%rdx
+#define cnt	%cl
+
 	.text
 ENTRY (__mpn_rshift)
-	movq	(%rsi), %mm7
-	movd	%ecx, %mm1
-	movl	$64, %eax
-	subl	%ecx, %eax
-	movd	%eax, %mm0
-	movq	%mm7, %mm3
-	psllq	%mm0, %mm7
-	movd	%mm7, %rax
-	leaq	(%rsi,%rdx,8), %rsi
-	leaq	(%rdi,%rdx,8), %rdi
-	negq	%rdx
-	addq	$2, %rdx
-	jg	L(endo)
-	.p2align 2
-L(loop):
-	movq	-8(%rsi,%rdx,8), %mm6
-	movq	%mm6, %mm2
-	psllq	%mm0, %mm6
-	psrlq	%mm1, %mm3
-	por	%mm6, %mm3
-	movq	%mm3, -16(%rdi,%rdx,8)
-	je	L(ende)
-	movq	(%rsi,%rdx,8), %mm7
-	movq	%mm7, %mm3
-	psllq	%mm0, %mm7
-	psrlq	%mm1, %mm2
-	por	%mm7, %mm2
-	movq	%mm2, -8(%rdi,%rdx,8)
-	addq	$2, %rdx
-	jle	L(loop)
-L(endo):
-	movq	%mm3, %mm2
-L(ende):
-	psrlq	%mm1, %mm2
-	movq	%mm2, -8(%rdi)
-	emms
+	mov	%edx, %eax
+	and	$3, %eax
+	jne	L(nb00)
+L(b00):	/* n = 4, 8, 12, ... */
+	mov	(up), %r10
+	mov	8(up), %r11
+	xor	%eax, %eax
+	shrd	%cl, %r10, %rax
+	mov	16(up), %r8
+	lea	8(up), up
+	lea	-24(rp), rp
+	sub	$4, n
+	jmp	L(00)
+
+L(nb00):/* n = 1, 5, 9, ... */
+	cmp	$2, %eax
+	jae	L(nb01)
+L(b01):	mov	(up), %r9
+	xor	%eax, %eax
+	shrd	%cl, %r9, %rax
+	sub	$2, n
+	jb	L(le1)
+	mov	8(up), %r10
+	mov	16(up), %r11
+	lea	16(up), up
+	lea	-16(rp), rp
+	jmp	L(01)
+L(le1): shr	%cl, %r9
+	mov	%r9, (rp)
+	ret
+
+L(nb01):/* n = 2, 6, 10, ... */
+	jne	L(b11)
+L(b10):	mov	(up), %r8
+	mov	8(up), %r9
+	xor	%eax, %eax
+	shrd	%cl, %r8, %rax
+	sub	$3, n
+	jb	L(le2)
+	mov	16(up), %r10
+	lea	24(up), up
+	lea	-8(rp), rp
+	jmp	L(10)
+L(le2): shrd	%cl, %r9, %r8
+	mov	%r8, (rp)
+	shr	%cl, %r9
+	mov	%r9, 8(rp)
+	ret
+
+	.p2align 4
+L(b11):	/* n = 3, 7, 11, ... */
+	mov	(up), %r11
+	mov	8(up), %r8
+	xor	%eax, %eax
+	shrd	%cl, %r11, %rax
+	mov	16(up), %r9
+	lea	32(up), up
+	sub	$4, n
+	jb	L(end)
+
+	.p2align 4
+L(top):	shrd	%cl, %r8, %r11
+	mov	-8(up), %r10
+	mov	%r11, (rp)
+L(10):	shrd	%cl, %r9, %r8
+	mov	(up), %r11
+	mov	%r8, 8(rp)
+L(01):	shrd	%cl, %r10, %r9
+	mov	8(up), %r8
+	mov	%r9, 16(rp)
+L(00):	shrd	%cl, %r11, %r10
+	mov	16(up), %r9
+	mov	%r10, 24(rp)
+	add	$32, up
+	lea	32(rp), rp
+	sub	$4, n
+	jnc	L(top)
+
+L(end):	shrd	%cl, %r8, %r11
+	mov	%r11, (rp)
+	shrd	%cl, %r9, %r8
+	mov	%r8, 8(rp)
+	shr	%cl, %r9
+	mov	%r9, 16(rp)
 	ret
 END (__mpn_rshift)
diff --git a/sysdeps/x86_64/sub_n.S b/sysdeps/x86_64/sub_n.S
index 48e1a2e..60c15fc 100644
--- a/sysdeps/x86_64/sub_n.S
+++ b/sysdeps/x86_64/sub_n.S
@@ -1,6 +1,6 @@
-/* AMD64 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store
+/* x86-64 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store
    sum in a third limb vector.
-   Copyright (C) 2004 Free Software Foundation, Inc.
+   Copyright (C) 2006, 2007 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
@@ -18,25 +18,7 @@
    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
    MA 02111-1307, USA. */
 
-#include "sysdep.h"
-#include "asm-syntax.h"
+#define func __mpn_sub_n
+#define ADCSBB sbb
 
-	.text
-ENTRY (__mpn_sub_n)
-	leaq	(%rsi,%rcx,8), %rsi
-	leaq	(%rdi,%rcx,8), %rdi
-	leaq	(%rdx,%rcx,8), %rdx
-	negq	%rcx
-	xorl	%eax, %eax		# clear cy
-	.p2align 2
-L(loop):
-	movq	(%rsi,%rcx,8), %rax
-	movq	(%rdx,%rcx,8), %r10
-	sbbq	%r10, %rax
-	movq	%rax, (%rdi,%rcx,8)
-	incq	%rcx
-	jne	L(loop)
-	movq	%rcx, %rax		# zero %rax
-	adcq	%rax, %rax
-	ret
-END (__mpn_sub_n)
+#include "add_n.S"
diff --git a/sysdeps/x86_64/submul_1.S b/sysdeps/x86_64/submul_1.S
index e94c9a7..150a927 100644
--- a/sysdeps/x86_64/submul_1.S
+++ b/sysdeps/x86_64/submul_1.S
@@ -1,6 +1,6 @@
-/* AMD64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+/* x86-64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
    the result from a second limb vector.
-   Copyright (C) 2004 Free Software Foundation, Inc.
+   Copyright (C) 2003,2004,2005,2007,2008,2009 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
@@ -18,29 +18,7 @@
    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
    MA 02111-1307, USA. */
 
-#include "sysdep.h"
-#include "asm-syntax.h"
+#define func __mpn_submul_1
+#define ADDSUB sub
 
-	.text
-ENTRY (__mpn_submul_1)
-	movq	%rdx, %r11
-	leaq	(%rsi,%r11,8), %rsi
-	leaq	(%rdi,%r11,8), %rdi
-	negq	%r11
-	xorl	%r8d, %r8d
-	.p2align 3
-L(loop):
-	movq	(%rsi,%r11,8), %rax
-	movq	(%rdi,%r11,8), %r10
-	mulq	%rcx
-	subq	%r8, %r10
-	movl	$0, %r8d
-	adcl	%r8d, %r8d
-	subq	%rax, %r10
-	adcq	%rdx, %r8
-	movq	%r10, (%rdi,%r11,8)
-	incq	%r11
-	jne	L(loop)
-	movq	%r8, %rax
-	ret
-END (__mpn_submul_1)
+#include "addmul_1.S"

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                 |   20 +++++--
 sysdeps/x86_64/add_n.S    |   99 +++++++++++++++++++++++++++-------
 sysdeps/x86_64/addmul_1.S |  115 ++++++++++++++++++++++++++++++++--------
 sysdeps/x86_64/lshift.S   |  127 ++++++++++++++++++++++++++++++++------------
 sysdeps/x86_64/mul_1.S    |  119 ++++++++++++++++++++++++++++++++++++------
 sysdeps/x86_64/rshift.S   |  129 +++++++++++++++++++++++++++++++-------------
 sysdeps/x86_64/sub_n.S    |   28 ++--------
 sysdeps/x86_64/submul_1.S |   32 ++---------
 8 files changed, 482 insertions(+), 187 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]