This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH 0/4] Avoid PIC and optimize i386 syscalls
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: Zack Weinberg <zackw at panix dot com>
- Cc: GNU C Library <libc-alpha at sourceware dot org>
- Date: Mon, 14 Sep 2015 07:17:58 -0700
- Subject: Re: [PATCH 0/4] Avoid PIC and optimize i386 syscalls
- Authentication-results: sourceware.org; auth=none
- References: <20150910131217 dot GA9542 at gmail dot com> <CAKCAbMiR0K0k5wU4d0EtNMfWBh6BV6pz+TZNWVGafe40c5d=sw at mail dot gmail dot com>
On Thu, Sep 10, 2015 at 6:58 AM, Zack Weinberg <zackw@panix.com> wrote:
> I'm not the person who has to approve these, I'm only reading 'em out
> of curiosity, but, the way you've organized this patch series makes it
> difficult to find the interesting changes. I'm not even clear on how
> many there *are*. It would be better if you separated all of the
> mechanical changes -- so for instance you would have one patch that
> adds both the generic and the i386 INLINE_SYSCALL_ERROR_RETURN *but
> does not use them anywhere*, and then a second patch, which ideally
> could be summarized as a sed script, to introduce all the uses. And
> so on.
>
> zw
Here are the updated patches.
--
H.J.
From 3b00c36a7f6f66c9f632534d957946e6bc3b4f10 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 14 Sep 2015 07:15:36 -0700
Subject: [PATCH 0/6] Avoid PIC and optimize i386 syscalls
For ia32 PIC, the first thing many syscalls do is call
__x86.get_pc_thunk.reg to load the PC into reg in case there is an error,
because the PC is needed to set errno. In most cases there is no error,
yet we still call __x86.get_pc_thunk.reg. This patch adds
INLINE_SYSCALL_ERROR_RETURN so that i386 can optimize setting errno by
branching to the internal __syscall_error without going through the PLT.
With the i386 INLINE_SYSCALL_ERROR_RETURN and the i386 syscall inlining
optimization for GCC 5, GCC 5.2 now compiles
sysdeps/unix/sysv/linux/fchmodat.c with -O2 -march=i686 -mtune=generic to:
<fchmodat>:
0: push %ebx
1: mov 0x14(%esp),%eax
5: mov 0x8(%esp),%ebx
9: mov 0xc(%esp),%ecx
d: mov 0x10(%esp),%edx
11: test $0xfffffeff,%eax
16: jne 38 <fchmodat+0x38>
18: test $0x1,%ah
1b: jne 48 <fchmodat+0x48>
1d: mov $0x132,%eax
22: call *%gs:0x10
29: cmp $0xfffff000,%eax
2e: ja 58 <fchmodat+0x58>
30: pop %ebx
31: ret
32: lea 0x0(%esi),%esi
38: pop %ebx
39: mov $0xffffffea,%eax
3e: jmp 3f <fchmodat+0x3f> 3f: R_386_PC32 __syscall_error
43: nop
44: lea 0x0(%esi,%eiz,1),%esi
48: pop %ebx
49: mov $0xffffffa1,%eax
4e: jmp 4f <fchmodat+0x4f> 4f: R_386_PC32 __syscall_error
53: nop
54: lea 0x0(%esi,%eiz,1),%esi
58: pop %ebx
59: jmp 5a <fchmodat+0x5a> 5a: R_386_PC32 __syscall_error
instead of
<fchmodat>:
0: sub $0x8,%esp
3: mov 0x18(%esp),%eax
7: mov %ebx,(%esp)
a: call b <fchmodat+0xb> b: R_386_PC32 __x86.get_pc_thunk.bx
f: add $0x2,%ebx 11: R_386_GOTPC _GLOBAL_OFFSET_TABLE_
15: mov %edi,0x4(%esp)
19: test $0xfffffeff,%eax
1e: jne 70 <fchmodat+0x70>
20: test $0x1,%ah
23: jne 88 <fchmodat+0x88>
25: mov 0x14(%esp),%edx
29: mov 0x10(%esp),%ecx
2d: mov 0xc(%esp),%edi
31: xchg %ebx,%edi
33: mov $0x132,%eax
38: call *%gs:0x10
3f: xchg %edi,%ebx
41: cmp $0xfffff000,%eax
46: ja 58 <fchmodat+0x58>
48: mov (%esp),%ebx
4b: mov 0x4(%esp),%edi
4f: add $0x8,%esp
52: ret
53: nop
54: lea 0x0(%esi,%eiz,1),%esi
58: mov 0x0(%ebx),%edx 5a: R_386_TLS_GOTIE __libc_errno
5e: neg %eax
60: mov %eax,%gs:(%edx)
63: mov $0xffffffff,%eax
68: jmp 48 <fchmodat+0x48>
6a: lea 0x0(%esi),%esi
70: mov 0x0(%ebx),%eax 72: R_386_TLS_GOTIE __libc_errno
76: movl $0x16,%gs:(%eax)
7d: mov $0xffffffff,%eax
82: jmp 48 <fchmodat+0x48>
84: lea 0x0(%esi,%eiz,1),%esi
88: mov 0x0(%ebx),%eax 8a: R_386_TLS_GOTIE __libc_errno
8e: movl $0x5f,%gs:(%eax)
95: mov $0xffffffff,%eax
9a: jmp 48 <fchmodat+0x48>
H.J. Lu (6):
Add INLINE_SYSCALL_ERROR_RETURN
Use INLINE_SYSCALL_ERROR_RETURN
Avoid reading errno in syscall implementations
Use INTERNAL_SYSCALL and INLINE_SYSCALL_ERROR_RETURN
Optimize i386 syscall inlining
i386: Remove syscall assembly codes with 6 arguments
sysdeps/unix/sysv/linux/adjtime.c | 5 +-
sysdeps/unix/sysv/linux/alpha/sysdep.h | 1 +
sysdeps/unix/sysv/linux/arm/sysdep.h | 1 +
sysdeps/unix/sysv/linux/dl-openat64.c | 3 +-
sysdeps/unix/sysv/linux/eventfd.c | 18 +-
sysdeps/unix/sysv/linux/faccessat.c | 8 +-
sysdeps/unix/sysv/linux/fchmodat.c | 10 +-
sysdeps/unix/sysv/linux/fcntl.c | 3 +-
sysdeps/unix/sysv/linux/fstatfs64.c | 12 +-
sysdeps/unix/sysv/linux/futimens.c | 8 +-
sysdeps/unix/sysv/linux/futimes.c | 5 +-
sysdeps/unix/sysv/linux/fxstat.c | 3 +-
sysdeps/unix/sysv/linux/fxstatat.c | 5 +-
sysdeps/unix/sysv/linux/fxstatat64.c | 10 +-
sysdeps/unix/sysv/linux/generic/sysdep.h | 1 +
sysdeps/unix/sysv/linux/getrlimit64.c | 7 +-
sysdeps/unix/sysv/linux/hppa/sysdep.h | 1 +
sysdeps/unix/sysv/linux/i386/Makefile | 23 +++
sysdeps/unix/sysv/linux/i386/brk.c | 12 +-
sysdeps/unix/sysv/linux/i386/clone.S | 8 -
sysdeps/unix/sysv/linux/i386/epoll_pwait.S | 78 ---------
sysdeps/unix/sysv/linux/i386/fxstat.c | 10 +-
sysdeps/unix/sysv/linux/i386/fxstatat.c | 9 +-
sysdeps/unix/sysv/linux/i386/libc-do-syscall.S | 3 +
sysdeps/unix/sysv/linux/i386/lockf64.c | 13 +-
sysdeps/unix/sysv/linux/i386/lxstat.c | 10 +-
sysdeps/unix/sysv/linux/i386/mmap.S | 79 ---------
sysdeps/unix/sysv/linux/i386/{sysdep.S => mmap.c} | 36 ++--
sysdeps/unix/sysv/linux/i386/mmap64.S | 116 -------------
sysdeps/unix/sysv/linux/i386/semtimedop.S | 73 --------
sysdeps/unix/sysv/linux/i386/setegid.c | 5 +-
sysdeps/unix/sysv/linux/i386/seteuid.c | 5 +-
sysdeps/unix/sysv/linux/i386/sigaction.c | 12 +-
sysdeps/unix/sysv/linux/i386/sysdep.c | 30 ++++
sysdeps/unix/sysv/linux/i386/sysdep.h | 193 ++++++++++++++-------
sysdeps/unix/sysv/linux/i386/xstat.c | 10 +-
sysdeps/unix/sysv/linux/ia64/sysdep.h | 1 +
sysdeps/unix/sysv/linux/lutimes.c | 8 +-
sysdeps/unix/sysv/linux/lxstat.c | 3 +-
sysdeps/unix/sysv/linux/lxstat64.c | 6 +-
sysdeps/unix/sysv/linux/m68k/sysdep.h | 1 +
sysdeps/unix/sysv/linux/microblaze/sysdep.h | 1 +
sysdeps/unix/sysv/linux/mips/mips32/sysdep.h | 1 +
sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h | 1 +
sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h | 1 +
sysdeps/unix/sysv/linux/mmap64.c | 5 +-
sysdeps/unix/sysv/linux/mq_open.c | 5 +-
sysdeps/unix/sysv/linux/mq_unlink.c | 8 +-
sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h | 1 +
sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h | 1 +
sysdeps/unix/sysv/linux/prlimit.c | 13 +-
sysdeps/unix/sysv/linux/readahead.c | 3 +-
sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h | 1 +
sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h | 1 +
sysdeps/unix/sysv/linux/setrlimit64.c | 7 +-
sysdeps/unix/sysv/linux/sh/sysdep.h | 1 +
sysdeps/unix/sysv/linux/shmat.c | 6 +-
sysdeps/unix/sysv/linux/signalfd.c | 19 +-
sysdeps/unix/sysv/linux/sparc/sysdep.h | 1 +
sysdeps/unix/sysv/linux/speed.c | 10 +-
sysdeps/unix/sysv/linux/statfs64.c | 13 +-
sysdeps/unix/sysv/linux/sysdep.h | 24 +++
sysdeps/unix/sysv/linux/tcsendbrk.c | 3 +-
sysdeps/unix/sysv/linux/tcsetattr.c | 3 +-
sysdeps/unix/sysv/linux/ustat.c | 5 +-
sysdeps/unix/sysv/linux/utimensat.c | 8 +-
sysdeps/unix/sysv/linux/x86_64/sysdep.h | 1 +
sysdeps/unix/sysv/linux/xmknod.c | 10 +-
sysdeps/unix/sysv/linux/xmknodat.c | 10 +-
sysdeps/unix/sysv/linux/xstat.c | 3 +-
sysdeps/unix/sysv/linux/xstatconv.c | 29 +---
71 files changed, 375 insertions(+), 665 deletions(-)
delete mode 100644 sysdeps/unix/sysv/linux/i386/epoll_pwait.S
delete mode 100644 sysdeps/unix/sysv/linux/i386/mmap.S
rename sysdeps/unix/sysv/linux/i386/{sysdep.S => mmap.c} (53%)
delete mode 100644 sysdeps/unix/sysv/linux/i386/mmap64.S
delete mode 100644 sysdeps/unix/sysv/linux/i386/semtimedop.S
create mode 100644 sysdeps/unix/sysv/linux/i386/sysdep.c
create mode 100644 sysdeps/unix/sysv/linux/sysdep.h
--
2.4.3