This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH 1/N, MPX, x86_64] Intel MPX support in glibc for x86_64
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: "Zamyatin, Igor" <igor dot zamyatin at intel dot com>
- Cc: "libc-alpha at sourceware dot org" <libc-alpha at sourceware dot org>
- Date: Tue, 1 Apr 2014 11:07:04 -0700
- Subject: Re: [PATCH 1/N, MPX, x86_64] Intel MPX support in glibc for x86_64
- Authentication-results: sourceware.org; auth=none
- References: <0EFAB2BDD0F67E4FB6CCC8B9F87D756942972119 at IRSMSX101 dot ger dot corp dot intel dot com> <CAMe9rOrn2WF0A-f_CHPZCO0vMxvsBfA4aUuuLi-VhYWjAA0vpg at mail dot gmail dot com> <CAMe9rOqekpkRvG4QakwZgLHpoOAJOSvobnZsssZHZ8-7=Mbbpg at mail dot gmail dot com>
On Thu, Mar 27, 2014 at 11:11 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Mar 17, 2014 at 11:09 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Mon, Mar 17, 2014 at 11:03 AM, Zamyatin, Igor
>> <igor.zamyatin@intel.com> wrote:
>>> Hi All!
>>>
>>> Attached patch is the first of the set of patches that add support for Intel MPX technology (see e.g. http://software.intel.com/sites/default/files/319433-015.pdf, Chapter 9) in Glibc for x86_64. Namely, this particular patch introduces bounds storing/restoring in _dl_runtime_resolve.
>>>
>>> Is it ok to install?
>>>
>>>
>>> Thanks,
>>> Igor
>>>
>>> 2014-03-13 Igor Zamyatin <igor.zamyatin@intel.com>
>>>
>>> * config.h.in (HAVE_MPX_SUPPORT): New #undef.
>>> * sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
>>> * sysdeps/x86_64/configure: Regenerated.
>>> * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_resolve): Add storing
>>> and restoring of Intel MPX bound registers before and after call
>>> of _dl_fixup.
>>
>> The reasons we need to save and restore bound registers in
>> symbol lookup are
>>
>> 1. Branches without BND prefix clear bound registers.
>> 2. x86-64 passes bounds in bound registers as specified in MPX
>> psABI extension on hjl/mpx/master branch at
>>
>> https://github.com/hjl-tools/x86-64-psABI
>> https://groups.google.com/forum/#!topic/x86-64-abi/KFsB0XTgWYc
>>
>> Binutils has been updated to create an alternate PLT to
>> add BND prefix when branching to ld.so.
>>
>
> Are there any comments, feedback or objections?
>
> Thanks.
Hi,
This is the final patch to save and restore bound registers
in _dl_runtime_resolve. Tested with MPX and non-MPX
binutils on x86-64 and x32. I will push it onto master if
there is no objection in 24 hours.
Thanks.
--
H.J.
--
2014-04-01 Igor Zamyatin <igor.zamyatin@intel.com>
* config.h.in (HAVE_MPX_SUPPORT): New #undef.
* sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
* sysdeps/x86_64/configure: Regenerated.
* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_resolve): Save and
restore Intel MPX bound registers when calling _dl_fixup.
From 4515c621c95e01bd9753b36dcd81cbc9e2144200 Mon Sep 17 00:00:00 2001
From: Igor Zamyatin <igor.zamyatin@intel.com>
Date: Tue, 1 Apr 2014 10:16:04 -0700
Subject: [PATCH] Save/restore bound registers in _dl_runtime_resolve
This patch saves and restores bound registers in symbol lookup for x86-64:
1. Branches without BND prefix clear bound registers.
2. x86-64 passes bounds in bound registers as specified in MPX psABI
extension on hjl/mpx/master branch at
https://github.com/hjl-tools/x86-64-psABI
https://groups.google.com/forum/#!topic/x86-64-abi/KFsB0XTgWYc
Binutils has been updated to create an alternate PLT to add BND prefix
when branching to ld.so.
* config.h.in (HAVE_MPX_SUPPORT): New #undef.
* sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
* sysdeps/x86_64/configure: Regenerated.
* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_resolve): Save and
restore Intel MPX bound registers when calling _dl_fixup.
---
ChangeLog | 8 ++++++++
config.h.in | 3 +++
sysdeps/x86_64/configure | 27 +++++++++++++++++++++++++++
sysdeps/x86_64/configure.ac | 15 +++++++++++++++
sysdeps/x86_64/dl-trampoline.S | 39 +++++++++++++++++++++++++++++++++++++++
5 files changed, 92 insertions(+)
diff --git a/ChangeLog b/ChangeLog
index bfb3083..14efdb7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2014-04-01 Igor Zamyatin <igor.zamyatin@intel.com>
+
+ * config.h.in (HAVE_MPX_SUPPORT): New #undef.
+ * sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
+ * sysdeps/x86_64/configure: Regenerated.
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_resolve): Save and
+ restore Intel MPX bound registers when calling _dl_fixup.
+
2014-04-01 Will Newton <will.newton@linaro.org>
* benchtests/Makefile (CFLAGS-bench-ffs.c): Add
diff --git a/config.h.in b/config.h.in
index 3fc34bd..b6e3623 100644
--- a/config.h.in
+++ b/config.h.in
@@ -104,6 +104,9 @@
/* Define if assembler supports AVX512. */
#undef HAVE_AVX512_ASM_SUPPORT
+/* Define if assembler supports Intel MPX. */
+#undef HAVE_MPX_SUPPORT
+
/* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index c1c88c8..45d868d 100644
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -222,6 +222,33 @@ $as_echo "$libc_cv_cc_novzeroupper" >&6; }
config_vars="$config_vars
config-cflags-novzeroupper = $libc_cv_cc_novzeroupper"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5
+$as_echo_n "checking for Intel MPX support... " >&6; }
+if ${libc_cv_asm_mpx+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.s <<\EOF
+ bndmov %bnd0,(%rsp)
+EOF
+if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ libc_cv_asm_mpx=yes
+else
+ libc_cv_asm_mpx=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5
+$as_echo "$libc_cv_asm_mpx" >&6; }
+if test $libc_cv_asm_mpx == yes; then
+ $as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h
+
+fi
+
$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
# work around problem with autoconf and empty lines at the end of files
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index d34f9a8..9138f63 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -75,6 +75,21 @@ LIBC_TRY_CC_OPTION([-mno-vzeroupper],
])
LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper])
+dnl Check whether asm supports Intel MPX
+AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl
+cat > conftest.s <<\EOF
+ bndmov %bnd0,(%rsp)
+EOF
+if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
+ libc_cv_asm_mpx=yes
+else
+ libc_cv_asm_mpx=no
+fi
+rm -f conftest*])
+if test $libc_cv_asm_mpx == yes; then
+ AC_DEFINE(HAVE_MPX_SUPPORT)
+fi
+
dnl It is always possible to access static and hidden symbols in an
dnl position independent way.
AC_DEFINE(PI_STATIC_AND_HIDDEN)
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 77c4d0f..646fcaf 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -31,8 +31,13 @@
cfi_startproc
_dl_runtime_resolve:
cfi_adjust_cfa_offset(16) # Incorporate PLT
+#ifdef __ILP32__
subq $56,%rsp
cfi_adjust_cfa_offset(56)
+#else
+ subq $120,%rsp
+ cfi_adjust_cfa_offset(120)
+#endif
movq %rax,(%rsp) # Preserve registers otherwise clobbered.
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
@@ -40,10 +45,39 @@ _dl_runtime_resolve:
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
+#ifdef __ILP32__
movq 64(%rsp), %rsi # Copy args pushed by PLT in register.
movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index
+#else
+# ifdef HAVE_MPX_SUPPORT
+ bndmov %bnd0, 56(%rsp) # We also have to preserve bound registers.
+ bndmov %bnd1, 72(%rsp) # These are nops if Intel MPX isn't available
+ bndmov %bnd2, 88(%rsp) # or disabled.
+ bndmov %bnd3, 104(%rsp)
+# else
+ .byte 0x66,0x0f,0x1b,0x44,0x24,0x38
+ .byte 0x66,0x0f,0x1b,0x4c,0x24,0x48
+ .byte 0x66,0x0f,0x1b,0x54,0x24,0x58
+ .byte 0x66,0x0f,0x1b,0x5c,0x24,0x68
+# endif
+ movq 128(%rsp), %rsi # Copy args pushed by PLT in register.
+ movq 120(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index
+#endif
call _dl_fixup # Call resolver.
movq %rax, %r11 # Save return value
+#ifndef __ILP32__
+# ifdef HAVE_MPX_SUPPORT
+ bndmov 104(%rsp), %bnd3 # Restore bound registers back.
+ bndmov 88(%rsp), %bnd2 # These are nops if Intel MPX isn't available
+ bndmov 72(%rsp), %bnd1 # or disabled.
+ bndmov 56(%rsp), %bnd0
+# else
+ .byte 0x66,0x0f,0x1a,0x5c,0x24,0x68
+ .byte 0x66,0x0f,0x1a,0x54,0x24,0x58
+ .byte 0x66,0x0f,0x1a,0x4c,0x24,0x48
+ .byte 0x66,0x0f,0x1a,0x44,0x24,0x38
+# endif
+#endif
movq 48(%rsp), %r9 # Get register content back.
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
@@ -51,8 +85,13 @@ _dl_runtime_resolve:
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
+#ifdef __ILP32__
addq $72, %rsp # Adjust stack(PLT did 2 pushes)
cfi_adjust_cfa_offset(-72)
+#else
+ addq $136, %rsp # Adjust stack(PLT did 2 pushes)
+ cfi_adjust_cfa_offset(-136)
+#endif
jmp *%r11 # Jump to function address.
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve
--
1.8.5.3