This is the mail archive of the libc-ports@sources.redhat.com mailing list for the libc-ports project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Thumb-2 support patch


I have committed this patch adding Thumb-2 support to the ARM port of 
glibc.

This means glibc can now be compiled by GCC generating Thumb-2 code.
Most of the assembly code in glibc will still be built in ARM mode,
because the .S files do not contain any directives to switch to Thumb
mode; this works fine with interworking.  Inline asms, however, need
various fixes (included in this patch) to work for Thumb-2.

Note that this has only been tested for EABI.  I see no reason why Thumb-2 
should not work for old-ABI, but also no reason to use it with old-ABI 
instead of EABI.

Index: ChangeLog.arm
===================================================================
RCS file: /cvs/glibc/ports/ChangeLog.arm,v
retrieving revision 1.90
retrieving revision 1.91
diff -u -r1.90 -r1.91
--- ChangeLog.arm	2 Feb 2009 15:35:22 -0000	1.90
+++ ChangeLog.arm	5 Feb 2009 14:46:41 -0000	1.91
@@ -1,3 +1,25 @@
+2009-02-05  Paul Brook  <paul@codesourcery.com>
+            Joseph Myers  <joseph@codesourcery.com>
+
+	* sysdeps/arm/dl-machine.h (elf_machine_dynamic): Add Thumb-2 implementation.
+	(elf_machine_load_address): Clear T bit of PLT entry contents.
+	(RTLD_START): Mark function symbols as such.  Tweak pc-relative
+	addressing to avoid depending on pc read pipeline offset.
+	* sysdeps/arm/machine-gmon.h (MCOUNT): Add Thumb-2 implementation.
+	* sysdeps/arm/tls-macros.h: Add alignment for Thumb-2.
+	(ARM_PC_OFFSET): Define.
+	(TLS_IE): Define differently for Thumb-2.
+	(TLS_LE, TLS_LD, TLS_GD): Use ARM_PC_OFFSET.
+	* sysdeps/arm/elf/start.S: Switch to thumb mode for Thumb-2.
+	* sysdeps/unix/sysv/linux/arm/eabi/sysdep.h (INTERNAL_SYSCALL_RAW):
+	Add Thumb implementation.
+	* sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h: New.
+	* sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c: Enforce
+	alignment for Thumb-2.  Adjust offset from PC for Thumb-2.
+	* sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c: Ditto.
+	* sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h (atomic_full_barrier,
+	__arch_compare_and_exchange_val_32_acq): Add Thumb-2 implementation.
+
 2009-02-02  Joseph Myers  <joseph@codesourcery.com>
 
 	* sysdeps/unix/sysv/linux/arm/bits/shm.h (SHM_EXEC): Define.
Index: sysdeps/arm/dl-machine.h
===================================================================
RCS file: /cvs/glibc/ports/sysdeps/arm/dl-machine.h,v
retrieving revision 1.60
retrieving revision 1.61
diff -u -r1.60 -r1.61
--- sysdeps/arm/dl-machine.h	21 Sep 2006 18:21:19 -0000	1.60
+++ sysdeps/arm/dl-machine.h	5 Feb 2009 14:46:41 -0000	1.61
@@ -53,11 +53,22 @@
 elf_machine_dynamic (void)
 {
   Elf32_Addr dynamic;
+#ifdef __thumb2__
+  long tmp;
+  asm ("ldr\t%0, 1f\n\t"
+       "adr\t%1, 1f\n\t"
+       "ldr\t%0, [%0, %1]\n\t"
+       "b 2f\n"
+       ".align 2\n"
+       "1: .word _GLOBAL_OFFSET_TABLE_ - 1b\n"
+       "2:" : "=r" (dynamic), "=r"(tmp));
+#else
   asm ("ldr %0, 2f\n"
        "1: ldr %0, [pc, %0]\n"
        "b 3f\n"
        "2: .word _GLOBAL_OFFSET_TABLE_ - (1b+8)\n"
        "3:" : "=r" (dynamic));
+#endif
   return dynamic;
 }
 
@@ -69,6 +80,10 @@
   extern void __dl_start asm ("_dl_start");
   Elf32_Addr got_addr = (Elf32_Addr) &__dl_start;
   Elf32_Addr pcrel_addr;
+#ifdef __thumb__
+  /* Clear the low bit of the function address.  */
+  got_addr &= ~(Elf32_Addr) 1;
+#endif
   asm ("adr %0, _dl_start" : "=r" (pcrel_addr));
   return pcrel_addr - got_addr;
 }
@@ -140,7 +155,9 @@
 #define RTLD_START asm ("\
 .text\n\
 .globl _start\n\
+.type _start, %function\n\
 .globl _dl_start_user\n\
+.type _dl_start_user, %function\n\
 _start:\n\
 	@ we are PIC code, so get global offset table\n\
 	ldr	sl, .L_GET_GOT\n\
@@ -152,8 +169,8 @@
 	bl	_dl_start\n\
 	@ returns user entry point in r0\n\
 _dl_start_user:\n\
-	add	sl, pc, sl\n\
-.L_GOT_GOT:\n\
+	adr	r6, .L_GET_GOT\n\
+	add	sl, sl, r6\n\
 	ldr	r4, [sl, r4]\n\
 	@ save the entry point in another register\n\
 	mov	r6, r0\n\
@@ -210,7 +227,7 @@
 	b	.L_done_fixup\n\
 \n\
 .L_GET_GOT:\n\
-	.word	_GLOBAL_OFFSET_TABLE_ - .L_GOT_GOT - 4\n\
+	.word	_GLOBAL_OFFSET_TABLE_ - .L_GET_GOT\n\
 .L_SKIP_ARGS:\n\
 	.word	_dl_skip_args(GOTOFF)\n\
 .L_FINI_PROC:\n\
Index: sysdeps/arm/machine-gmon.h
===================================================================
RCS file: /cvs/glibc/ports/sysdeps/arm/machine-gmon.h,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- sysdeps/arm/machine-gmon.h	10 Oct 2005 15:09:14 -0000	1.7
+++ sysdeps/arm/machine-gmon.h	5 Feb 2009 14:46:41 -0000	1.8
@@ -50,6 +50,28 @@
    }
 */
 
+#ifdef __thumb2__
+
+#define MCOUNT								\
+void _mcount (void)							\
+{									\
+  __asm__("push		{r0, r1, r2, r3};"				\
+	  "movs		fp, fp;"				      	\
+	  "it		eq;"						\
+          "moveq	r1, #0;"					\
+	  "itttt	ne;"						\
+	  "ldrne	r1, [fp, $-4];"					\
+	  "ldrne	r0, [fp, $-12];"				\
+	  "movnes	r0, r0;"					\
+	  "ldrne	r0, [r0, $-4];"					\
+	  "movs		r0, r0;"					\
+	  "it		ne;"						\
+	  "blne		mcount_internal;"				\
+	  "pop		{r0, r1, r2, r3}");				\
+}
+
+#else
+
 #define MCOUNT								\
 void _mcount (void)							\
 {									\
@@ -65,3 +87,4 @@
 	  "ldmia	sp!, {r0, r1, r2, r3}");			\
 }
 
+#endif
Index: sysdeps/arm/tls-macros.h
===================================================================
RCS file: /cvs/glibc/ports/sysdeps/arm/tls-macros.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- sysdeps/arm/tls-macros.h	5 Oct 2005 20:15:21 -0000	1.1
+++ sysdeps/arm/tls-macros.h	5 Feb 2009 14:46:41 -0000	1.2
@@ -1,14 +1,36 @@
+#ifdef __thumb2__
+#define ARM_PC_OFFSET "4"
+#else
+#define ARM_PC_OFFSET "8"
+#endif
+
 #define TLS_LE(x)					\
   ({ int *__result;					\
      void *tp = __builtin_thread_pointer ();		\
      asm ("ldr %0, 1f; "				\
 	  "add %0, %1, %0; "				\
 	  "b 2f; "					\
+	  ".align 2; "					\
 	  "1: .word " #x "(tpoff); "			\
 	  "2: "						\
 	  : "=&r" (__result) : "r" (tp));		\
      __result; })
 
+#ifdef __thumb2__
+#define TLS_IE(x)					\
+  ({ int *__result;					\
+     void *tp = __builtin_thread_pointer ();		\
+     asm ("ldr %0, 1f; "				\
+	  "3: add %0, pc, %0;"				\
+	  "ldr %0, [%0];"				\
+	  "add %0, %1, %0; "				\
+	  "b 2f; "					\
+	  ".align 2; "					\
+	  "1: .word " #x "(gottpoff) + (. - 3b - 4); "	\
+	  "2: "						\
+	  : "=&r" (__result) : "r" (tp));		\
+     __result; })
+#else
 #define TLS_IE(x)					\
   ({ int *__result;					\
      void *tp = __builtin_thread_pointer ();		\
@@ -16,10 +38,12 @@
 	  "3: ldr %0, [pc, %0];"			\
 	  "add %0, %1, %0; "				\
 	  "b 2f; "					\
+	  ".align 2; "					\
 	  "1: .word " #x "(gottpoff) + (. - 3b - 8); "	\
 	  "2: "						\
 	  : "=&r" (__result) : "r" (tp));		\
      __result; })
+#endif
 
 #define TLS_LD(x)					\
   ({ char *__result;					\
@@ -28,12 +52,14 @@
      asm ("ldr %0, 2f; "				\
 	  "1: add %0, pc, %0; "				\
 	  "b 3f; "					\
-	  "2: .word " #x "(tlsldm) + (. - 1b - 8); "	\
+	  ".align 2; "					\
+	  "2: .word " #x "(tlsldm) + (. - 1b - "ARM_PC_OFFSET"); "	\
 	  "3: "						\
 	  : "=r" (__result));				\
      __result = (char *)__tls_get_addr (__result);	\
      asm ("ldr %0, 1f; "				\
 	  "b 2f; "					\
+	  ".align 2; "					\
 	  "1: .word " #x "(tlsldo); "			\
 	  "2: "						\
 	  : "=r" (__offset));				\
@@ -45,7 +71,8 @@
      asm ("ldr %0, 2f; "				\
 	  "1: add %0, pc, %0; "				\
 	  "b 3f; "					\
-	  "2: .word " #x "(tlsgd) + (. - 1b - 8); "	\
+	  ".align 2; "					\
+	  "2: .word " #x "(tlsgd) + (. - 1b - "ARM_PC_OFFSET"); "	\
 	  "3: "						\
 	  : "=r" (__result));				\
      (int *)__tls_get_addr (__result); })
Index: sysdeps/arm/elf/start.S
===================================================================
RCS file: /cvs/glibc/ports/sysdeps/arm/elf/start.S,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- sysdeps/arm/elf/start.S	27 Jan 2009 16:01:19 -0000	1.10
+++ sysdeps/arm/elf/start.S	5 Feb 2009 14:46:42 -0000	1.11
@@ -58,6 +58,10 @@
 		...
 					NULL
 */
+#if defined(__thumb2__)
+	.thumb
+	.syntax unified
+#endif
 
 	.text
 	.globl _start
Index: sysdeps/unix/sysv/linux/arm/eabi/sysdep.h
===================================================================
RCS file: /cvs/glibc/ports/sysdeps/unix/sysv/linux/arm/eabi/sysdep.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- sysdeps/unix/sysv/linux/arm/eabi/sysdep.h	12 Sep 2007 12:57:25 -0000	1.4
+++ sysdeps/unix/sysv/linux/arm/eabi/sysdep.h	5 Feb 2009 14:46:42 -0000	1.5
@@ -44,6 +44,30 @@
    argument; otherwise the (optional) compatibility code for APCS binaries
    may be invoked.  */
 
+#ifdef __thumb__
+/* Hide the use of r7 from the compiler.  This would be a lot
+   easier but for the fact that the syscalls can exceed 255.
+   For the moment the LOAD_ARGS_7 is sacrificed.
+   We can't use push/pop inside the asm because that breaks
+   unwinding (i.e. thread cancellation).  */
+#undef LOAD_ARGS_7
+#undef INTERNAL_SYSCALL_RAW
+#define INTERNAL_SYSCALL_RAW(name, err, nr, args...)		\
+  ({								\
+      int _sys_buf[2];						\
+      register int _a1 asm ("a1");				\
+      register int *_r6 asm ("r6") = _sys_buf;			\
+      *_r6 = name;						\
+      LOAD_ARGS_##nr (args)					\
+      asm volatile ("str        r7, [r6, #4]\n\t"		\
+                    "ldr      r7, [r6]\n\t"			\
+                    "swi      0       @ syscall " #name "\n\t"	\
+                    "ldr      r7, [r6, #4]"			\
+                   : "=r" (_a1)					\
+                    : "r" (_r6) ASM_ARGS_##nr			\
+                    : "memory");				\
+       _a1; })
+#else /* ARM */
 #undef INTERNAL_SYSCALL_RAW
 #define INTERNAL_SYSCALL_RAW(name, err, nr, args...)		\
   ({								\
@@ -55,6 +79,7 @@
 		     : "r" (_nr) ASM_ARGS_##nr			\
 		     : "memory");				\
        _a1; })
+#endif
 
 /* For EABI, non-constant syscalls are actually pretty easy...  */
 #undef INTERNAL_SYSCALL_NCS
Index: sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h
===================================================================
RCS file: sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h
diff -N sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h	5 Feb 2009 14:46:42 -0000	1.1
@@ -0,0 +1,52 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include_next <aio_misc.h>
+
+#ifdef __thumb2__
+
+#include <errno.h>
+
+/* The Thumb-2 definition of INTERNAL_SYSCALL_RAW has to hide the use
+   of r7 from the compiler because it cannot handle asm clobbering the
+   hard frame pointer.  In aio_suspend, GCC does not eliminate the
+   hard frame pointer because the function uses variable-length
+   arrays, so it generates unwind information using r7 as virtual
+   stack pointer.  During system calls, when r7 has been saved on the
+   stack, this means the unwind information is invalid.  Without extra
+   unwind directives, which would need to cause unwind information for
+   the asm to be generated separately from that for the parts of the
+   function before and after the asm (with three index table entries),
+   it is not possible to represent any temporary change to the virtual
+   stack pointer.  Instead, we move the problematic system calls out
+   of line into a function that does not require a frame pointer.  */
+
+static __attribute_noinline__ void
+aio_misc_wait (int *resultp,
+	       volatile int *futexp,
+	       const struct timespec *timeout,
+	       int cancel)
+{
+  AIO_MISC_WAIT (*resultp, *futexp, timeout, cancel);
+}
+
+#undef AIO_MISC_WAIT
+#define AIO_MISC_WAIT(result, futex, timeout, cancel)	\
+  aio_misc_wait (&result, &futex, timeout, cancel)
+
+#endif
Index: sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c
===================================================================
RCS file: /cvs/glibc/ports/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c	10 Jul 2007 13:35:28 -0000	1.2
+++ sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c	5 Feb 2009 14:46:42 -0000	1.3
@@ -89,7 +89,12 @@
 "4:	bl	pthread_cancel_init\n"
 "	ldr	r3, [r4, r5]\n"
 "	b	5b\n"
+"	.align 2\n"
+#ifdef __thumb2__
+"1:	.word	_GLOBAL_OFFSET_TABLE_ - 3b - 4\n"
+#else
 "1:	.word	_GLOBAL_OFFSET_TABLE_ - 3b - 8\n"
+#endif
 "2:	.word	libgcc_s_resume(GOTOFF)\n"
 "	.size	_Unwind_Resume, .-_Unwind_Resume\n"
 );
Index: sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c
===================================================================
RCS file: /cvs/glibc/ports/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c	16 Nov 2005 19:03:42 -0000	1.1
+++ sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c	5 Feb 2009 14:46:42 -0000	1.2
@@ -66,7 +66,12 @@
 "4:	bl	init\n"
 "	ldr	r3, [r4, r5]\n"
 "	b	5b\n"
+"	.align 2\n"
+#ifdef __thumb2__
+"1:	.word	_GLOBAL_OFFSET_TABLE_ - 3b - 4\n"
+#else
 "1:	.word	_GLOBAL_OFFSET_TABLE_ - 3b - 8\n"
+#endif
 "2:	.word	libgcc_s_resume(GOTOFF)\n"
 "	.size	_Unwind_Resume, .-_Unwind_Resume\n"
 );
Index: sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h
===================================================================
RCS file: /cvs/glibc/ports/sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h	2 Jun 2008 01:57:03 -0000	1.2
+++ sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h	5 Feb 2009 14:46:42 -0000	1.3
@@ -37,12 +37,21 @@
 
 void __arm_link_error (void);
 
+#ifdef __thumb2__
+#define atomic_full_barrier() \
+     __asm__ __volatile__						      \
+	     ("movw\tip, #0x0fa0\n\t"					      \
+	      "movt\tip, #0xffff\n\t"					      \
+	      "blx\tip"							      \
+	      : : : "ip", "lr", "cc", "memory");
+#else
 #define atomic_full_barrier() \
      __asm__ __volatile__						      \
 	     ("mov\tip, #0xffff0fff\n\t"				      \
 	      "mov\tlr, pc\n\t"						      \
 	      "add\tpc, ip, #(0xffff0fa0 - 0xffff0fff)"			      \
 	      : : : "ip", "lr", "cc", "memory");
+#endif
 
 /* Atomic compare and exchange.  This sequence relies on the kernel to
    provide a compare and exchange operation which is atomic on the
@@ -59,6 +68,32 @@
    specify one to work around GCC PR rtl-optimization/21223.  Otherwise
    it may cause a_oldval or a_tmp to be moved to a different register.  */
 
+#ifdef __thumb2__
+/* Thumb-2 has ldrex/strex.  However it does not have barrier instructions,
+   so we still need to use the kernel helper.  */
+#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+  ({ register __typeof (oldval) a_oldval asm ("r0");			      \
+     register __typeof (oldval) a_newval asm ("r1") = (newval);		      \
+     register __typeof (mem) a_ptr asm ("r2") = (mem);			      \
+     register __typeof (oldval) a_tmp asm ("r3");			      \
+     register __typeof (oldval) a_oldval2 asm ("r4") = (oldval);	      \
+     __asm__ __volatile__						      \
+	     ("0:\tldr\t%[tmp],[%[ptr]]\n\t"				      \
+	      "cmp\t%[tmp], %[old2]\n\t"				      \
+	      "bne\t1f\n\t"						      \
+	      "mov\t%[old], %[old2]\n\t"				      \
+	      "movw\t%[tmp], #0x0fc0\n\t"				      \
+	      "movt\t%[tmp], #0xffff\n\t"				      \
+	      "blx\t%[tmp]\n\t"						      \
+	      "bcc\t0b\n\t"						      \
+	      "mov\t%[tmp], %[old2]\n\t"				      \
+	      "1:"							      \
+	      : [old] "=&r" (a_oldval), [tmp] "=&r" (a_tmp)		      \
+	      : [new] "r" (a_newval), [ptr] "r" (a_ptr),		      \
+		[old2] "r" (a_oldval2)					      \
+	      : "ip", "lr", "cc", "memory");				      \
+     a_tmp; })
+#else
 #define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
   ({ register __typeof (oldval) a_oldval asm ("r0");			      \
      register __typeof (oldval) a_newval asm ("r1") = (newval);		      \
@@ -81,6 +116,7 @@
 		[old2] "r" (a_oldval2)					      \
 	      : "ip", "lr", "cc", "memory");				      \
      a_tmp; })
+#endif
 
 #define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
   ({ __arm_link_error (); oldval; })

-- 
Joseph S. Myers
joseph@codesourcery.com


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]