This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][BZ #16214] Fix TLS access on S390 with -march=z10


On 11/26/2013 10:37 AM, Andreas Krebbel wrote:
> Hi Siddhesh,
> 
> On 25/11/13 06:50, Siddhesh Poyarekar wrote:
> ...
>> +# ifdef __s390x__
>> +#  define LOAD_R12(r12) asm ("lgr %0,%%r12" : "=r" (r12) ::)
>> +# elif defined __s390__
>> +#  define LOAD_R12(r12) asm ("lr %0,%%r12" : "=r" (r12) ::)
>> +# endif
>> +
>>  # define GET_ADDR_OFFSET \
>>    (ti->ti_offset - (unsigned long) __builtin_thread_pointer ())
>>
>> +/* Subtract contents of r12 from __TI.  We don't really care if this is the
>> +   GOT pointer.  */
>>  # define __TLS_GET_ADDR(__ti) \
>> -  ({ extern char _GLOBAL_OFFSET_TABLE_[] attribute_hidden;		  \
>> -     (void *) __tls_get_offset ((char *) (__ti) - _GLOBAL_OFFSET_TABLE_)  \
>> -     + (unsigned long) __builtin_thread_pointer (); })
>> +({									      \
>> +  unsigned long int r12;						      \
>> +  LOAD_R12 (r12);							      \
>> +  ((void *) __tls_get_offset ((unsigned long int) (__ti) - r12)		      \
>> +   + (unsigned long) __builtin_thread_pointer ());			      \
>> +})
> 
> I cannot see why it shouldn't work that way. However, I think it is more readable to just set up the
> GOT pointer in r12.  For __tls_get_offset it is a prerequisite to have the GOT pointer set up in r12
> so it would appear more logical to me and it should not be any slower. The only user of this is
> dlsym correct?

Yes, _dl_sym and _dl_vsym use this e.g. ->do_sym->_dl_tls_symaddr.

An asm with clobbers is the only way of making this work reliably.

The asm could set r12 to GOT, call __tls_get_offset, restore r12
(or list it clobbered along with all the register __tls_get_offset
touches, and __tls_get_addr), save result, then the rest can be
in C and does the math required to compute the result.

You have to avoid the compiler using r12 for something else in the
middle and the asm is the only way to avoid that.

I note that the last time I tried to list a PIC register in the
asm clobbers list it caused a gcc ICE, so I recommend strongly that
we save/restore it and hide the change from the compiler.
 
>>
>>  #endif
>>
>> diff --git a/sysdeps/s390/s390-32/tls-macros.h b/sysdeps/s390/s390-32/tls-macros.h
>> index 8a0ad58..0a11998 100644
>> --- a/sysdeps/s390/s390-32/tls-macros.h
>> +++ b/sysdeps/s390/s390-32/tls-macros.h
>> @@ -8,12 +8,17 @@
>>
>>  #ifdef PIC
>>  # define TLS_IE(x) \
>> -  ({ unsigned long __offset;						      \
>> +  ({ unsigned long __offset, __save;					      \
>>       asm ("bras %0,1f\n"						      \
>> -	  "0:\t.long " #x "@gotntpoff\n"				      \
>> -	  "1:\tl %0,0(%0)\n\t"						      \
>> -	  "l %0,0(%0,%%r12):tls_load:" #x				      \
>> -	  : "=&a" (__offset) : : "cc" );				      \
>> +	  "0:\t.long _GLOBAL_OFFSET_TABLE_-0b\n\t"			      \
>> +	  ".long " #x "@gotntpoff\n"					      \
>> +	  "1:\tlr %1,%%r12\n\t"						      \
>> +	  "l %%r12,0(%0)\n\t"                                                 \
>> +	  "la %%r12,0(%%r12,%0)\n\t"					      \
>> +	  "l %0,4(%0)\n\t"						      \
>> +	  "l %0,0(%0,%%r12):tls_load:" #x "\n\t"			      \
>> +	  "lr %%r12, %1\n"						      \
>> +	  : "=&a" (__offset), "=&a" (__save) : : "cc" );		      \
>>       (int *) (__builtin_thread_pointer() + __offset); })
>>  #else
>>  # define TLS_IE(x) \
>> diff --git a/sysdeps/s390/s390-64/tls-macros.h b/sysdeps/s390/s390-64/tls-macros.h
>> index be8aa6c..a65047c 100644
>> --- a/sysdeps/s390/s390-64/tls-macros.h
>> +++ b/sysdeps/s390/s390-64/tls-macros.h
>> @@ -8,12 +8,15 @@
>>
>>  #ifdef PIC
>>  # define TLS_IE(x) \
>> -  ({ unsigned long __offset;						      \
>> +  ({ unsigned long __offset, __save;					      \
>>       asm ("bras %0,1f\n"						      \
>>  	  "0:\t.quad " #x "@gotntpoff\n"				      \
>> -	  "1:\tlg %0,0(%0)\n\t"						      \
>> -	  "lg %0,0(%0,%%r12):tls_load:" #x				      \
>> -	  : "=&a" (__offset) : : "cc" );				      \
>> +	  "1:\tlgr %1,%%r12\n\t"					      \
>> +	  "larl %%r12,_GLOBAL_OFFSET_TABLE_\n\t"			      \
>> +	  "lg %0,0(%0)\n\t"						      \
>> +	  "lg %0,0(%0,%%r12):tls_load:" #x "\n\t"			      \
>> +	  "lgr %%r12,%1"						      \
>> +	  : "=&a" (__offset), "=&a" (__save) : : "cc" );		      \
>>       (int *) (__builtin_thread_pointer() + __offset); })
>>  #else
>>  # define TLS_IE(x) \
>>
> 
> For this code to work the GOT pointer is not required to be in r12. Can't you just set it up in a
> compiler chosen reg? Something like this:

Why? This code attempts to emulate exactly what the compiler is going to do
and should be representative of the instruction sequences you'd see being
generated for TLS accesses. These macros are only ever used in testing and
never anywhere else so their speed is not important.
 
I suggest leaving the longer sequences that Siddhesh has here that save/restore
r12 and follow the ABI.

Cheers,
Carlos.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]