This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v2] Single threaded stdio optimization


On Thursday 22 June 2017 03:20 PM, Szabolcs Nagy wrote:
> On 21/06/17 17:52, Joseph Myers wrote:
>> On Wed, 21 Jun 2017, Szabolcs Nagy wrote:
>>
>>> created.  A new libc abi symbol, _IO_enable_locks, does this.
>>> (The abilist files will have to be updated in a separate patch).
>>
>> I don't see any declarations or uses of this symbol in public headers, so 
>> why does it need to be at a public symbol version instead of 
>> GLIBC_PRIVATE?
>>
> 
> PATCH v3: use GLIBC_PRIVATE for _IO_enable_locks.
> (should i use __libc_* naming convention for private symbols?)
> 
> Locking overhead can be significant in some stdio operations
> that are common in single threaded applications.
> 
> I'd like to address it in this release if possible as it
> causes performance problems to aarch64 users.  I prefer this
> high-level approach to be reviewed, if that does not work then
> the aarch64 specific low-level approach will be taken.
> 
> This patch adds the _IO_FLAGS2_NEED_LOCK flag to indicate if
> an _IO_FILE object needs to be locked and some of the stdio
> functions just jump to their _unlocked variant when not.  The
> flag is set on all _IO_FILE objects when the first thread is
> created.  A new GLIBC_PRIVATE libc symbol, _IO_enable_locks,
> was added to do this from libpthread.
>
> The optimization can be applied to more stdio functions,
> currently it is only applied to single flag check or single
> non-wide-char standard operations.  The flag should probably
> be never set for files with _IO_USER_LOCK, but that's just a
> further optimization, not a correctness requirement.
> 
> The optimization is valid in a single thread because stdio
> operations are non-as-safe (so lock state is not observable
> from a signal handler) and stdio locks are recursive (so lock
> state is not observable via deadlock).  The optimization is not
> valid if a thread may be created while an stdio lock is taken
> and thus it should be disabled if any user code may run during
> an stdio operation (interposed malloc, printf hooks, etc).
> This makes the optimization more complicated for some stdio
> operations (e.g. printf), but those are bigger and thus less
> important to optimize so this patch does not try to do that.
> 
> 2017-06-22  Szabolcs Nagy  <szabolcs.nagy@arm.com>
> 
> 	* libio/libio.h (_IO_FLAGS2_NEED_LOCK, _IO_need_lock): Define.
> 	* libio/libioP.h (_IO_enable_locks): Declare.
> 	* libio/Versions (_IO_enable_locks): New symbol.
> 	* libio/genops.c (_IO_enable_locks): Define.
> 	(_IO_old_init): Initialize flags2.
> 	* libio/feof.c.c (_IO_feof): Avoid locking when not needed.
> 	* libio/ferror.c (_IO_ferror): Likewise.
> 	* libio/fputc.c (fputc): Likewise.
> 	* libio/putc.c (_IO_putc): Likewise.
> 	* libio/getc.c (_IO_getc): Likewise.
> 	* libio/getchar.c (getchar): Likewise.
> 	* libio/ioungetc.c (_IO_ungetc): Likewise.
> 	* nptl/pthread_create.c (__pthread_create_2_1): Enable stdio locks.
> 	* libio/iofopncook.c (_IO_fopencookie): Enable locking for the file.
> 	* sysdeps/pthread/flockfile.c (__flockfile): Likewise.
> 
> 
> stdio.diff
> 
> 
> diff --git a/libio/Versions b/libio/Versions
> index 2a1d6e6c85..6c1624e8cd 100644
> --- a/libio/Versions
> +++ b/libio/Versions
> @@ -155,5 +155,6 @@ libc {
>    GLIBC_PRIVATE {
>      # Used by NPTL and librt
>      __libc_fatal;
> +    _IO_enable_locks;
>    }
>  }
> diff --git a/libio/feof.c b/libio/feof.c
> index 9712a81e78..8890a5f51f 100644
> --- a/libio/feof.c
> +++ b/libio/feof.c
> @@ -32,6 +32,8 @@ _IO_feof (_IO_FILE *fp)
>  {
>    int result;
>    CHECK_FILE (fp, EOF);
> +  if (!_IO_need_lock (fp))
> +    return _IO_feof_unlocked (fp);
>    _IO_flockfile (fp);
>    result = _IO_feof_unlocked (fp);
>    _IO_funlockfile (fp);
> diff --git a/libio/ferror.c b/libio/ferror.c
> index 01e3bd8e2b..d10fcd9fff 100644
> --- a/libio/ferror.c
> +++ b/libio/ferror.c
> @@ -32,6 +32,8 @@ _IO_ferror (_IO_FILE *fp)
>  {
>    int result;
>    CHECK_FILE (fp, EOF);
> +  if (!_IO_need_lock (fp))
> +    return _IO_ferror_unlocked (fp);
>    _IO_flockfile (fp);
>    result = _IO_ferror_unlocked (fp);

The patch looks OK except for the duplication (and a missing comment
below), which looks a bit clumsy.  How about something like this instead:

  bool need_lock = _IO_need_lock (fp);

  if (need_lock)
    _IO_flockfile (fp);
  result = _IO_ferror_unlocked (fp);
  if (need_lock)
    _IO_funlockfile (fp);

  return result;

You could probably make some kind of a macro out of this, I haven't
looked that hard.

>    _IO_funlockfile (fp);
> diff --git a/libio/fputc.c b/libio/fputc.c
> index a7cd682fe2..b72305c06f 100644
> --- a/libio/fputc.c
> +++ b/libio/fputc.c
> @@ -32,6 +32,8 @@ fputc (int c, _IO_FILE *fp)
>  {
>    int result;
>    CHECK_FILE (fp, EOF);
> +  if (!_IO_need_lock (fp))
> +    return _IO_putc_unlocked (c, fp);
>    _IO_acquire_lock (fp);
>    result = _IO_putc_unlocked (c, fp);
>    _IO_release_lock (fp);
> diff --git a/libio/genops.c b/libio/genops.c
> index a466cfa337..132f1f1a7d 100644
> --- a/libio/genops.c
> +++ b/libio/genops.c
> @@ -570,11 +570,28 @@ _IO_init (_IO_FILE *fp, int flags)
>    _IO_init_internal (fp, flags);
>  }
>  

Add a fat comment here describing in detail what you're doing here and why.

> +static int stdio_needs_locking;
> +
> +void
> +_IO_enable_locks (void)
> +{
> +  _IO_ITER i;
> +
> +  if (stdio_needs_locking)
> +    return;
> +  stdio_needs_locking = 1;
> +  for (i = _IO_iter_begin (); i != _IO_iter_end (); i = _IO_iter_next (i))
> +    _IO_iter_file (i)->_flags2 |= _IO_FLAGS2_NEED_LOCK;
> +}
> +libc_hidden_def (_IO_enable_locks)
> +
>  void
>  _IO_old_init (_IO_FILE *fp, int flags)
>  {
>    fp->_flags = _IO_MAGIC|flags;
>    fp->_flags2 = 0;
> +  if (stdio_needs_locking)
> +    fp->_flags2 |= _IO_FLAGS2_NEED_LOCK;
>    fp->_IO_buf_base = NULL;
>    fp->_IO_buf_end = NULL;
>    fp->_IO_read_base = NULL;
> diff --git a/libio/getc.c b/libio/getc.c
> index b58fd62308..fd66ef93cf 100644
> --- a/libio/getc.c
> +++ b/libio/getc.c
> @@ -34,6 +34,8 @@ _IO_getc (FILE *fp)
>  {
>    int result;
>    CHECK_FILE (fp, EOF);
> +  if (!_IO_need_lock (fp))
> +    return _IO_getc_unlocked (fp);
>    _IO_acquire_lock (fp);
>    result = _IO_getc_unlocked (fp);
>    _IO_release_lock (fp);
> diff --git a/libio/getchar.c b/libio/getchar.c
> index 5b41595d17..d79932114e 100644
> --- a/libio/getchar.c
> +++ b/libio/getchar.c
> @@ -33,6 +33,8 @@ int
>  getchar (void)
>  {
>    int result;
> +  if (!_IO_need_lock (_IO_stdin))
> +    return _IO_getc_unlocked (_IO_stdin);
>    _IO_acquire_lock (_IO_stdin);
>    result = _IO_getc_unlocked (_IO_stdin);
>    _IO_release_lock (_IO_stdin);
> diff --git a/libio/iofopncook.c b/libio/iofopncook.c
> index a08dfdaa42..982f464a68 100644
> --- a/libio/iofopncook.c
> +++ b/libio/iofopncook.c
> @@ -172,6 +172,8 @@ _IO_cookie_init (struct _IO_cookie_file *cfile, int read_write,
>    _IO_mask_flags (&cfile->__fp.file, read_write,
>  		  _IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
>  
> +  cfile->__fp.file._flags2 |= _IO_FLAGS2_NEED_LOCK;
> +
>    /* We use a negative number different from -1 for _fileno to mark that
>       this special stream is not associated with a real file, but still has
>       to be treated as such.  */
> diff --git a/libio/ioungetc.c b/libio/ioungetc.c
> index 951064fa12..917cad8abb 100644
> --- a/libio/ioungetc.c
> +++ b/libio/ioungetc.c
> @@ -33,6 +33,8 @@ _IO_ungetc (int c, _IO_FILE *fp)
>    CHECK_FILE (fp, EOF);
>    if (c == EOF)
>      return EOF;
> +  if (!_IO_need_lock (fp))
> +    return _IO_sputbackc (fp, (unsigned char) c);
>    _IO_acquire_lock (fp);
>    result = _IO_sputbackc (fp, (unsigned char) c);
>    _IO_release_lock (fp);
> diff --git a/libio/libio.h b/libio/libio.h
> index 518ffd8e44..14bcb92332 100644
> --- a/libio/libio.h
> +++ b/libio/libio.h
> @@ -119,6 +119,7 @@
>  # define _IO_FLAGS2_SCANF_STD 16
>  # define _IO_FLAGS2_NOCLOSE 32
>  # define _IO_FLAGS2_CLOEXEC 64
> +# define _IO_FLAGS2_NEED_LOCK 128
>  #endif
>  
>  /* These are "formatting flags" matching the iostream fmtflags enum values. */
> @@ -451,6 +452,9 @@ extern int _IO_ftrylockfile (_IO_FILE *) __THROW;
>  #define _IO_cleanup_region_end(_Doit) /**/
>  #endif
>  
> +#define _IO_need_lock(_fp) \
> +  (((_fp)->_flags2 & _IO_FLAGS2_NEED_LOCK) != 0)
> +
>  extern int _IO_vfscanf (_IO_FILE * __restrict, const char * __restrict,
>  			_IO_va_list, int *__restrict);
>  extern int _IO_vfprintf (_IO_FILE *__restrict, const char *__restrict,
> diff --git a/libio/libioP.h b/libio/libioP.h
> index eb93418c8d..163dfb1386 100644
> --- a/libio/libioP.h
> +++ b/libio/libioP.h
> @@ -444,6 +444,8 @@ extern void _IO_list_unlock (void) __THROW;
>  libc_hidden_proto (_IO_list_unlock)
>  extern void _IO_list_resetlock (void) __THROW;
>  libc_hidden_proto (_IO_list_resetlock)
> +extern void _IO_enable_locks (void) __THROW;
> +libc_hidden_proto (_IO_enable_locks)
>  
>  /* Default jumptable functions. */
>  
> @@ -750,7 +752,7 @@ extern _IO_off64_t _IO_seekpos_unlocked (_IO_FILE *, _IO_off64_t, int)
>  
>  #if _G_HAVE_MMAP
>  
> -# ifdef _LIBC
> +# if IS_IN (libc)
>  /* When using this code in the GNU libc we must not pollute the name space.  */
>  #  define mmap __mmap
>  #  define munmap __munmap
> diff --git a/libio/putc.c b/libio/putc.c
> index b591c5538b..6e1fdeef3a 100644
> --- a/libio/putc.c
> +++ b/libio/putc.c
> @@ -25,6 +25,8 @@ _IO_putc (int c, _IO_FILE *fp)
>  {
>    int result;
>    CHECK_FILE (fp, EOF);
> +  if (!_IO_need_lock (fp))
> +    return _IO_putc_unlocked (c, fp);
>    _IO_acquire_lock (fp);
>    result = _IO_putc_unlocked (c, fp);
>    _IO_release_lock (fp);
> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
> index c7d1b8f413..0b3fa942f2 100644
> --- a/nptl/pthread_create.c
> +++ b/nptl/pthread_create.c
> @@ -32,6 +32,7 @@
>  #include <exit-thread.h>
>  #include <default-sched.h>
>  #include <futex-internal.h>
> +#include "libioP.h"
>  
>  #include <shlib-compat.h>
>  
> @@ -756,6 +757,9 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
>          collect_default_sched (pd);
>      }
>  
> +  if (__glibc_unlikely (__nptl_nthreads == 1))
> +    _IO_enable_locks ();
> +
>    /* Pass the descriptor to the caller.  */
>    *newthread = (pthread_t) pd;
>  
> diff --git a/sysdeps/pthread/flockfile.c b/sysdeps/pthread/flockfile.c
> index 7fe8e99161..a8e6c28ed9 100644
> --- a/sysdeps/pthread/flockfile.c
> +++ b/sysdeps/pthread/flockfile.c
> @@ -25,6 +25,7 @@
>  void
>  __flockfile (FILE *stream)
>  {
> +  stream->_flags2 |= _IO_FLAGS2_NEED_LOCK;
>    _IO_lock_lock (*stream->_lock);
>  }
>  strong_alias (__flockfile, _IO_flockfile)
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]