This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 6/7] stdlib: Optimization qsort{_r} swap implementation


Adhemerval Zanella wrote:
+static inline bool
+check_alignment (const void *base, size_t align)
+{
+  return _STRING_ARCH_unaligned || ((uintptr_t)base % (align - 1)) == 0;
+}

Surely the '(align - 1)' was supposed to be 'align'. Has this been tested on an architecture that does not allow unaligned access?

+static inline void
+swap_generic (void *a, void *b, size_t size)

Why is this inline? It's used only as a function pointer, and the other functions so used are not declared inline.

+static inline swap_t
+select_swap_func (const void *base, size_t size)
+{
+  if (size == 4 && check_alignment (base, 4))
+    return swap_u32;
+  else if (size == 8 && check_alignment (base, 8))
+    return swap_u64;
+  return swap_generic;
+}

The conditions aren't portable enough. Use something like this instead for swap_u32, and similarly for swap_u64.

  if (size == sizeof (uint32_t) && check_alignment (base, alignof (uint32_t)))
    return swap_u32;

+static void
+swap_u32 (void *a, void *b, size_t size)

The pointer arguments should be declared 'void *restrict'. This can help GCC generate better code. Similarly for the other swap functions.

+  uint32_t tmp = *(uint32_t*) a;
+  *(uint32_t*) a = *(uint32_t*) b;
+  *(uint32_t*) b = tmp;

It's nicer to avoid casts when possible, as is the case here and elsewhere. This is because casts are too powerful in C. Something like this, say:

    uint32_t *ua = a, *ub = b, tmp = *ua;
    *ua = *ub, *ub = tmp;

+  unsigned char tmp[128];

Why 128? A comment seems called for.

+static inline void
+swap_generic (void *a, void *b, size_t size)
+{
+  unsigned char tmp[128];
+  do
+    {
+      size_t s = size > sizeof (tmp) ? sizeof (tmp) : size;
+      memcpy (tmp, a, s);
+      a = __mempcpy (a, b, s);
+      b = __mempcpy (b, tmp, s);
+      size -= s;
+    }
+  while (size > 0);
+}

On my platform (GCC 7.2.1 20170915 (Red Hat 7.2.1-2) x86-64) this inlined the memcpy but not the mempcpy calls. How about something like this instead? It should let the compiler do a better job of block-move-style operations in the loop. If mempcpy is inlined for you, feel free to substitute it for two of the loop's calls to memcpy.

  static void
  swap_generic (void *restrict va, void *restrict vb, size_t size)
  {
    char *a = va, *b = vb;
    enum { n = 128 }; /* Why 128?  */
    unsigned char tmp[n];
    while (size >= n)
      {
	memcpy (tmp, a, n);
	memcpy (a, b, n);
	memcpy (b, tmp, n);
	a += n;
	b += n;
	size -= n;
      }
    memcpy (tmp, a, size);
    memcpy (a, b, size);
    memcpy (b, tmp, size);
  }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]