This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] nss_files: Fix re-reading of long lines [BZ #18991]


On 09/01/2017 08:04 PM, Florian Weimer wrote:
> This fixes the line skipping due to an insufficient read buffer size.
> 
> However, if the line length is sufficiently close to the buffer size, so
> that it does not leave enough room for the aliases pointer array, then
> we can get a very late ERANGE error in the line parser.  This will still
> result in a skipped line.  I think this means that bug 18991 is not a
> regression, strictly speaking.
> 
> The test case actually catches this, but only 64-bit architectures, due
> to the way the buffer sizes work out.

I think I found a way to salvage this approach.  If parse_line fails
with ERANGE, we need to seek back to the start of the line.  We can do
this because we know the exact length of the line in bytes at this point.

I added further testing with different name counts, so that both buffer
management errors should be covered on both 32-bit and 64-bit
architectures now.

I was curious how the additional lseek system calls due to the proactive
ftello call in __libc_readline_unlock would affect performance.  It
seems that they do not matter.  I benchmarked getpwnam_r in three
different ways: first user (root), a user roughly 10,000 entries down,
and a non-existing user with an /etc/passwd containing about 20,000
entries.  For root, there was no statistically significant difference.
For the other cases, the new approach is about 2% to 3% faster,
presumably due to the more efficient implementation of
__libc_readline_unlocked.  (There were no ERANGE errors involved in this
test; in case of ERANGE, performance is shot anyway.)

Thanks,
Florian
nss_files: Fix re-reading of long lines [BZ #18991]

Use the new __libc_readline_unlocked function to pick up
reading at the same line in case the buffer needs to be enlarged.

2017-09-02  Florian Weimer  <fweimer@redhat.com>

	[BZ #18991]
	* nss/nss_files/files-XXX.c (internal_getent): Use
	__libc_readline_unlocked.  Seek back to the start of the line if
	parsing failes with ERANGE.
	(get_contents_ret, get_contents): Remove.
	* nss/tst-nss-files-hosts-getent.c: New file.
	* nss/Makefile (tests): Add tst-nss-files-hosts-getent.
	(tst-nss-files-hosts-getent): Link with -ldl.

diff --git a/nss/Makefile b/nss/Makefile
index c9a5200f96..ac16e48d43 100644
--- a/nss/Makefile
+++ b/nss/Makefile
@@ -63,6 +63,7 @@ xtests			= bug-erange
 # Tests which need libdl
 ifeq (yes,$(build-shared))
 tests += tst-nss-files-hosts-erange
+tests += tst-nss-files-hosts-getent
 endif
 
 # If we have a thread library then we can test cancellation against
@@ -163,3 +164,4 @@ $(objpfx)tst-cancel-getpwuid_r: $(shared-thread-library)
 endif
 
 $(objpfx)tst-nss-files-hosts-erange: $(libdl)
+$(objpfx)tst-nss-files-hosts-getent: $(libdl)
diff --git a/nss/nss_files/files-XXX.c b/nss/nss_files/files-XXX.c
index 265331ef21..c6ec1a8458 100644
--- a/nss/nss_files/files-XXX.c
+++ b/nss/nss_files/files-XXX.c
@@ -128,51 +128,6 @@ CONCAT(_nss_files_end,ENTNAME) (void)
 }
 
 
-typedef enum
-{
-  gcr_ok = 0,
-  gcr_error = -1,
-  gcr_overflow = -2
-} get_contents_ret;
-
-/* Hack around the fact that fgets only accepts int sizes.  */
-static get_contents_ret
-get_contents (char *linebuf, size_t len, FILE *stream)
-{
-  size_t remaining_len = len;
-  char *curbuf = linebuf;
-
-  do
-    {
-      int curlen = ((remaining_len > (size_t) INT_MAX) ? INT_MAX
-		    : remaining_len);
-
-      /* Terminate the line so that we can test for overflow.  */
-      ((unsigned char *) curbuf)[curlen - 1] = 0xff;
-
-      char *p = fgets_unlocked (curbuf, curlen, stream);
-
-      /* EOF or read error.  */
-      if (p == NULL)
-        return gcr_error;
-
-      /* Done reading in the line.  */
-      if (((unsigned char *) curbuf)[curlen - 1] == 0xff)
-        return gcr_ok;
-
-      /* Drop the terminating '\0'.  */
-      remaining_len -= curlen - 1;
-      curbuf += curlen - 1;
-    }
-  /* fgets copies one less than the input length.  Our last iteration is of
-     REMAINING_LEN and once that is done, REMAINING_LEN is decremented by
-     REMAINING_LEN - 1, leaving the result as 1.  */
-  while (remaining_len > 1);
-
-  /* This means that the current buffer was not large enough.  */
-  return gcr_overflow;
-}
-
 /* Parsing the database file into `struct STRUCTURE' data structures.  */
 static enum nss_status
 internal_getent (FILE *stream, struct STRUCTURE *result,
@@ -191,45 +146,69 @@ internal_getent (FILE *stream, struct STRUCTURE *result,
       return NSS_STATUS_TRYAGAIN;
     }
 
-  do
+  while (true)
     {
-      get_contents_ret r = get_contents (data->linebuffer, linebuflen, stream);
-
-      if (r == gcr_error)
+      ssize_t r = __libc_readline_unlocked
+	(stream, data->linebuffer, linebuflen);
+      if (r < 0)
 	{
-	  /* End of file or read error.  */
+	  *errnop = errno;
+	  H_ERRNO_SET (NETDB_INTERNAL);
+	  if (*errnop == ERANGE)
+	    /* Request larger buffer.  */
+	    return NSS_STATUS_TRYAGAIN;
+	  else
+	    /* Other read failure.  */
+	    return NSS_STATUS_UNAVAIL;
+	}
+      else if (r == 0)
+	{
+	  /* End of file.  */
 	  H_ERRNO_SET (HOST_NOT_FOUND);
 	  return NSS_STATUS_NOTFOUND;
 	}
 
-      if (r == gcr_overflow)
-	{
-	  /* The line is too long.  Give the user the opportunity to
-	     enlarge the buffer.  */
-	  *errnop = ERANGE;
-	  H_ERRNO_SET (NETDB_INTERNAL);
-	  return NSS_STATUS_TRYAGAIN;
-	}
-
       /* Everything OK.  Now skip leading blanks.  */
       p = data->linebuffer;
       while (isspace (*p))
 	++p;
-    }
-  while (*p == '\0' || *p == '#' /* Ignore empty and comment lines.  */
-	 /* Parse the line.  If it is invalid, loop to get the next
-	    line of the file to parse.  */
-	 || ! (parse_result = parse_line (p, result, data, buflen, errnop
-					  EXTRA_ARGS)));
-
-  if (__glibc_unlikely (parse_result == -1))
-    {
-      H_ERRNO_SET (NETDB_INTERNAL);
-      return NSS_STATUS_TRYAGAIN;
-    }
 
-  /* Filled in RESULT with the next entry from the database file.  */
-  return NSS_STATUS_SUCCESS;
+      /* Ignore empty and comment lines.  */
+      if (*p == '\0' || *p == '#')
+	continue;
+
+      /* Parse the line.   */
+      *errnop = EINVAL;
+      parse_result = parse_line (p, result, data, buflen, errnop EXTRA_ARGS);
+
+      if (parse_result == -1)
+	{
+	  if (*errnop == ERANGE)
+	    {
+	      /* Return to the original file position at the beginning
+		 of the line, so that the next call can read it again
+		 if necessary.  */
+	      if (__fseeko64 (stream, -r, SEEK_CUR) != 0)
+		{
+		  if (errno == ERANGE)
+		    *errnop = EINVAL;
+		  else
+		    *errnop = errno;
+		  H_ERRNO_SET (NETDB_INTERNAL);
+		  return NSS_STATUS_UNAVAIL;
+		}
+	    }
+	  H_ERRNO_SET (NETDB_INTERNAL);
+	  return NSS_STATUS_TRYAGAIN;
+	}
+
+      /* Return the data if parsed successfully.  */
+      if (parse_result != 0)
+	return NSS_STATUS_SUCCESS;
+
+      /* If it is invalid, loop to get the next line of the file to
+	 parse.  */
+    }
 }
 
 
diff --git a/nss/tst-nss-files-hosts-getent.c b/nss/tst-nss-files-hosts-getent.c
new file mode 100644
index 0000000000..d7514e8e38
--- /dev/null
+++ b/nss/tst-nss-files-hosts-getent.c
@@ -0,0 +1,276 @@
+/* Enumerate /etc/hosts with a long line (bug 18991).
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <gnu/lib-names.h>
+#include <netdb.h>
+#include <nss.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <support/check.h>
+#include <support/check_nss.h>
+#include <support/namespace.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+#include <support/xmemstream.h>
+#include <support/xstdio.h>
+#include <support/xunistd.h>
+
+struct support_chroot *chroot_env;
+
+/* Number of alias names in the long line.  This is varied to catch
+   different cases where the ERANGE handling can go wrong (line buffer
+   length, alias buffer).  */
+static int name_count;
+
+/* Write /etc/hosts, from outside of the chroot.  */
+static void
+write_hosts (void)
+{
+  FILE *fp = xfopen (chroot_env->path_hosts, "w");
+  fputs ("127.0.0.1   localhost localhost.localdomain\n", fp);
+  fputs ("192.0.2.2 host2.example.com\n", fp);
+  fputs ("192.0.2.1", fp);
+  for (int i = 0; i < name_count; ++i)
+    fprintf (fp, " host%d.example.com", i);
+  fputs ("\n192.0.2.80 www.example.com\n"
+         "192.0.2.5 host5.example.com\n"
+         "192.0.2.81 www1.example.com\n", fp);
+  xfclose (fp);
+}
+
+const char *host1_expected =
+  "name: localhost\n"
+  "alias: localhost.localdomain\n"
+  "address: 127.0.0.1\n";
+const char *host2_expected =
+  "name: host2.example.com\n"
+  "address: 192.0.2.2\n";
+const char *host4_expected =
+  "name: www.example.com\n"
+  "address: 192.0.2.80\n";
+const char *host5_expected =
+  "name: host5.example.com\n"
+  "address: 192.0.2.5\n";
+const char *host6_expected =
+  "name: www1.example.com\n"
+  "address: 192.0.2.81\n";
+
+static void
+prepare (int argc, char **argv)
+{
+  chroot_env = support_chroot_create
+    ((struct support_chroot_configuration)
+     {
+       .resolv_conf = "",
+       .hosts = "",             /* Filled in by write_hosts.  */
+       .host_conf = "multi on\n",
+     });
+}
+
+/* If -1, no sethostent call.  Otherwise, pass do_stayopen as the
+   sethostent argument.  */
+static int do_stayopen;
+
+/* If non-zero, perform an endostent call.  */
+static int do_endent;
+
+static void
+subprocess_getent (void *closure)
+{
+  xchroot (chroot_env->path_chroot);
+
+  errno = 0;
+  if (do_stayopen >= 0)
+    sethostent (do_stayopen);
+  TEST_VERIFY (errno == 0);
+
+  int i = 0;
+  while (true)
+    {
+      struct xmemstream expected;
+      xopen_memstream (&expected);
+      switch (++i)
+        {
+        case 1:
+          fputs (host1_expected, expected.out);
+          break;
+        case 2:
+          fputs (host2_expected, expected.out);
+          break;
+        case 3:
+          fputs ("name: host0.example.com\n", expected.out);
+          for (int j = 1; j < name_count; ++j)
+            fprintf (expected.out, "alias: host%d.example.com\n", j);
+          fputs ("address: 192.0.2.1\n", expected.out);
+          break;
+        case 4:
+          fputs (host4_expected, expected.out);
+          break;
+        case 5:
+          fputs (host5_expected, expected.out);
+          break;
+        case 6:
+          fputs (host6_expected, expected.out);
+          break;
+        default:
+          fprintf (expected.out, "*** unexpected host %d ***\n", i);
+          break;
+        }
+      xfclose_memstream (&expected);
+      char *context = xasprintf ("do_stayopen=%d host=%d", do_stayopen, i);
+
+      errno = 0;
+      struct hostent *e = gethostent ();
+      if (e == NULL)
+        {
+          TEST_VERIFY (errno == 0);
+          break;
+        }
+      check_hostent (context, e, expected.buffer);
+      free (context);
+      free (expected.buffer);
+    }
+
+  errno = 0;
+  if (do_endent)
+    endhostent ();
+  TEST_VERIFY (errno == 0);
+
+  /* Exercise process termination.   */
+  exit (0);
+}
+
+/* getaddrinfo test.  To be run from a subprocess.  */
+static void
+test_gai (int family)
+{
+  struct addrinfo hints =
+    {
+      .ai_family = family,
+      .ai_protocol = IPPROTO_TCP,
+      .ai_socktype = SOCK_STREAM,
+    };
+
+  struct addrinfo *ai;
+  int ret = getaddrinfo ("host2.example.com", "80", &hints, &ai);
+  check_addrinfo ("host2.example.com", ai, ret,
+                  "address: STREAM/TCP 192.0.2.2 80\n"
+                  "address: STREAM/TCP 192.0.2.1 80\n");
+
+  ret = getaddrinfo ("host5.example.com", "80", &hints, &ai);
+  check_addrinfo ("host5.example.com", ai, ret,
+                  "address: STREAM/TCP 192.0.2.1 80\n"
+                  "address: STREAM/TCP 192.0.2.5 80\n");
+
+  ret = getaddrinfo ("www.example.com", "80", &hints, &ai);
+  check_addrinfo ("www.example.com", ai, ret,
+                  "address: STREAM/TCP 192.0.2.80 80\n");
+
+  ret = getaddrinfo ("www1.example.com", "80", &hints, &ai);
+  check_addrinfo ("www1.example.com", ai, ret,
+                  "address: STREAM/TCP 192.0.2.81 80\n");
+}
+
+/* Subprocess routine for gethostbyname/getaddrinfo testing.  */
+static void
+subprocess_gethost (void *closure)
+{
+  xchroot (chroot_env->path_chroot);
+
+  /* This tests enlarging the read buffer in the multi case.  */
+  struct xmemstream expected;
+  xopen_memstream (&expected);
+  fputs ("name: host2.example.com\n", expected.out);
+  for (int j = 1; j < name_count; ++j)
+    /* NB: host2 is duplicated in the alias list.  */
+    fprintf (expected.out, "alias: host%d.example.com\n", j);
+  fputs ("alias: host0.example.com\n"
+         "address: 192.0.2.2\n"
+         "address: 192.0.2.1\n",
+         expected.out);
+  xfclose_memstream (&expected);
+  check_hostent ("host2.example.com",
+                 gethostbyname ("host2.example.com"),
+                 expected.buffer);
+  free (expected.buffer);
+
+  /* Similarly, but with a different order in the /etc/hosts file.  */
+  xopen_memstream (&expected);
+  fputs ("name: host0.example.com\n", expected.out);
+  for (int j = 1; j < name_count; ++j)
+    fprintf (expected.out, "alias: host%d.example.com\n", j);
+  /* NB: host5 is duplicated in the alias list.  */
+  fputs ("alias: host5.example.com\n"
+         "address: 192.0.2.1\n"
+         "address: 192.0.2.5\n",
+         expected.out);
+  xfclose_memstream (&expected);
+  check_hostent ("host5.example.com",
+                 gethostbyname ("host5.example.com"),
+                 expected.buffer);
+  free (expected.buffer);
+
+  check_hostent ("www.example.com",
+                 gethostbyname ("www.example.com"),
+                 host4_expected);
+  check_hostent ("www1.example.com",
+                 gethostbyname ("www1.example.com"),
+                 host6_expected);
+
+  test_gai (AF_INET);
+  test_gai (AF_UNSPEC);
+}
+
+static int
+do_test (void)
+{
+  support_become_root ();
+  if (!support_can_chroot ())
+    return EXIT_UNSUPPORTED;
+
+  __nss_configure_lookup ("hosts", "files");
+  if (dlopen (LIBNSS_FILES_SO, RTLD_LAZY) == NULL)
+    FAIL_EXIT1 ("could not load " LIBNSS_DNS_SO ": %s", dlerror ());
+
+  /* Each name takes about 20 bytes, so this covers a wide range of
+     buffer sizes, from less than 1000 bytes to about 18000 bytes.  */
+  for (name_count = 40; name_count <= 850; ++name_count)
+    {
+      write_hosts ();
+
+      for (do_stayopen = -1; do_stayopen < 2; ++do_stayopen)
+        for (do_endent = 0; do_endent < 2; ++do_endent)
+          {
+            if (test_verbose > 0)
+              printf ("info: name_count=%d do_stayopen=%d do_endent=%d\n",
+                      name_count, do_stayopen, do_endent);
+            support_isolate_in_subprocess (subprocess_getent, NULL);
+          }
+
+      support_isolate_in_subprocess (subprocess_gethost, NULL);
+    }
+
+  support_chroot_free (chroot_env);
+  return 0;
+}
+
+#define PREPARE prepare
+#include <support/test-driver.c>

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]