This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] nss_files: Avoid large buffers with many host addresses [BZ #22078]
- From: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
- To: libc-alpha at sourceware dot org
- Date: Tue, 5 Sep 2017 15:40:27 -0300
- Subject: Re: [PATCH] nss_files: Avoid large buffers with many host addresses [BZ #22078]
- Authentication-results: sourceware.org; auth=none
- References: <20170904173210.3CE87439942E3@oldenburg.str.redhat.com>
On 04/09/2017 14:32, Florian Weimer wrote:
> The previous implementation had at least a quadratic space
> requirement in the number of host addresses and aliases.
>
> 2017-09-04 Florian Weimer <fweimer@redhat.com>
>
> [BZ #22078]
> Avoid large NSS buffers with many addresses, aliases.
> * nss/nss_files/files-hosts.c (gethostbyname3_multi): Rewrite
> using dynarrays and struct alloc_buffer.
> * nss/Makefile (tests): Add tst-nss-files-hosts-multi.
> (tst-nss-files-hosts-multi): Link with -ldl.
> * nss/tst-nss-files-hosts-multi.c: New file.
>
> diff --git a/nss/Makefile b/nss/Makefile
> index c9a5200f96..ecf3530188 100644
> --- a/nss/Makefile
> +++ b/nss/Makefile
> @@ -63,6 +63,7 @@ xtests = bug-erange
> # Tests which need libdl
> ifeq (yes,$(build-shared))
> tests += tst-nss-files-hosts-erange
> +tests += tst-nss-files-hosts-multi
> endif
>
> # If we have a thread library then we can test cancellation against
> @@ -163,3 +164,4 @@ $(objpfx)tst-cancel-getpwuid_r: $(shared-thread-library)
> endif
>
> $(objpfx)tst-nss-files-hosts-erange: $(libdl)
> +$(objpfx)tst-nss-files-hosts-multi: $(libdl)
> diff --git a/nss/nss_files/files-hosts.c b/nss/nss_files/files-hosts.c
> index c2cd07584c..f40673056b 100644
> --- a/nss/nss_files/files-hosts.c
> +++ b/nss/nss_files/files-hosts.c
> @@ -23,6 +23,7 @@
> #include <netdb.h>
> #include <resolv/resolv-internal.h>
> #include <scratch_buffer.h>
> +#include <alloc_buffer.h>
>
>
> /* Get implementation for some internal functions. */
> @@ -116,24 +117,46 @@ DB_LOOKUP (hostbyaddr, ,,,
> }, const void *addr, socklen_t len, int af)
> #undef EXTRA_ARGS_VALUE
>
> +/* Type of the address and alias arrays. */
> +#define DYNARRAY_STRUCT array
> +#define DYNARRAY_ELEMENT char *
> +#define DYNARRAY_PREFIX array_
> +#include <malloc/dynarray-skeleton.c>
> +
This will create a 16 elements vector as default (128 bytes). Should we
aim to use the default or can we use a slight large buffer to speed up
slightly generic case (assuming generic case use more than 16 entries)?
> static enum nss_status
> gethostbyname3_multi (FILE * stream, const char *name, int af,
> struct hostent *result, char *buffer, size_t buflen,
> int *errnop, int *herrnop, int flags)
> {
> + assert (af == AF_INET || af == AF_INET6);
> +
> /* We have to get all host entries from the file. */
> struct scratch_buffer tmp_buffer;
> scratch_buffer_init (&tmp_buffer);
> struct hostent tmp_result_buf;
> - int naddrs = 1;
> - int naliases = 0;
> - char *bufferend;
> + struct array addresses;
> + array_init (&addresses);
> + struct array aliases;
> + array_init (&aliases);
> enum nss_status status;
> + bool new_data = false;
>
> - while (result->h_aliases[naliases] != NULL)
> - ++naliases;
> + /* The output buffer uses the unused space after the first
> + result. */
> + struct alloc_buffer outbuf;
Indentation seems off here.
> + {
> + /* Preserve the aliases encountered so far. */
> + size_t i;
> + for (i = 0; result->h_aliases[i] != NULL; ++i)
> + array_add (&aliases, result->h_aliases[i]);
>
> - bufferend = (char *) &result->h_aliases[naliases + 1];
> + char *bufferend = (char *) &result->h_aliases[i + 1];
> + outbuf = alloc_buffer_create (bufferend, buffer + buflen - bufferend);
> + }
> +
> + /* Preserve the addresses. */
> + for (size_t i = 0; result->h_addr_list[i] != NULL; ++i)
> + array_add (&addresses, result->h_addr_list[i]);
>
> again:
> while ((status = internal_getent (stream, &tmp_result_buf, tmp_buffer.data,
> @@ -154,110 +177,72 @@ gethostbyname3_multi (FILE * stream, const char *name, int af,
> }
> while ((matches = 0));
>
> + /* If the line matches, we need to copy the addresses and
> + aliases, so that we can reuse tmp_buffer for the next
> + line. */
> if (matches)
> {
> - /* We could be very clever and try to recycle a few bytes
> - in the buffer instead of generating new arrays. But
> - we are not doing this here since it's more work than
> - it's worth. Simply let the user provide a bit bigger
> - buffer. */
> - char **new_h_addr_list;
> - char **new_h_aliases;
> - int newaliases = 0;
> - size_t newstrlen = 0;
> - int cnt;
> + new_data = true;
>
> - /* Count the new aliases and the length of the strings. */
> - while (tmp_result_buf.h_aliases[newaliases] != NULL)
> + /* Record the addresses. */
> + for (int i = 0; tmp_result_buf.h_addr_list[i] != NULL; ++i)
> {
> - char *cp = tmp_result_buf.h_aliases[newaliases];
> - ++newaliases;
> - newstrlen += strlen (cp) + 1;
> + /* Allocate the target space in the output buffer,
> + depending on the address family. */
> + void *target;
> + if (af == AF_INET)
> + {
> + assert (tmp_result_buf.h_length == 4);
> + target = alloc_buffer_alloc (&outbuf, struct in_addr);
> + }
> + else if (af == AF_INET6)
> + {
> + assert (tmp_result_buf.h_length == 16);
> + target = alloc_buffer_alloc (&outbuf, struct in6_addr);
> + }
> + else
> + __builtin_unreachable ();
> +
> + if (target == NULL)
> + {
> + /* Request a larger output buffer. */
> + *errnop = ERANGE;
> + *herrnop = NETDB_INTERNAL;
> + status = NSS_STATUS_TRYAGAIN;
> + goto out;
> + }
> + memcpy (target, tmp_result_buf.h_addr_list[i],
> + tmp_result_buf.h_length);
> + array_add (&addresses, target);
> }
> - /* If the real name is different add it also to the
> - aliases. This means that there is a duplication
> - in the alias list but this is really the user's
> - problem. */
> - if (strcmp (old_result->h_name,
> - tmp_result_buf.h_name) != 0)
> +
> + /* Record the aliases. */
> + for (int i = 0; tmp_result_buf.h_aliases[i] != NULL; ++i)
Shouldn't it use size_t for index?
> {
> - ++newaliases;
> - newstrlen += strlen (tmp_result_buf.h_name) + 1;
> + char *alias = tmp_result_buf.h_aliases[i];
> + array_add (&aliases, alloc_buffer_copy_string (&outbuf, alias));
Shouldn't it check and bail for failed allocation for alloc_buffer_copy_string ?
> }
>
> - /* Make sure bufferend is aligned. */
> - assert ((bufferend - (char *) 0) % sizeof (char *) == 0);
> + /* If the real name is different add it also to the aliases.
> + This means that there is a duplication in the alias list
> + but this is really the user's problem. */
> + {
> + char *new_name = tmp_result_buf.h_name;
> + if (strcmp (old_result->h_name, new_name) != 0)
> + array_add (&aliases,
> + alloc_buffer_copy_string (&outbuf, new_name));
> + }
Same as before.
>
> - /* Now we can check whether the buffer is large enough.
> - 16 is the maximal size of the IP address. */
> - if (bufferend + 16 + (naddrs + 2) * sizeof (char *)
> - + roundup (newstrlen, sizeof (char *))
> - + (naliases + newaliases + 1) * sizeof (char *)
> - >= buffer + buflen)
> + /* Memory allocation failures during the expansion of the
> + temporary arrays. */
> + if (array_has_failed (&addresses) || array_has_failed (&aliases))
> {
> - *errnop = ERANGE;
> + *errnop = errno;
> *herrnop = NETDB_INTERNAL;
> - status = NSS_STATUS_TRYAGAIN;
> + status = NSS_STATUS_UNAVAIL;
> goto out;
> }
>
> - new_h_addr_list =
> - (char **) (bufferend
> - + roundup (newstrlen, sizeof (char *))
> - + 16);
> - new_h_aliases =
> - (char **) ((char *) new_h_addr_list
> - + (naddrs + 2) * sizeof (char *));
> -
> - /* Copy the old data in the new arrays. */
> - for (cnt = 0; cnt < naddrs; ++cnt)
> - new_h_addr_list[cnt] = old_result->h_addr_list[cnt];
> -
> - for (cnt = 0; cnt < naliases; ++cnt)
> - new_h_aliases[cnt] = old_result->h_aliases[cnt];
> -
> - /* Store the new strings. */
> - cnt = 0;
> - while (tmp_result_buf.h_aliases[cnt] != NULL)
> - {
> - new_h_aliases[naliases++] = bufferend;
> - bufferend = (__stpcpy (bufferend,
> - tmp_result_buf.h_aliases[cnt])
> - + 1);
> - ++cnt;
> - }
> -
> - if (cnt < newaliases)
> - {
> - new_h_aliases[naliases++] = bufferend;
> - bufferend = __stpcpy (bufferend,
> - tmp_result_buf.h_name) + 1;
> - }
> -
> - /* Final NULL pointer. */
> - new_h_aliases[naliases] = NULL;
> -
> - /* Round up the buffer end address. */
> - bufferend += (sizeof (char *)
> - - ((bufferend - (char *) 0)
> - % sizeof (char *))) % sizeof (char *);
> -
> - /* Now the new address. */
> - new_h_addr_list[naddrs++] =
> - memcpy (bufferend, tmp_result_buf.h_addr,
> - tmp_result_buf.h_length);
> -
> - /* Also here a final NULL pointer. */
> - new_h_addr_list[naddrs] = NULL;
> -
> - /* Store the new array pointers. */
> - old_result->h_aliases = new_h_aliases;
> - old_result->h_addr_list = new_h_addr_list;
> -
> - /* Compute the new buffer end. */
> - bufferend = (char *) &new_h_aliases[naliases + 1];
> - assert (bufferend <= buffer + buflen);
> -
> result = old_result;
> }
> }
> @@ -266,16 +251,52 @@ gethostbyname3_multi (FILE * stream, const char *name, int af,
> {
> if (!scratch_buffer_grow (&tmp_buffer))
> {
> + *errnop = errno;
> *herrnop = NETDB_INTERNAL;
> - status = NSS_STATUS_TRYAGAIN;
> + status = NSS_STATUS_UNAVAIL;
> }
> else
> goto again;
> }
> else
> - status = NSS_STATUS_SUCCESS;
> + {
> + /* Copy the address and alias arrays into the output buffer and
> + add NULL terminators. The pointed-to elements were directly
> + written into the output buffer above and do not need to be
> + copied again. */
> + if (new_data)
> + {
> + size_t addresses_count = array_size (&addresses);
> + size_t aliases_count = array_size (&aliases);
> + char **out_addresses = alloc_buffer_alloc_array
> + (&outbuf, char *, addresses_count + 1);
> + char **out_aliases = alloc_buffer_alloc_array
> + (&outbuf, char *, aliases_count + 1);
> + if (out_addresses == NULL || out_aliases == NULL)
> + {
> + /* The output buffer is not large enough. */
> + *errnop = ERANGE;
> + *herrnop = NETDB_INTERNAL;
> + status = NSS_STATUS_TRYAGAIN;
> + goto out;
> + }
> + memcpy (out_addresses, array_begin (&addresses),
> + addresses_count * sizeof (char *));
> + out_addresses[addresses_count] = NULL;
> + memcpy (out_aliases, array_begin (&aliases),
> + aliases_count * sizeof (char *));
> + out_aliases[aliases_count] = NULL;
> +
> + result->h_addr_list = out_addresses;
> + result->h_aliases = out_aliases;
> + }
> +
> + status = NSS_STATUS_SUCCESS;
> + }
> out:
> scratch_buffer_free (&tmp_buffer);
> + array_free (&addresses);
> + array_free (&aliases);
> return status;
> }
>
> diff --git a/nss/tst-nss-files-hosts-multi.c b/nss/tst-nss-files-hosts-multi.c
> new file mode 100644
> index 0000000000..c86a4e05d0
> --- /dev/null
> +++ b/nss/tst-nss-files-hosts-multi.c
> @@ -0,0 +1,327 @@
> +/* Parse /etc/hosts in multi mode with many addresses/aliases.
> + Copyright (C) 2017 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#include <dlfcn.h>
> +#include <errno.h>
> +#include <gnu/lib-names.h>
> +#include <netdb.h>
> +#include <nss.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <support/check.h>
> +#include <support/check_nss.h>
> +#include <support/namespace.h>
> +#include <support/support.h>
> +#include <support/test-driver.h>
> +#include <support/test-driver.h>
> +#include <support/xmemstream.h>
> +#include <support/xstdio.h>
> +#include <support/xunistd.h>
> +#include <sys/resource.h>
> +
> +struct support_chroot *chroot_env;
> +
> +static void
> +prepare (int argc, char **argv)
> +{
> + chroot_env = support_chroot_create
> + ((struct support_chroot_configuration)
> + {
> + .resolv_conf = "",
> + .hosts = "", /* See write_hosts below. */
> + .host_conf = "multi on\n",
> + });
> +}
> +
> +/* Create the /etc/hosts file from outside the chroot. */
> +static void
> +write_hosts (int count)
> +{
> + TEST_VERIFY (count > 0 && count <= 65535);
> + FILE *fp = xfopen (chroot_env->path_hosts, "w");
> + fputs ("127.0.0.1 localhost localhost.localdomain\n"
> + "::1 localhost localhost.localdomain\n",
> + fp);
> + for (int i = 0; i < count; ++i)
> + {
> + fprintf (fp, "10.4.%d.%d www4.example.com\n",
> + (i / 256) & 0xff, i & 0xff);
> + fprintf (fp, "10.46.%d.%d www.example.com\n",
> + (i / 256) & 0xff, i & 0xff);
> + fprintf (fp, "192.0.2.1 alias.example.com v4-%d.example.com\n", i);
> + fprintf (fp, "2001:db8::6:%x www6.example.com\n", i);
> + fprintf (fp, "2001:db8::46:%x www.example.com\n", i);
> + fprintf (fp, "2001:db8::1 alias.example.com v6-%d.example.com\n", i);
> + }
> + xfclose (fp);
> +}
> +
> +/* Parameters of a single test. */
> +struct test_params
> +{
> + const char *name; /* Name to query. */
> + const char *marker; /* Address marker for the name. */
> + int count; /* Number of addresses/aliases. */
> + int family; /* AF_INET, AF_INET_6 or AF_UNSPEC. */
> + bool canonname; /* True if AI_CANONNAME should be enabled. */
> +};
> +
> +/* Expected result of gethostbyname/gethostbyname2. */
> +static char *
> +expected_ghbn (const struct test_params *params)
> +{
> + TEST_VERIFY (params->family == AF_INET || params->family == AF_INET6);
> +
> + struct xmemstream expected;
> + xopen_memstream (&expected);
> + if (strcmp (params->name, "alias.example.com") == 0)
> + {
> + fprintf (expected.out, "name: %s\n", params->name);
> + char af;
> + if (params->family == AF_INET)
> + af = '4';
> + else
> + af = '6';
> + for (int i = 0; i < params->count; ++i)
> + fprintf (expected.out, "alias: v%c-%d.example.com\n", af, i);
> +
> + for (int i = 0; i < params->count; ++i)
> + if (params->family == AF_INET)
> + fputs ("address: 192.0.2.1\n", expected.out);
> + else
> + fputs ("address: 2001:db8::1\n", expected.out);
> + }
> + else /* www/www4/www6 name. */
> + {
> + bool do_ipv4 = params->family == AF_INET
> + && strncmp (params->name, "www6", 4) != 0;
> + bool do_ipv6 = params->family == AF_INET6
> + && strncmp (params->name, "www4", 4) != 0;
> + if (do_ipv4 || do_ipv6)
> + {
> + fprintf (expected.out, "name: %s\n", params->name);
> + if (do_ipv4)
> + for (int i = 0; i < params->count; ++i)
> + fprintf (expected.out, "address: 10.%s.%d.%d\n",
> + params->marker, i / 256, i % 256);
> + if (do_ipv6)
> + for (int i = 0; i < params->count; ++i)
> + fprintf (expected.out, "address: 2001:db8::%s:%x\n",
> + params->marker, i);
> + }
> + else
> + fputs ("error: HOST_NOT_FOUND\n", expected.out);
> + }
> + xfclose_memstream (&expected);
> + return expected.buffer;
> +}
> +
> +/* Expected result of getaddrinfo. */
> +static char *
> +expected_gai (const struct test_params *params)
> +{
> + bool do_ipv4 = false;
> + bool do_ipv6 = false;
> + if (params->family == AF_UNSPEC)
> + do_ipv4 = do_ipv6 = true;
> + else if (params->family == AF_INET)
> + do_ipv4 = true;
> + else if (params->family == AF_INET6)
> + do_ipv6 = true;
> +
> + struct xmemstream expected;
> + xopen_memstream (&expected);
> + if (strcmp (params->name, "alias.example.com") == 0)
> + {
> + if (params->canonname)
> + fprintf (expected.out,
> + "flags: AI_CANONNAME\n"
> + "canonname: %s\n",
> + params->name);
> +
> + if (do_ipv4)
> + for (int i = 0; i < params->count; ++i)
> + fputs ("address: STREAM/TCP 192.0.2.1 80\n", expected.out);
> + if (do_ipv6)
> + for (int i = 0; i < params->count; ++i)
> + fputs ("address: STREAM/TCP 2001:db8::1 80\n", expected.out);
> + }
> + else /* www/www4/www6 name. */
> + {
> + if (strncmp (params->name, "www4", 4) == 0)
> + do_ipv6 = false;
> + else if (strncmp (params->name, "www6", 4) == 0)
> + do_ipv4 = false;
> + /* Otherwise, we have www as the name, so we do both. */
> +
> + if (do_ipv4 || do_ipv6)
> + {
> + if (params->canonname)
> + fprintf (expected.out,
> + "flags: AI_CANONNAME\n"
> + "canonname: %s\n",
> + params->name);
> +
> + if (do_ipv4)
> + for (int i = 0; i < params->count; ++i)
> + fprintf (expected.out, "address: STREAM/TCP 10.%s.%d.%d 80\n",
> + params->marker, i / 256, i % 256);
> + if (do_ipv6)
> + for (int i = 0; i < params->count; ++i)
> + fprintf (expected.out,
> + "address: STREAM/TCP 2001:db8::%s:%x 80\n",
> + params->marker, i);
> + }
> + else
> + fputs ("error: Name or service not known\n", expected.out);
> + }
> + xfclose_memstream (&expected);
> + return expected.buffer;
> +}
> +
> +static void
> +run_gbhn_gai (struct test_params *params)
> +{
> + char *ctx = xasprintf ("name=%s marker=%s count=%d family=%d",
> + params->name, params->marker, params->count,
> + params->family);
> + if (test_verbose > 0)
> + printf ("info: %s\n", ctx);
> +
> + /* Check gethostbyname, gethostbyname2. */
> + if (params->family == AF_INET)
> + {
> + char *expected = expected_ghbn (params);
> + check_hostent (ctx, gethostbyname (params->name), expected);
> + free (expected);
> + }
> + if (params->family != AF_UNSPEC)
> + {
> + char *expected = expected_ghbn (params);
> + check_hostent (ctx, gethostbyname2 (params->name, params->family),
> + expected);
> + free (expected);
> + }
> +
> + /* Check getaddrinfo. */
> + for (int do_canonical = 0; do_canonical < 2; ++do_canonical)
> + {
> + params->canonname = do_canonical;
> + char *expected = expected_gai (params);
> + struct addrinfo hints =
> + {
> + .ai_family = params->family,
> + .ai_socktype = SOCK_STREAM,
> + .ai_protocol = IPPROTO_TCP,
> + };
> + if (do_canonical)
> + hints.ai_flags |= AI_CANONNAME;
> + struct addrinfo *ai;
> + int ret = getaddrinfo (params->name, "80", &hints, &ai);
> + check_addrinfo (ctx, ai, ret, expected);
> + if (ret == 0)
> + freeaddrinfo (ai);
> + free (expected);
> + }
> +
> + free (ctx);
> +}
> +
> +/* Callback for the subprocess which runs the test in a chroot. */
> +static void
> +subprocess (void *closure)
> +{
> + struct test_params *params = closure;
> +
> + xchroot (chroot_env->path_chroot);
> +
> + static const int families[] = { AF_INET, AF_INET6, AF_UNSPEC, -1 };
> + static const char *const names[] =
> + {
> + "www.example.com", "www4.example.com", "www6.example.com",
> + "alias.example.com",
> + NULL
> + };
> + static const char *const names_marker[] = { "46", "4", "6", "" };
> +
> + for (int family_idx = 0; families[family_idx] >= 0; ++family_idx)
> + {
> + params->family = families[family_idx];
> + for (int names_idx = 0; names[names_idx] != NULL; ++names_idx)
> + {
> + params->name = names[names_idx];
> + params->marker = names_marker[names_idx];
> + run_gbhn_gai (params);
> + }
> + }
> +}
> +
> +/* Run the test for a specific number of addresses/aliases. */
> +static void
> +run_test (int count)
> +{
> + write_hosts (count);
> +
> + struct test_params params =
> + {
> + .count = count,
> + };
> +
> + support_isolate_in_subprocess (subprocess, ¶ms);
> +}
> +
> +static int
> +do_test (void)
> +{
I think it is missing the required support to run as a normal use:
[...]
support_become_root ();
if (!support_can_chroot ())
return EXIT_UNSUPPORTED;
[...]
> + /* This test should not use gigabytes of memory. */
> + {
> + struct rlimit limit;
> + if (getrlimit (RLIMIT_AS, &limit) != 0)
> + {
> + printf ("getrlimit (RLIMIT_AS) failed: %m\n");
> + return 1;
> + }
> + long target = 200 * 1024 * 1024;
> + if (limit.rlim_cur == RLIM_INFINITY || limit.rlim_cur > target)
> + {
> + limit.rlim_cur = target;
> + if (setrlimit (RLIMIT_AS, &limit) != 0)
> + {
> + printf ("setrlimit (RLIMIT_AS) failed: %m\n");
> + return 1;
> + }
> + }
> + }
> +
> + __nss_configure_lookup ("hosts", "files");
> + if (dlopen (LIBNSS_FILES_SO, RTLD_LAZY) == NULL)
> + FAIL_EXIT1 ("could not load " LIBNSS_DNS_SO ": %s", dlerror ());
> +
> + /* Run the tests with a few different address/alias counts. */
> + for (int count = 1; count <= 111; ++count)
> + run_test (count);
> + run_test (1111);
> + run_test (22222);
> +
> + support_chroot_free (chroot_env);
> + return 0;
> +}
> +
> +#define PREPARE prepare
> +#include <support/test-driver.c>
>