This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix handling of collating symbols in regexps


On 02/07/2013 04:17 PM, Roland McGrath wrote:
> Paul, is there a downside to the wrappers approach?

No, that should work and it sounds better.

To answer your earlier question, the ABI change alters the
layouts of struct re_pattern_buffer aka regex_t,
struct re_registers, regmatch_t, and regoff_t; and it alters
the calling conventions of re_search, re_search_2, re_match,
re_match_2, re_set_registers, re_compile_pattern, re_compile_fastmap,
regcomp, regexec, regerror, and regfree.  Here is the relevant
part of the diff to regex.h:

--- glibc/posix/regex.h	2013-01-03 09:40:24.130105228 -0800
+++ gnulib/lib/regex.h	2013-02-03 21:51:40.068080524 -0800
...
+#ifdef _REGEX_LARGE_OFFSETS
+
+/* Use types and values that are wide enough to represent signed and
+   unsigned byte offsets in memory.  This currently works only when
+   the regex code is used outside of the GNU C library; it is not yet
+   supported within glibc itself, and glibc users should not define
+   _REGEX_LARGE_OFFSETS.  */
+
+/* The type of nonnegative object indexes.  Traditionally, GNU regex
+   uses 'int' for these.  Code that uses __re_idx_t should work
+   regardless of whether the type is signed.  */
+typedef size_t __re_idx_t;
+
+/* The type of object sizes.  */
+typedef size_t __re_size_t;
+
+/* The type of object sizes, in places where the traditional code
+   uses unsigned long int.  */
+typedef size_t __re_long_size_t;
+
+#else
+
+/* The traditional GNU regex implementation mishandles strings longer
+   than INT_MAX.  */
+typedef int __re_idx_t;
+typedef unsigned int __re_size_t;
+typedef unsigned long int __re_long_size_t;
+
+#endif
+
 /* The following two types have to be signed and unsigned integer type
    wide enough to hold a value of a pointer.  For most ANSI compilers
    ptrdiff_t and size_t should be likely OK.  Still size of these two
...
@@ -356,16 +422,15 @@
 
 struct re_pattern_buffer
 {
-  /* Space that holds the compiled pattern.  It is declared as
-     `unsigned char *' because its elements are sometimes used as
-     array indexes.  */
-  unsigned char *__REPB_PREFIX(buffer);
+  /* Space that holds the compiled pattern.  The type
+     'struct re_dfa_t' is private and is not declared here.  */
+  struct re_dfa_t *__REPB_PREFIX(buffer);
 
   /* Number of bytes to which `buffer' points.  */
-  unsigned long int __REPB_PREFIX(allocated);
+  __re_long_size_t __REPB_PREFIX(allocated);
 
   /* Number of bytes actually used in `buffer'.  */
-  unsigned long int __REPB_PREFIX(used);
+  __re_long_size_t __REPB_PREFIX(used);
 
   /* Syntax setting with which the pattern was compiled.  */
   reg_syntax_t __REPB_PREFIX(syntax);
...
@@ -423,7 +488,16 @@
 typedef struct re_pattern_buffer regex_t;
 
 /* Type for byte offsets within the string.  POSIX mandates this.  */
+#ifdef _REGEX_LARGE_OFFSETS
+/* POSIX 1003.1-2008 requires that regoff_t be at least as wide as
+   ptrdiff_t and ssize_t.  We don't know of any hosts where ptrdiff_t
+   is wider than ssize_t, so ssize_t is safe.  */
+typedef ssize_t regoff_t;
+#else
+/* The traditional GNU regex implementation mishandles strings longer
+   than INT_MAX.  */
 typedef int regoff_t;
+#endif
 
 
 #ifdef __USE_GNU
@@ -431,7 +505,7 @@
    regex.texinfo for a full description of what registers match.  */
 struct re_registers
 {
-  unsigned num_regs;
+  __re_size_t num_regs;
   regoff_t *start;
   regoff_t *end;
 };
@@ -485,47 +559,52 @@
    characters.  Return the starting position of the match, -1 for no
    match, or -2 for an internal error.  Also return register
    information in REGS (if REGS and BUFFER->no_sub are nonzero).  */
-extern int re_search (struct re_pattern_buffer *__buffer, const char *__string,
-		      int __length, int __start, int __range,
+extern regoff_t re_search (struct re_pattern_buffer *__buffer,
+			   const char *__string, __re_idx_t __length,
+			   __re_idx_t __start, regoff_t __range,
 		      struct re_registers *__regs);
 
 
 /* Like `re_search', but search in the concatenation of STRING1 and
    STRING2.  Also, stop searching at index START + STOP.  */
-extern int re_search_2 (struct re_pattern_buffer *__buffer,
-			const char *__string1, int __length1,
-			const char *__string2, int __length2, int __start,
-			int __range, struct re_registers *__regs, int __stop);
+extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
+			     const char *__string1, __re_idx_t __length1,
+			     const char *__string2, __re_idx_t __length2,
+			     __re_idx_t __start, regoff_t __range,
+			     struct re_registers *__regs,
+			     __re_idx_t __stop);
 
 
 /* Like `re_search', but return how many characters in STRING the regexp
    in BUFFER matched, starting at position START.  */
-extern int re_match (struct re_pattern_buffer *__buffer, const char *__string,
-		     int __length, int __start, struct re_registers *__regs);
+extern regoff_t re_match (struct re_pattern_buffer *__buffer,
+			  const char *__string, __re_idx_t __length,
+			  __re_idx_t __start, struct re_registers *__regs);
 
 
 /* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
-extern int re_match_2 (struct re_pattern_buffer *__buffer,
-		       const char *__string1, int __length1,
-		       const char *__string2, int __length2, int __start,
-		       struct re_registers *__regs, int __stop);
+extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
+			    const char *__string1, __re_idx_t __length1,
+			    const char *__string2, __re_idx_t __length2,
+			    __re_idx_t __start, struct re_registers *__regs,
+			    __re_idx_t __stop);
 
 
 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
    ENDS.  Subsequent matches using BUFFER and REGS will use this memory
    for recording register information.  STARTS and ENDS must be
    allocated with malloc, and must each be at least `NUM_REGS * sizeof
    (regoff_t)' bytes long.
 
    If NUM_REGS == 0, then subsequent matches should allocate their own
    register data.
 
    Unless this function is called, the first search or match using
    PATTERN_BUFFER will allocate its own register data, without
    freeing the old data.  */
 extern void re_set_registers (struct re_pattern_buffer *__buffer,
 			      struct re_registers *__regs,
-			      unsigned int __num_regs,
+			      __re_size_t __num_regs,
 			      regoff_t *__starts, regoff_t *__ends);
 #endif	/* Use GNU */
 
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]