This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] More regex microoptimization


Hi.  This further series of speedups results in an improvement of
6-8% on all the single-byte testcases (no backreferences, single-
byte character sets w/o collation symbols) I tried.  They can be
categorized as follows:

1) add several __builtin_expect predictions.  Most predict that
mb_cur_max == 1, that matching is case-sensitive, and that no
translation table is used.  This is true for all three of sed, awk
and grep by default.  Every group of predictions was checked one by one.

2) streamline more of transit_state.  More code was moved out of it
into check_matching; in addition, I decided to penalize the 512-entry
transition table case in exchange for skipping the test of a flag
in the common case -- the penalized case is already much slower due
to calling iswalnum.  This is also the reason for the changes in
regex_internal.h and in build_trtable.

3) use a switch statement to pick one of the several incantations
of fastmap scanning.  This wins especially when many matches are
attempted but most fail, e.g. for regexes starting with a period.

Paolo

2004-03-05  Paolo Bonzini  <bonzini@gnu.org>

	* posix/regex_internal.c (free_state): Free the
	word_trtable if it is used.
	(re_string_reconstruct, re_string_context_at): Add
	several branch predictions for mb_cur_max == 1,
	case-sensitive matching and no translation table being used.
	* posix/regex_internal.h (re_dfastate_t): Turn the
	word_trtable from a 1-bit flag into a pointer to a
	transition table.
	* posix/regexec.c (acquire_init_state_context): Do not
	always inline.
	* posix/regexec.c (build_trtable): Store the transition
	table into state.  Return a boolean indicating success.
	(transit_state): Check trtable and word_trtable separately,
	instead of looking into word_trtable for information about
	the trtable's contents.  Remove the check for out-of-bounds
	buffers.
	(check_matching): Check here for out-of-bounds buffers.
	(match_ctx_free_subtops): Remove, merge into...
	(match_ctx_clean): ... this function.
	(match_ctx_free): Call match_ctx_clean.
	(re_search_internal): Store into match_kind a set of bits
	indicating which incantation of fastmap scanning must be
	used.  Use a switch statement instead of multiple ifs.
	Exit the final "for (;;)" with goto free_return unless
	the match succeeded, thus simplifying some conditionals.


diff -u save/regex_internal.c ./regex_internal.c
--- save/regex_internal.c	2004-03-10 12:28:08.000000000 +0100
+++ ./regex_internal.c	2004-03-10 12:28:35.000000000 +0100
@@ -581,7 +581,7 @@
      int idx, eflags;
 {
   int offset = idx - pstr->raw_mbs_idx;
-  if (offset < 0)
+  if (BE (offset < 0, 0))
     {
       /* Reset buffer.  */
 #ifdef RE_ENABLE_I18N
@@ -601,10 +601,10 @@
       offset = idx;
     }
 
-  if (offset != 0)
+  if (BE (offset != 0, 1))
     {
       /* Are the characters which are already checked remain?  */
-      if (offset < pstr->valid_raw_len
+      if (BE (offset < pstr->valid_raw_len, 1)
 #ifdef RE_ENABLE_I18N
 	  /* Handling this would enlarge the code too much.
 	     Accept a slowdown in that case.  */
@@ -615,11 +615,11 @@
 	  /* Yes, move them to the front of the buffer.  */
 	  pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
 #ifdef RE_ENABLE_I18N
-	  if (pstr->mb_cur_max > 1)
+	  if (BE (pstr->mb_cur_max, 1) > 1)
 	    memmove (pstr->wcs, pstr->wcs + offset,
 		     (pstr->valid_len - offset) * sizeof (wint_t));
 #endif /* RE_ENABLE_I18N */
-	  if (pstr->mbs_allocated)
+	  if (BE (pstr->mbs_allocated, 0))
 	    memmove (pstr->mbs, pstr->mbs + offset,
 		     pstr->valid_len - offset);
 	  pstr->valid_len -= offset;
@@ -717,7 +717,7 @@
 				      ? CONTEXT_NEWLINE : 0));
 	    }
 	}
-      if (!pstr->mbs_allocated)
+      if (!BE (pstr->mbs_allocated, 0))
 	pstr->mbs += offset;
     }
   pstr->raw_mbs_idx = idx;
@@ -726,7 +726,7 @@
 
   /* Then build the buffers.  */
 #ifdef RE_ENABLE_I18N
-  if (pstr->mb_cur_max > 1)
+  if (BE (pstr->mb_cur_max, 1) > 1)
     {
       if (pstr->icase)
 	{
@@ -739,16 +739,17 @@
     }
   else
 #endif /* RE_ENABLE_I18N */
+  if (BE (pstr->mbs_allocated, 0))
     {
       if (pstr->icase)
 	build_upper_buffer (pstr);
       else if (pstr->trans != NULL)
 	re_string_translate_buffer (pstr);
-      else
-	pstr->valid_len = pstr->len;
     }
-  pstr->cur_idx = 0;
+  else
+    pstr->valid_len = pstr->len;
 
+  pstr->cur_idx = 0;
   return REG_NOERROR;
 }
 
@@ -846,18 +847,17 @@
      int idx, eflags;
 {
   int c;
-  if (idx < 0 || idx == input->len)
-    {
-      if (idx < 0)
-	/* In this case, we use the value stored in input->tip_context,
-	   since we can't know the character in input->mbs[-1] here.  */
-	return input->tip_context;
-      else /* (idx == input->len) */
-	return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
-		: CONTEXT_NEWLINE | CONTEXT_ENDBUF);
-    }
+  if (BE (idx < 0, 0))
+    /* In this case, we use the value stored in input->tip_context,
+       since we can't know the character in input->mbs[-1] here.  */
+    return input->tip_context;
+
+  else if (BE (idx == input->len, 0))
+    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
+	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
+
 #ifdef RE_ENABLE_I18N
-  if (input->mb_cur_max > 1)
+  else if (BE (input->mb_cur_max, 1) > 1)
     {
       wint_t wc;
       int wc_idx = idx;
@@ -1650,5 +1650,6 @@
     }
   re_node_set_free (&state->nodes);
   re_free (state->trtable);
+  re_free (state->word_trtable);
   re_free (state);
 }
diff -u save/regex_internal.h ./regex_internal.h
--- save/regex_internal.h	2004-03-10 12:28:08.000000000 +0100
+++ ./regex_internal.h	2004-03-10 12:28:35.000000000 +0100
@@ -478,6 +478,7 @@
   re_node_set nodes;
   re_node_set *entrance_nodes;
   struct re_dfastate_t **trtable;
+  struct re_dfastate_t **word_trtable;
   unsigned int context : 4;
   unsigned int halt : 1;
   /* If this state can accept `multi byte'.
@@ -487,7 +488,6 @@
   /* If this state has backreference node(s).  */
   unsigned int has_backref : 1;
   unsigned int has_constraint : 1;
-  unsigned int word_trtable : 1;
 };
 typedef struct re_dfastate_t re_dfastate_t;
 
diff -u save/regexec.c ./regexec.c
--- save/regexec.c	2004-03-10 12:28:08.000000000 +0100
+++ ./regexec.c	2004-03-10 12:31:41.000000000 +0100
@@ -22,8 +22,6 @@
 				     int n) internal_function;
 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
 static void match_ctx_free (re_match_context_t *cache) internal_function;
-static void match_ctx_free_subtops (re_match_context_t *mctx)
-     internal_function;
 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
 					  int str_idx, int from, int to)
      internal_function;
@@ -57,7 +55,7 @@
 			      int nregs, int regs_allocated) internal_function;
 static inline re_dfastate_t *acquire_init_state_context
      (reg_errcode_t *err, const re_match_context_t *mctx, int idx)
-     __attribute ((always_inline)) internal_function;
+     internal_function;
 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
      internal_function;
 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
@@ -172,8 +170,8 @@
 					 re_node_set *cur_nodes, int cur_str,
 					 int last_str, int subexp_num,
 					 int type) internal_function;
-static re_dfastate_t **build_trtable (re_dfa_t *dfa,
-				      re_dfastate_t *state) internal_function;
+static int build_trtable (re_dfa_t *dfa,
+			  re_dfastate_t *state) internal_function;
 #ifdef RE_ENABLE_I18N
 static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
 				    const re_string_t *input, int idx) internal_function;
@@ -603,15 +601,16 @@
   reg_errcode_t err;
   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
   int left_lim, right_lim, incr;
-  int fl_longest_match, match_first, match_last = -1;
-  int fast_translate, sb;
+  int fl_longest_match, match_first, match_kind, match_last = -1;
+  int fast_translate, sb, ch;
 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
   re_match_context_t mctx = { .dfa = dfa };
 #else
   re_match_context_t mctx;
 #endif
-  char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
-		    && range && !preg->can_be_null) ? preg->fastmap : NULL);
+  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
+  unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate;
 
 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
   memset (&mctx, '\0', sizeof (re_match_context_t));
@@ -684,88 +683,97 @@
   left_lim = (range < 0) ? start + range : start;
   right_lim = (range < 0) ? start : start + range;
   sb = dfa->mb_cur_max == 1;
-  fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
+  match_kind = 
+    (fastmap ? 8 : 0)
+    | (sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+    | (range >= 0 ? 2 : 0)
+    | (t != NULL ? 1 : 0);
 
-  for (;;)
+  for (;; match_first += incr)
     {
-      /* At first get the current byte from input string.  */
-      if (fastmap)
-	{
-	  if (BE (fast_translate, 1))
-	    {
-	      unsigned RE_TRANSLATE_TYPE t
-		= (unsigned RE_TRANSLATE_TYPE) preg->translate;
-	      if (BE (range >= 0, 1))
-		{
-		  if (BE (t != NULL, 0))
-		    {
-		      while (BE (match_first < right_lim, 1)
-			     && !fastmap[t[(unsigned char) string[match_first]]])
-			++match_first;
-		    }
-		  else
-		    {
-		      while (BE (match_first < right_lim, 1)
-			     && !fastmap[(unsigned char) string[match_first]])
-			++match_first;
-		    }
-		  if (BE (match_first == right_lim, 0))
-		    {
-		      int ch = match_first >= length
-			       ? 0 : (unsigned char) string[match_first];
-		      if (!fastmap[t ? t[ch] : ch])
-			break;
-		    }
-		}
-	      else
-		{
-		  while (match_first >= left_lim)
-		    {
-		      int ch = match_first >= length
-			       ? 0 : (unsigned char) string[match_first];
-		      if (fastmap[t ? t[ch] : ch])
-			break;
-		      --match_first;
-		    }
-		  if (match_first < left_lim)
-		    break;
-		}
+      err = REG_NOMATCH;
+      if (match_first < left_lim || right_lim < match_first)
+	goto free_return;
+
+      /* Advance as rapidly as possible through the string, until we
+	 find a plausible place to start matching.  This may be done
+	 with varying efficiency, so there are various possibilities:
+	 only the most common of them are specialized, in order to
+	 save on code size.  We use a switch statement for speed.  */
+      switch (match_kind)
+	{
+	case 0: case 1: case 2: case 3:
+	case 4: case 5: case 6: case 7:
+	  /* No fastmap.  */
+	  break;
+
+	case 15:
+	  /* Fastmap with single-byte translation, match forward.  */
+	  while (BE (match_first < right_lim, 1)
+		 && !fastmap[t[(unsigned char) string[match_first]]])
+	    ++match_first;
+	  goto forward_match_found_start_or_reached_end;
+
+	case 14:
+	  /* Fastmap without translation, match forward.  */
+	  while (BE (match_first < right_lim, 1)
+		 && !fastmap[(unsigned char) string[match_first]])
+	    ++match_first;
+
+	forward_match_found_start_or_reached_end:
+	  if (BE (match_first == right_lim, 0))
+	    {
+	      ch = match_first >= length
+		       ? 0 : (unsigned char) string[match_first];
+	      if (!fastmap[t ? t[ch] : ch])
+		goto free_return;
 	    }
-	  else
-	    {
-	      int ch;
+	  break;
 
-	      do
+	case 12:
+	case 13:
+	  /* Fastmap without multi-byte translation, match backwards.  */
+	  while (match_first >= left_lim)
+	    {
+	      ch = match_first >= length
+		       ? 0 : (unsigned char) string[match_first];
+	      if (fastmap[t ? t[ch] : ch])
+		break;
+	      --match_first;
+	    }
+	  if (match_first < left_lim)
+	    goto free_return;
+	  break;
+	  
+	default:
+	  /* In this case, we can't determine easily the current byte,
+	     since it might be a component byte of a multibyte
+	     character.  Then we use the constructed buffer instead.  */
+	  do
+	    {
+	      /* If MATCH_FIRST is out of the valid range, reconstruct the
+		 buffers.  */
+	      if (mctx.input.raw_mbs_idx + mctx.input.valid_raw_len <= match_first
+		  || match_first < mctx.input.raw_mbs_idx)
 		{
-		  /* In this case, we can't determine easily the current byte,
-		     since it might be a component byte of a multibyte
-		     character.  Then we use the constructed buffer
-		     instead.  */
-		  /* If MATCH_FIRST is out of the valid range, reconstruct the
-		     buffers.  */
-		  if (mctx.input.raw_mbs_idx + mctx.input.valid_raw_len
-		      <= match_first
-		      || match_first < mctx.input.raw_mbs_idx)
-		    {
-		      err = re_string_reconstruct (&mctx.input, match_first,
-						   eflags);
-		      if (BE (err != REG_NOERROR, 0))
-			goto free_return;
-		    }
-		  /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
-		     Note that MATCH_FIRST must not be smaller than 0.  */
-		  ch = ((match_first >= length) ? 0
-		       : re_string_byte_at (&mctx.input,
-					    match_first
-					    - mctx.input.raw_mbs_idx));
-		  if (fastmap[ch])
-		    break;
-		  match_first += incr;
+		  err = re_string_reconstruct (&mctx.input, match_first,
+					       eflags);
+		  if (BE (err != REG_NOERROR, 0))
+		    goto free_return;
 		}
-	      while (match_first >= left_lim && match_first <= right_lim);
-	      if (! fastmap[ch])
+	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+		 Note that MATCH_FIRST must not be smaller than 0.  */
+	      ch = ((match_first >= length) ? 0
+		    : re_string_byte_at (&mctx.input,
+					 match_first - mctx.input.raw_mbs_idx));
+	      if (fastmap[ch])
 		break;
+	      match_first += incr;
 	    }
+	  while (match_first >= left_lim && match_first <= right_lim);
+	  if (!fastmap[ch])
+	    goto free_return;
+	  break;
 	}
 
       /* Reconstruct the buffers so that the matcher can assume that
@@ -773,57 +781,60 @@
       err = re_string_reconstruct (&mctx.input, match_first, eflags);
       if (BE (err != REG_NOERROR, 0))
 	goto free_return;
+
 #ifdef RE_ENABLE_I18N
-     /* Eliminate it when it is a component of a multibyte character
-	 and isn't the head of a multibyte character.  */
-      if (sb || re_string_first_byte (&mctx.input, 0))
+     /* Don't consider this char as a possible match start if it is part,
+	yet isn't the head, of a multibyte character.  */
+      if (!sb && !re_string_first_byte (&mctx.input, 0))
+	continue;
 #endif
+
+      /* It seems to be appropriate one, then use the matcher.  */
+      /* We assume that the matching starts from 0.  */
+      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+      match_last = check_matching (&mctx, fl_longest_match,
+				   range >= 0 ? &match_first : NULL);
+      if (match_last != -1)
 	{
-	  /* It seems to be appropriate one, then use the matcher.  */
-	  /* We assume that the matching starts from 0.  */
-	  mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
-	  match_last = check_matching (&mctx, fl_longest_match,
-				       range >= 0 ? &match_first : NULL);
-	  if (match_last != -1)
+	  if (BE (match_last == -2, 0))
 	    {
-	      if (BE (match_last == -2, 0))
+	      err = REG_ESPACE;
+	      goto free_return;
+	    }
+	  else
+	    {
+	      mctx.match_last = match_last;
+	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 		{
-		  err = REG_ESPACE;
-		  goto free_return;
+		  re_dfastate_t *pstate = mctx.state_log[match_last];
+		  mctx.last_node = check_halt_state_context (&mctx, pstate,
+							     match_last);
 		}
-	      else
+	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+		  || dfa->nbackref)
 		{
-		  mctx.match_last = match_last;
-		  if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
-		    {
-		      re_dfastate_t *pstate = mctx.state_log[match_last];
-		      mctx.last_node = check_halt_state_context (&mctx, pstate,
-								 match_last);
-		    }
-		  if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
-		      || dfa->nbackref)
-		    {
-		      err = prune_impossible_nodes (&mctx);
-		      if (err == REG_NOERROR)
-			break;
-		      if (BE (err != REG_NOMATCH, 0))
-			goto free_return;
-		      match_last = -1;
-		    }
-		  else
-		    break; /* We found a match.  */
+		  err = prune_impossible_nodes (&mctx);
+		  if (err == REG_NOERROR)
+		    break;
+		  if (BE (err != REG_NOMATCH, 0))
+		    goto free_return;
+		  match_last = -1;
 		}
+	      else
+		break; /* We found a match.  */
 	    }
-	  match_ctx_clean (&mctx);
 	}
-      /* Update counter.  */
-      match_first += incr;
-      if (match_first < left_lim || right_lim < match_first)
-	break;
+
+      match_ctx_clean (&mctx);
     }
 
+#ifdef DEBUG
+  assert (match_last != -1);
+  assert (err == REG_NOERROR);
+#endif
+
   /* Set pmatch[] if we need.  */
-  if (match_last != -1 && nmatch > 0)
+  if (nmatch > 0)
     {
       int reg_idx;
 
@@ -868,7 +879,7 @@
 	    pmatch[reg_idx].rm_eo += match_first;
 	  }
     }
-  err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
+
  free_return:
   re_free (mctx.state_log);
   if (dfa->nbackref)
@@ -1073,6 +1084,20 @@
   while (!re_string_eoi (&mctx->input))
     {
       re_dfastate_t *old_state = cur_state;
+      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
+
+      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
+          || (BE (next_char_idx >= mctx->input.valid_len, 0)
+              && mctx->input.valid_len < mctx->input.len))
+        {
+          err = extend_buffers (mctx);
+          if (BE (err != REG_NOERROR, 0))
+	    {
+	      assert (err == REG_ESPACE);
+	      return -2;
+	    }
+        }
+
       cur_state = transit_state (&err, mctx, cur_state);
       if (mctx->state_log != NULL)
 	cur_state = merge_state_with_log (&err, mctx, cur_state);
@@ -1091,10 +1116,10 @@
 	    break;
 	}
 
-      if (at_init_state)
+      if (BE (at_init_state, 0))
 	{
 	  if (old_state == cur_state)
-	    next_start_idx = re_string_cur_idx (&mctx->input);
+	    next_start_idx = next_char_idx;
 	  else
 	    at_init_state = 0;
 	}
@@ -1110,13 +1135,16 @@
 	      /* We found an appropriate halt state.  */
 	      match_last = re_string_cur_idx (&mctx->input);
 	      match = 1;
+
+	      /* We found a match, do not modify match_first below.  */
+	      p_match_first = NULL;
 	      if (!fl_longest_match)
 		break;
 	    }
 	}
-   }
+    }
 
-  if (match_last == -1 && p_match_first)
+  if (p_match_first)
     *p_match_first += next_start_idx;
 
   return match_last;
@@ -2168,22 +2196,12 @@
      re_match_context_t *mctx;
      re_dfastate_t *state;
 {
-  re_dfa_t *const dfa = mctx->dfa;
   re_dfastate_t **trtable;
   unsigned char ch;
 
-  if (re_string_cur_idx (&mctx->input) + 1 >= mctx->input.bufs_len
-      || (re_string_cur_idx (&mctx->input) + 1 >= mctx->input.valid_len
-	  && mctx->input.valid_len < mctx->input.len))
-    {
-      *err = extend_buffers (mctx);
-      if (BE (*err != REG_NOERROR, 0))
-	return NULL;
-    }
-
 #ifdef RE_ENABLE_I18N
       /* If the current state can accept multibyte.  */
-      if (state->accept_mb)
+      if (BE (state->accept_mb, 0))
 	{
 	  *err = transit_state_mb (mctx, state);
 	  if (BE (*err != REG_NOERROR, 0))
@@ -2194,32 +2212,34 @@
   /* Then decide the next state with the single byte.  */
   if (1)
     {
-      /* Use transition table  */
+      /* Use transition table.  Sorry for the goto, but we really need
+         to squeeze every single instruction here.  */
       ch = re_string_fetch_byte (&mctx->input);
+ 
+    retry:
       trtable = state->trtable;
-      if (trtable == NULL)
+      if (BE (trtable != NULL, 1))
+        return trtable[ch];
+
+      trtable = state->word_trtable;
+      if (BE (trtable != NULL, 1))
         {
-          trtable = build_trtable (dfa, state);
-          if (trtable == NULL)
-	    {
-	      *err = REG_ESPACE;
-	      return NULL;
-	    }
-	}
-      if (BE (state->word_trtable, 0))
+          unsigned int context;
+          context = re_string_context_at (&mctx->input,
+                                          re_string_cur_idx (&mctx->input) - 1,
+                                          mctx->eflags);
+          if (IS_WORD_CONTEXT (context))
+            return trtable[ch + SBC_MAX];
+          else
+            return trtable[ch];
+        }
+
+      if (!build_trtable (mctx->dfa, state))
         {
-	  unsigned int context;
-	  context
-	    = re_string_context_at (&mctx->input,
-				    re_string_cur_idx (&mctx->input) - 1,
-				    mctx->eflags);
-	  if (IS_WORD_CONTEXT (context))
-	    return trtable[ch + SBC_MAX];
-	  else
-	    return trtable[ch];
-	}
-      else
-	return trtable[ch];
+          *err = REG_ESPACE;
+          return NULL;
+        }
+      goto retry;
     }
 #if 0
   else
@@ -3228,7 +3248,7 @@
 /* Build transition table for the state.
    Return the new table if succeeded, otherwise return NULL.  */
 
-static re_dfastate_t **
+static int
 build_trtable (dfa, state)
     re_dfa_t *dfa;
     re_dfastate_t *state;
@@ -3238,6 +3258,7 @@
   unsigned int elem, mask;
   int dests_node_malloced = 0, dest_states_malloced = 0;
   int ndests; /* Number of the destination states from `state'.  */
+  int need_word_trtable = 0;
   re_dfastate_t **trtable;
   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
   re_node_set follows, *dests_node;
@@ -3258,14 +3279,11 @@
       dests_node = (re_node_set *)
 		   malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
       if (BE (dests_node == NULL, 0))
-	return NULL;
+	return 0;
       dests_node_malloced = 1;
     }
   dests_ch = (bitset *) (dests_node + SBC_MAX);
 
-  /* Initialize transiton table.  */
-  state->word_trtable = 0;
-
   /* At first, group all nodes belonging to `state' into several
      destinations.  */
   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
@@ -3273,14 +3291,12 @@
     {
       if (dests_node_malloced)
 	free (dests_node);
-      /* Return NULL in case of an error, trtable otherwise.  */
       if (ndests == 0)
-	{
-	  state->trtable = (re_dfastate_t **)
-	    calloc (sizeof (re_dfastate_t *), SBC_MAX);;
-	  return state->trtable;
-	}
-      return NULL;
+	state->trtable = (re_dfastate_t **)
+	  calloc (sizeof (re_dfastate_t *), SBC_MAX);;
+
+      /* Return 0 in case of an error, 1 otherwise.  */
+      return state->trtable != NULL;
     }
 
   err = re_node_set_alloc (&follows, ndests + 1);
@@ -3307,7 +3323,7 @@
 	    re_node_set_free (dests_node + i);
 	  if (dests_node_malloced)
 	    free (dests_node);
-	  return NULL;
+	  return 0;
 	}
       dest_states_malloced = 1;
     }
@@ -3343,9 +3359,11 @@
 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
 	    goto out_free;
 
+#ifdef RE_ENABLE_I18N
 	  if (dest_states[i] != dest_states_word[i]
 	      && dfa->mb_cur_max > 1)
-	    state->word_trtable = 1;
+	    need_word_trtable = 1;
+#endif
 
 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
 							CONTEXT_NEWLINE);
@@ -3360,7 +3378,7 @@
       bitset_merge (acceptable, dests_ch[i]);
     }
 
-  if (!BE (state->word_trtable, 0))
+  if (!BE (need_word_trtable, 0))
     {
       /* We don't care about whether the following character is a word
 	 character, or we are in a single-byte character set so we can
@@ -3389,6 +3407,7 @@
 		trtable[ch] = dest_states[j];
 	    }
     }
+#ifdef RE_ENABLE_I18N
   else
     {
       /* We care about whether the following character is a word
@@ -3418,6 +3437,7 @@
 	      trtable[ch + SBC_MAX] = dest_states_word[j];
 	    }
     }
+#endif
 
   /* new line */
   if (bitset_contain (acceptable, NEWLINE_CHAR))
@@ -3428,7 +3448,7 @@
 	  {
 	    /* k-th destination accepts newline character.  */
 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
-	    if (state->word_trtable)
+	    if (need_word_trtable)
 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
 	    /* There must be only one destination which accepts
 	       newline.  See group_nodes_into_DFAstates.  */
@@ -3446,8 +3466,12 @@
   if (dests_node_malloced)
     free (dests_node);
 
-  state->trtable = trtable;
-  return trtable;
+  if (need_word_trtable)
+    state->word_trtable = trtable;
+  else
+    state->trtable = trtable;
+
+  return 1;
 }
 
 /* Group all nodes belonging to STATE into several destinations.
@@ -4079,28 +4103,6 @@
 match_ctx_clean (mctx)
     re_match_context_t *mctx;
 {
-  match_ctx_free_subtops (mctx);
-  mctx->nsub_tops = 0;
-  mctx->nbkref_ents = 0;
-}
-
-/* Free all the memory associated with MCTX.  */
-
-static void
-match_ctx_free (mctx)
-    re_match_context_t *mctx;
-{
-  match_ctx_free_subtops (mctx);
-  re_free (mctx->sub_tops);
-  re_free (mctx->bkref_ents);
-}
-
-/* Free all the memory associated with MCTX->SUB_TOPS.  */
-
-static void
-match_ctx_free_subtops (mctx)
-     re_match_context_t *mctx;
-{
   int st_idx;
   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
     {
@@ -4120,8 +4122,24 @@
 	}
       free (top);
     }
+
+  mctx->nsub_tops = 0;
+  mctx->nbkref_ents = 0;
 }
 
+/* Free all the memory associated with MCTX.  */
+
+static void
+match_ctx_free (mctx)
+    re_match_context_t *mctx;
+{
+  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
+  match_ctx_clean (mctx);
+  re_free (mctx->sub_tops);
+  re_free (mctx->bkref_ents);
+}
+
+
 /* Add a new backreference entry to MCTX.
    Note that we assume that caller never call this function with duplicate
    entry, and call with STR_IDX which isn't smaller than any existing entry.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]