2003-12-14  Paolo Bonzini  <bonzini@gnu.org>

	* posix/regcomp.c (parse_dup_op): Process OP_DUP_PLUS,
	OP_DUP_ASTERISK, and OP_DUP_QUESTION like OP_OPEN_DUP_NUM,
	in order to lower OP_DUP_PLUS and mark subexpressions as
	OPT_SUBEXP.
	(optimize_utf8, calc_first, calc_next, calc_epsdest):
	Don't consider the OP_DUP_PLUS case.
	* posix/regexec.c (update_regs): OPT_SUBEXP subexpression
	may now happen even when PMATCH[REG_NUM].RM_SO == -1.
	(NUMBER_OF_STATE): Unused, remove it.
	* posix/regex_internal.h (re_token_type_t): Move
	OP_DUP_PLUS among the tokens rather than among the
	epsilon-transiting nodes.

--- regexec.c.sav	2003-12-14 12:02:56.000000000 +0100
+++ regexec.c	2003-12-14 12:10:46.000000000 +0100
@@ -1351,19 +1351,21 @@ update_regs (dfa, pmatch, cur_node, cur_
      int cur_node, cur_idx, nmatch;
 {
   int type = dfa->nodes[cur_node].type;
-  int reg_num;
-  if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
-    return;
-  reg_num = dfa->nodes[cur_node].opr.idx + 1;
-  if (reg_num >= nmatch)
-    return;
   if (type == OP_OPEN_SUBEXP)
     {
+      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
+      if (reg_num >= nmatch)
+        return;
+
       if (dfa->nodes[cur_node].opt_subexp)
 	/* We are at the first node of a repeated subexpression.
-	   For now, we leave it as is because we know that this
-	   subexpression starts where the previous copy ends.  */
-	;
+	   Store the index in rm_eo; it is slid into rm_so when we
+	   find the OP_CLOSE_SUBEXP, if the match is not empty or is
+	   the first one.  This is actually useless in most cases,
+	   because the subexpression will start where the previous
+	   copy ends, but is needed for the first copy of a (RE)*
+	   subexpression.  */
+        pmatch[reg_num].rm_eo = cur_idx;
       else
 	{
 	  /* We are at the first node of this sub expression.  */
@@ -1373,12 +1375,18 @@ update_regs (dfa, pmatch, cur_node, cur_
     }
   else if (type == OP_CLOSE_SUBEXP)
     {
+      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
+      if (reg_num >= nmatch)
+        return;
+
       if (dfa->nodes[cur_node].opt_subexp)
 	{
 	  /* We are at the last node of a repeated subexpression.
-	     If it is not an empty match, we can set it, otherwise
-	     we leave the previous, non-empty match.  */
-	  if (pmatch[reg_num].rm_eo < cur_idx)
+	     If it is not an empty match, or if it is the first match,
+	     we can set it; otherwise, we leave the previous, non-empty
+	     match.  */
+	  if (pmatch[reg_num].rm_eo < cur_idx
+	      || pmatch[reg_num].rm_so == -1)
 	    {
 	      pmatch[reg_num].rm_so = pmatch[reg_num].rm_eo;
 	      pmatch[reg_num].rm_eo = cur_idx;
@@ -1390,8 +1398,6 @@ update_regs (dfa, pmatch, cur_node, cur_
     }
 }
 
-#define NUMBER_OF_STATE 1
-
 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
    and sift the nodes in each states according to the following rules.
    Updated state_log will be wrote to STATE_LOG.
--- regex_internal.h.sav	2003-12-14 10:45:41.000000000 +0100
+++ regex_internal.h	2003-12-14 10:46:04.000000000 +0100
@@ -186,9 +186,8 @@ typedef enum
   OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
   OP_ALT = EPSILON_BIT | 2,
   OP_DUP_ASTERISK = EPSILON_BIT | 3,
-  OP_DUP_PLUS = EPSILON_BIT | 4,
-  OP_DUP_QUESTION = EPSILON_BIT | 5,
-  ANCHOR = EPSILON_BIT | 6,
+  OP_DUP_QUESTION = EPSILON_BIT | 4,
+  ANCHOR = EPSILON_BIT | 5,
 
   /* Tree type, these are used only by tree. */
   CONCAT = 16,
@@ -199,6 +198,7 @@ typedef enum
   OP_CHARSET_RANGE,
   OP_OPEN_DUP_NUM,
   OP_CLOSE_DUP_NUM,
+  OP_DUP_PLUS,
   OP_NON_MATCH_LIST,
   OP_OPEN_COLL_ELEM,
   OP_CLOSE_COLL_ELEM,
--- regcomp.c.sav	2003-12-14 10:36:36.000000000 +0100
+++ regcomp.c	2003-12-14 12:21:56.000000000 +0100
@@ -1031,7 +1031,6 @@ optimize_utf8 (dfa)
       case END_OF_RE:
       case OP_DUP_ASTERISK:
       case OP_DUP_QUESTION:
-      case OP_DUP_PLUS:
       case OP_OPEN_SUBEXP:
       case OP_CLOSE_SUBEXP:
 	break;
@@ -1176,14 +1175,6 @@ calc_first (dfa, node)
     case OP_CLOSE_SUBEXP:
       node->first = idx;
       break;
-    case OP_DUP_PLUS:
-#ifdef DEBUG
-      assert (node->left != NULL);
-#endif
-      if (node->left->first == -1)
-	calc_first (dfa, node->left);
-      node->first = node->left->first;
-      break;
     case OP_ALT:
       node->first = idx;
       break;
@@ -1223,7 +1214,6 @@ calc_next (dfa, node)
   switch (type)
     {
     case OP_DUP_ASTERISK:
-    case OP_DUP_PLUS:
       node->next = idx;
       break;
     case CONCAT:
@@ -1258,7 +1248,6 @@ calc_epsdest (dfa, node)
   if (node->type == 0)
     {
       if (dfa->nodes[idx].type == OP_DUP_ASTERISK
-	  || dfa->nodes[idx].type == OP_DUP_PLUS
 	  || dfa->nodes[idx].type == OP_DUP_QUESTION)
 	{
 	  if (node->left->first == -1)
@@ -2409,15 +2398,15 @@ parse_dup_op (dup_elem, regexp, dfa, tok
      reg_errcode_t *err;
 {
   re_token_t dup_token;
-  bin_tree_t *tree = dup_elem, *work_tree;
+  bin_tree_t *tree = dup_elem, *work_tree, *elem;
   int start_idx = re_string_cur_idx (regexp);
   re_token_t start_token = *token;
+  int start, end, i;
+
   if (token->type == OP_OPEN_DUP_NUM)
     {
-      int i;
-      int end = 0;
-      int start = fetch_number (regexp, token, syntax);
-      bin_tree_t *elem;
+      end = 0;
+      start = fetch_number (regexp, token, syntax);
       if (start == -1)
 	{
 	  if (token->type == CHARACTER && token->opr.c == ',')
@@ -2443,42 +2432,52 @@ parse_dup_op (dup_elem, regexp, dfa, tok
 	  else
 	    goto parse_dup_op_ebrace;
 	}
+     
       if (BE (start == 0 && end == 0, 0))
 	{
 	  /* We treat "<re>{0}" and "<re>{0,0}" as null string.  */
 	  fetch_token (token, regexp, syntax);
 	  return NULL;
 	}
+    }
+  else
+    {
+      start = (token->type == OP_DUP_PLUS) ? 1 : 0;
+      end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
+    }
 
-      /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
-      elem = tree;
-      for (i = 0; i < start; ++i)
-	if (i != 0)
-	  {
-	    work_tree = duplicate_tree (elem, dfa, 0);
-	    tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
-	    if (BE (work_tree == NULL || tree == NULL, 0))
-	      goto parse_dup_op_espace;
-	  }
+  /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
+  elem = tree;
+  for (i = 1; i < start; ++i)
+    {
+      work_tree = duplicate_tree (elem, dfa, 0);
+      tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
+      if (BE (work_tree == NULL || tree == NULL, 0))
+	goto parse_dup_op_espace;
+     }
+
+  if (start != end)
+    {
+      if (start > 0 || elem->type == CONCAT)
+	{
+	  /* Mark this node as a repeated subexpression.  */
+	  elem = duplicate_tree (elem, dfa, 1);
+	  if (BE (elem == NULL, 0))
+	    goto parse_dup_op_espace;
+	}
 
       if (end == -1)
 	{
 	  /* We treat "<re>{0,}" as "<re>*".  */
 	  dup_token.type = OP_DUP_ASTERISK;
+	  work_tree = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
 	  if (start > 0)
-	    {
-	      elem = duplicate_tree (elem, dfa, 1);
-	      work_tree = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
-	      tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
-	      if (BE (elem == NULL || work_tree == NULL || tree == NULL, 0))
-		goto parse_dup_op_espace;
-	    }
+	    tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
 	  else
-	    {
-	      tree = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
-	      if (BE (tree == NULL, 0))
-		goto parse_dup_op_espace;
-	    }
+	    tree = work_tree;
+
+	  if (BE (work_tree == NULL || tree == NULL, 0))
+	    goto parse_dup_op_espace;
 	}
       else if (BE (start > end, 0))
 	{
@@ -2490,20 +2489,15 @@ parse_dup_op (dup_elem, regexp, dfa, tok
 	{
 	  /* Then extract "<re>{0,m}" to "<re>?<re>?...<re>?".  */
 	  dup_token.type = OP_DUP_QUESTION;
+	  elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
 	  if (start > 0)
-	    {
-	      elem = duplicate_tree (elem, dfa, 1);
-	      elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
-	      tree = create_tree (dfa, tree, elem, CONCAT, 0);
-	      if (BE (elem == NULL || tree == NULL, 0))
-		goto parse_dup_op_espace;
-	    }
+	    tree = create_tree (dfa, tree, elem, CONCAT, 0);
 	  else
-	    {
-	      tree = elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
-	      if (BE (tree == NULL, 0))
-		goto parse_dup_op_espace;
-	    }
+	    tree = elem;
+
+	  if (BE (elem == NULL || tree == NULL, 0))
+	    goto parse_dup_op_espace;
+
 	  for (i = 1; i < end - start; ++i)
 	    {
 	      work_tree = duplicate_tree (elem, dfa, 1);
@@ -2516,15 +2510,7 @@ parse_dup_op (dup_elem, regexp, dfa, tok
 	    }
 	}
     }
-  else
-    {
-      tree = re_dfa_add_tree_node (dfa, tree, NULL, token);
-      if (BE (tree == NULL, 0))
-	{
-	  *err = REG_ESPACE;
-	  return NULL;
-	}
-    }
+
   fetch_token (token, regexp, syntax);
   return tree;