This is the mail archive of the libc-ports@sources.redhat.com mailing list for the libc-ports project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

segmentation fault after some modification about regex operation of glibc-2.7


I am a new guy doing glibc dev, so my question may seem silly.
In order to get a transition table like the one described at
http://lambda.uta.edu/cse5317/notes/node8.html for a given
regular expression, I added an interface
"int regtrtable (const regex_t *__restrict __preg, char * trantable,
int *state_num_ptr);" to regex.h. It compiles fine. However, after
"make install", lots of segmentation faults appear on the screen, and I
don't know how to rescue my OS. My modification patch is
included below. I hope you can help me figure out what mistakes
it may contain.

diff -r ace192926b61 -r b5b99be08504 posix/regex.h
--- a/posix/regex.h	Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regex.h	Tue Aug 23 02:39:37 2011 -0400
@@ -533,6 +533,11 @@
 # endif
 #endif

+/* Added by peter for opencl acceleration */
+extern int regtrtable (const regex_t *__restrict __preg, char *
trantable, int *state_num_ptr);
+
+extern int freetrtable (char * trantable);
+
 /* POSIX compatibility.  */
 extern int regcomp (regex_t *__restrict __preg,
 		    const char *__restrict __pattern,
diff -r ace192926b61 -r b5b99be08504 posix/regex_internal.c
--- a/posix/regex_internal.c	Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regex_internal.c	Tue Aug 23 02:39:37 2011 -0400
@@ -30,6 +30,52 @@
 					  unsigned int context,
 					  unsigned int hash) internal_function;
 
+/* Function to build the global transition table */
+linklist * init_state_linklist(void)
+{
+    linklist *p;
+    p = (linklist *)malloc(sizeof(linklist));
+    p ->next = NULL;
+    r = p;
+    return p;
+}
+
+void insert_state_linklist(re_dfastate_t *state)
+{
+    cur_state_linklist = (linklist *)malloc(sizeof(linklist));
+    cur_state_linklist -> state = state;
+    cur_state_linklist -> next = NULL;
+    r -> next = cur_state_linklist;
+    r = cur_state_linklist;
+    return;
+}
+
+void destroy_state_linklist(void)
+{
+    linklist *p = state_linklist_head, *q = p -> next;
+    while(q != NULL)
+    {
+        free(p);
+        p = q;
+        q = p -> next;
+    }
+    free(p);
+
+}
+
+void disp_state_linklist(void)
+{
+    linklist *p = state_linklist_head->next;
+    printf("\nstate_id_size = %d.", state_id_count);
+
+    while(p != NULL)
+    {
+        printf("\nstateid: %d", p->state->state_id);
+        p = p->next;
+    }
+    return;
+}
+
 /* Functions for string operation.  */

 /* This function allocate the buffers.  It is necessary to call
@@ -1488,6 +1534,7 @@
   if (BE (new_state == NULL, 0))
     *err = REG_ESPACE;

+  insert_state_linklist(new_state);
   return new_state;
 }

@@ -1531,6 +1578,7 @@
   if (BE (new_state == NULL, 0))
     *err = REG_ESPACE;

+  insert_state_linklist(new_state);
   return new_state;
 }

@@ -1611,6 +1659,7 @@
     }

   newstate->entrance_nodes = &newstate->nodes;
+  newstate->state_id = get_state_id();
   for (i = 0 ; i < nodes->nelem ; i++)
     {
       re_token_t *node = dfa->nodes + nodes->elems[i];
@@ -1662,6 +1711,7 @@

   newstate->context = context;
   newstate->entrance_nodes = &newstate->nodes;
+  newstate->state_id = get_state_id();

   for (i = 0 ; i < nodes->nelem ; i++)
     {
diff -r ace192926b61 -r b5b99be08504 posix/regex_internal.h
--- a/posix/regex_internal.h	Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regex_internal.h	Tue Aug 23 02:39:37 2011 -0400
@@ -497,6 +497,7 @@
   re_node_set inveclosure;
   re_node_set *entrance_nodes;
   struct re_dfastate_t **trtable, **word_trtable;
+  unsigned short int state_id;
   unsigned int context : 4;
   unsigned int halt : 1;
   /* If this state can accept `multi byte'.
@@ -509,6 +510,18 @@
 };
 typedef struct re_dfastate_t re_dfastate_t;

+typedef struct lnode
+{
+    re_dfastate_t *state;
+    unsigned short int trtable[256];
+    unsigned short int final; /*final == 1, lnode is final state. */
+    struct lnode *next;
+}linklist;
+
+unsigned int state_id_count;
+linklist *state_linklist_head;
+linklist *r = NULL , *cur_state_linklist = NULL;
+
 struct re_state_table_entry
 {
   int num;
@@ -684,6 +697,21 @@
   } opr;
 } bracket_elem_t;

+static inline void init_state_id_count(void)
+{
+    state_id_count = 0;
+}
+
+static inline unsigned int get_state_id(void)
+{
+    state_id_count++;
+    return state_id_count;
+}
+
+linklist * init_state_linklist(void);
+void insert_state_linklist(re_dfastate_t * state);
+void destroy_state_linklist(void);
+void disp_state_linklist(void);

 /* Inline functions for bitset operation.  */
 static inline void
diff -r ace192926b61 -r b5b99be08504 posix/regexec.c
--- a/posix/regexec.c	Mon Aug 22 07:38:59 2011 -0400
+++ b/posix/regexec.c	Tue Aug 23 02:39:37 2011 -0400
@@ -41,6 +41,8 @@
 					 int start, int range, int stop,
 					 size_t nmatch, regmatch_t pmatch[],
 					 int eflags) internal_function;
+static reg_errcode_t regtrtable_internal (const regex_t *preg,
+                     char * trantable, int *state_num_ptr) internal_function;
 static int re_search_2_stub (struct re_pattern_buffer *bufp,
 			     const char *string1, int length1,
 			     const char *string2, int length2,
@@ -127,6 +129,10 @@
 static re_dfastate_t *transit_state (reg_errcode_t *err,
 				     re_match_context_t *mctx,
 				     re_dfastate_t *state) internal_function;
+static re_dfastate_t *transit_state_by_char (reg_errcode_t *err,
+                     re_dfa_t *dfa,
+                     unsigned char ch,
+                     re_dfastate_t *state) internal_function;
 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
 					    re_match_context_t *mctx,
 					    re_dfastate_t *next_state)
@@ -200,7 +206,38 @@
      internal_function;
 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
      internal_function;
-
+
+/* Added by peter for opencl acceleration
+ * return 0: success
+ * return 1: non-success
+ * */
+int
+regtrtable (preg, trantable, state_num_ptr)
+    const regex_t *__restrict preg;
+    char * trantable;
+    int * state_num_ptr;
+{
+    reg_errcode_t err;
+    re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+
+    __libc_lock_lock (dfa->lock);
+
+    err = regtrtable_internal(preg, trantable, state_num_ptr);
+
+    __libc_lock_unlock (dfa->lock);
+
+   return err != REG_NOERROR;
+}
+
+int
+freetrtable (trantable)
+    char * trantable;
+{
+    free(trantable);
+    return 0;
+}
+
+
 /* Entry point for POSIX code.  */

 /* regexec searches for a given pattern, specified by PREG, in the
@@ -607,6 +644,51 @@
 
 /* Internal entry point.  */

+static reg_errcode_t
+regtrtable_internal(preg, trantable, state_num_ptr)
+    const regex_t *preg;
+    char * trantable;
+    int * state_num_ptr;
+{
+    unsigned char ch = 0;
+    int i, item_size;
+    reg_errcode_t err;
+    re_dfastate_t * cur_state, * new_state;
+    re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+    err = REG_NOERROR;
+    state_linklist_head = init_state_linklist();
+
+    cur_state = dfa->init_state;
+    insert_state_linklist(cur_state);
+
+    while(cur_state_linklist -> next != NULL)
+    {
+        for(i = 0; i < 256; i++)
+        {
+            new_state = transit_state_by_char(&err, dfa, ch, cur_state);
+
+            cur_state_linklist->trtable[ch] = new_state->state_id;
+            cur_state_linklist->final =
cur_state_linklist->state->halt ? 1 : 0;
+            ch ++;
+        }
+
+        cur_state = cur_state_linklist -> next -> state;
+    }
+    state_num_ptr[0] = state_id_count;
+
+    item_size = 257 * sizeof(unsigned short int);
+    trantable = (char *)malloc(state_id_count * item_size);
+
+    cur_state_linklist = state_linklist_head -> next;
+    for(i=0; i< state_id_count; i++)
+    {
+        memcpy(trantable + i * item_size,
cur_state_linklist->trtable, item_size);
+        cur_state_linklist = cur_state_linklist -> next;
+    }
+
+    destroy_state_linklist();
+}
+
 /* Searches for a compiled pattern PREG in the string STRING, whose
    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
    mingings with regexec.  START, and RANGE have the same meanings
@@ -2295,6 +2377,31 @@
     }
 }

+static re_dfastate_t *
+internal_function
+transit_state_by_char (reg_errcode_t *err,
+                       re_dfa_t *dfa,
+                       unsigned char ch,
+                       re_dfastate_t *state)
+{
+  re_dfastate_t **trtable;
+
+  /* Use transition table  */
+  for (;;)
+  {
+      trtable = state->trtable;
+      if (BE (trtable != NULL, 1))
+          return trtable[ch];
+
+      if (!build_trtable (dfa, state))
+      {
+          *err = REG_ESPACE;
+          return NULL;
+      }
+
+      /* Retry, we now have a transition table.  */
+  }
+}
 /* Update the state_log if we need */
 re_dfastate_t *
 internal_function

-- 
HOME PAGE: http://www.peterpy8.me


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]