This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
[PATCH v2] Fix tokenize function and test.
- From: Przemyslaw Pawelczyk <przemyslaw at pawelczyk dot it>
- To: systemtap at sourceware dot org
- Date: Thu, 18 Jun 2009 01:50:31 +0200
- Subject: [PATCH v2] Fix tokenize function and test.
- Mail-from: e890bb6d3a994aff48f317fe5cf8c5ccee939a09 Mon Sep 17 00:00:00 2001
- References: <1244863762.20494.8055@debian> <1244896709.407628.14220@debian>
The previous implementation was error-prone because it allowed empty
tokens to be returned (mimicking strsep()), which is fine only when NULL
semantics are available. Unfortunately, SystemTap doesn't provide NULL in
scripts and has only the blank string (""), so testing the result against
"" could not tell an empty token from the end of the tokens and was
therefore misleading.
The solution is to return only non-empty tokens (mimicking strtok()).
* tapset/string.stp: Fix tokenize.
* testsuite/systemtap.string/tokenize.stp: Improve and add case with
more than one delimiter in the delim string.
* testsuite/systemtap.string/tokenize.exp: Ditto.
* stapfuncs.3stap.in: Update tokenize description.
* doc/langref.tex: Ditto.
---
doc/langref.tex | 4 +-
stapfuncs.3stap.in | 6 +-
tapset/string.stp | 23 ++++++---
testsuite/systemtap.string/tokenize.exp | 16 ++++++-
testsuite/systemtap.string/tokenize.stp | 75 +++++++++++++++++++-----------
5 files changed, 82 insertions(+), 42 deletions(-)
diff --git a/doc/langref.tex b/doc/langref.tex
index 5aefa27..5a149d1 100644
--- a/doc/langref.tex
+++ b/doc/langref.tex
@@ -3160,8 +3160,8 @@ General syntax:
tokenize:string (input:string, delim:string)
\end{verbatim}
\end{vindent}
-This function returns the next token in the given input string, where
-the tokens are delimited by one of the characters in the delim string.
+This function returns the next non-empty token in the given input string,
+where the tokens are delimited by characters in the delim string.
If the input string is non-NULL, it returns the first token. If the input string
is NULL, it returns the next token in the string passed in the previous call
to tokenize. If no delimiter is found, the entire remaining input string
diff --git a/stapfuncs.3stap.in b/stapfuncs.3stap.in
index 518ff2b..3d88b2e 100644
--- a/stapfuncs.3stap.in
+++ b/stapfuncs.3stap.in
@@ -166,11 +166,11 @@ specified by base. For example, strtol("1000", 16) returns 4096. Returns 0 if
string cannot be converted.
.TP
tokenize:string (str:string, delim:string)
-Return the next token in the given str string, where the tokens are delimited
-by one of the characters in the delim string. If the str string is not blank,
+Return the next non-empty token in the given str string, where the tokens are
+delimited by characters in the delim string. If the str string is not blank,
it returns the first token. If the str string is blank, it returns the next
token in the string passed in the previous call to tokenize. If no delimiter
-is found, the entire remaining str string is returned. Returns blank when
+is found, the entire remaining str string is returned. Returns blank when
no more tokens are left.
.SS TIMESTAMP
diff --git a/tapset/string.stp b/tapset/string.stp
index 35ee9fa..cc84292 100644
--- a/tapset/string.stp
+++ b/tapset/string.stp
@@ -70,25 +70,32 @@ function text_strn:string(input:string, len:long, quoted:long)
/*
* tokenize - Given a string and a token delimiter,
- * return the next token in the string
- * input String to tokenize. If NULL, returns the next token in the
- * string passed in the previous call to tokenize().
- * delim Token delimiter. Note this is a string, but only the first
- * character is used as the delimiter.
+ * return the next non-empty token in the string
+ * or blank when no more non-empty tokens are left
+ * input String to tokenize. If NULL, returns the next non-empty token
+ * in the string passed in the previous call to tokenize().
+ * delim Token delimiter. Set of characters that delimit the tokens.
*/
function tokenize:string(input:string, delim:string)
%{ /* pure */
static char str[MAXSTRINGLEN];
static char *str_start;
+ static char *str_end;
char *token = NULL;
+ char *token_end = NULL;
if (THIS->input[0]) {
strncpy(str, THIS->input, MAXSTRINGLEN);
str_start = &str[0];
+ str_end = &str[0] + strlen(str);
+ }
+ do {
+ token = strsep(&str_start, THIS->delim);
+ } while (token && !token[0]);
+ if (token) {
+ token_end = (str_start ? str_start - 1 : str_end);
+ memcpy(THIS->__retvalue, token, token_end - token + 1);
}
- token = strsep(&str_start, THIS->delim);
- if (token)
- strncpy (THIS->__retvalue, token, MAXSTRINGLEN);
%}
/*
diff --git a/testsuite/systemtap.string/tokenize.exp b/testsuite/systemtap.string/tokenize.exp
index 697b7c7..aa28f85 100644
--- a/testsuite/systemtap.string/tokenize.exp
+++ b/testsuite/systemtap.string/tokenize.exp
@@ -9,7 +9,9 @@ seven
eight
nine
ten
+-
one|two|three|four|five|six|seven|eight|nine|ten
+-
a
b
c
@@ -17,10 +19,22 @@ d
e
f
g
+-
1
2
3
4
-this is a string with no delimiters}
+-
+-
+this is a string with no delimiters
+-
+this
+is
+a
+string
+which
+has
+two
+delimiters}
stap_run2 $srcdir/$subdir/$test.stp
diff --git a/testsuite/systemtap.string/tokenize.stp b/testsuite/systemtap.string/tokenize.stp
index 10703d9..1b253c8 100644
--- a/testsuite/systemtap.string/tokenize.stp
+++ b/testsuite/systemtap.string/tokenize.stp
@@ -5,42 +5,61 @@ probe begin
teststr3 = "1,,2,3, ,4"
teststr4 = ""
teststr5 = "this is a string with no delimiters"
+ teststr6 = "this is a string, which has two delimiters"
tok = tokenize(teststr1, "|")
while (tok != "") {
- printf("%s\n", tok)
+ println(tok)
tok = tokenize("", "|")
}
- tok = tokenize(teststr1, ",")
- while (tok != "") {
- printf("%s\n", tok)
- tok = tokenize("", "|")
- }
+ println("-")
+
+ tok = tokenize(teststr1, ",")
+ while (tok != "") {
+ println(tok)
+ tok = tokenize("", "|")
+ }
+ println("-")
+
tok = tokenize(teststr2, ",")
- while (tok != "") {
- printf("%s\n", tok)
- tok = tokenize("", ",")
- }
-
- tok = tokenize(teststr3, ",")
- while (tok != "") {
- printf("%s\n", tok)
- tok = tokenize("", ",")
- }
-
- tok = tokenize(teststr4, ",")
- while (tok != "") {
- printf("%s\n", tok)
- tok = tokenize("", ",")
- }
-
- tok = tokenize(teststr5, ",")
- while (tok != "") {
- printf("%s\n", tok)
- tok = tokenize("", ",")
- }
+ while (tok != "") {
+ println(tok)
+ tok = tokenize("", ",")
+ }
+
+ println("-")
+
+ tok = tokenize(teststr3, ",")
+ while (tok != "") {
+ println(tok)
+ tok = tokenize("", ",")
+ }
+
+ println("-")
+
+ tok = tokenize(teststr4, ",")
+ while (tok != "") {
+ println(tok)
+ tok = tokenize("", ",")
+ }
+
+ println("-")
+
+ tok = tokenize(teststr5, ",")
+ while (tok != "") {
+ println(tok)
+ tok = tokenize("", ",")
+ }
+
+ println("-")
+
+ tok = tokenize(teststr6, ", ")
+ while (tok != "") {
+ println(tok)
+ tok = tokenize("", ", ")
+ }
exit()
}
--
1.5.6.5