This is the mail archive of the guile@sourceware.cygnus.com mailing list for the Guile project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

more guile for perl refugees (split, join)



	Shortly after reading Dirk Herrmann's wonderful little tutorial on
a perl-like readdir and grep, I had a need for guile procedures
resembling some of my other favorite perl builtins.
	Inspired by that example, I determined to clean them up in hopes
that they might make useful examples or even become a part of a
"recovering perl programmer's guile toolkit."

	Below are two complete guile programs, split.scm and join.scm,
that implement and test the split and join procedures.

	Perhaps "split" could even have a more guile-styled name like
seperate-string-discarding-regexp, and join would be
string-append-inserting-seperator.


Seperate these files at the appropriate point - sorry if
mime-attachements, sharfiles, or somthing else would be preferable on the
list.

; split: 
; devide a string into fields, on boundaries determined by a regular expression
; The fields are returned as a list. Split is intended to be like my
; most common usage of the perl function of the same name.
;
; Leading and trailing null fields are always discarded.
;
; by Steve Tell <steve@telltronics.org>

(debug-enable 'debug 'backtrace)
(read-enable 'positions)
(use-modules (ice-9 regex))

; First come several early attempts that evolve into the version to keep.
; Read these only if you're a scheme newcommer like me and want to see the
; evolution.
;
; my first working version of the recursive part of split.

(define (split-recursive1 r s)
  (let ((m (regexp-exec r s)))
    (if (not m)
	(if (< 0 (string-length s))
	    (cons s '())
	    '())
	(if (< 0 (string-length (match:prefix m)))
	    (cons (match:prefix m) (split-recursive1 r (match:suffix m)))
	    (split-recursive1 r (match:suffix m))))))

;
; next, attempt to rewrite split-recursive with the "loop" named-let construct.
;

(define (split-recursive2 r str)
  (let loop ((s str))
    (let ((m (regexp-exec r s)))
      (if (not m)
	  (if (< 0 (string-length s))
	      (cons s '())
	      '())
	  (if (< 0 (string-length (match:prefix m)))
	      (cons (match:prefix m) (loop (match:suffix m)))
	      (loop (match:suffix m))))
      )))

;
; apply "result" trick to allow tail-recursion elimination.
;
; You might ask, does this really help that much?  Doesn't "reverse" require
; stack proportional to the list length anyway?
;
; No, it doesn't, because reverse is written in C and fiddles the pointers
; directly.  Even better, we can use the destructive form, "reverse!", which
; flips the cells around in-place instead of creating a new list.
;

(define (split-recursive r str)
  (let loop ((s str)
	     (result '()))
    (let ((m (regexp-exec r s)))
      (if (not m)
	  (if (< 0 (string-length s))
	      (cons s result)
	      result)
	  (if (< 0 (string-length (match:prefix m)))
	      (loop (match:suffix m) (cons (match:prefix m) result))
	      (loop (match:suffix m) result)))
      )))

;
; This helper procedure or a similar one is required for each of the
; versions of split-recursive above
; 
(define-public (split1 re str)
  (let ((r (make-regexp re)))
    (reverse! (split-recursive r str))))


;
; Lastly, we remove the helper-procedure, resulting in a final
; "production" version of "split"
;

(define (split re str)
  (let ((r (make-regexp re)))
    (let loop ((s str)
	       (result '()))
      (let ((m (regexp-exec r s)))
	(if (not m)
	    (if (< 0 (string-length s))
		(reverse! (cons s result))
		(reverse! result))
	    (if (< 0 (string-length (match:prefix m)))
		(loop (match:suffix m) (cons (match:prefix m) result))
		(loop (match:suffix m) result)))
	))))


;
; And, here's a version that allows null fields at the beginning or end 
; to produce a null string in the returned list.
;

(define (split-nullok re str)
  (let ((r (make-regexp re)))
    (let loop ((s str)
	       (result '()))
      (let ((m (regexp-exec r s)))
	(if (not m)
	    (reverse! (cons s result))
	    (loop (match:suffix m) (cons (match:prefix m) result)))))))




; The rest of this file is a test harness for split.

(define (print-to-port port . l)
    (for-each (lambda (elem) (display elem port)) l))

(define (print . l)
    (apply print-to-port (cons (current-output-port) l)))

;
; Print a list, whose elements are assumed to be strings.
; elements surrounded with double-quotes to make whitespace eaiser to see.
;
(define (stringlist-print l)
  (print "(")
  (for-each (lambda (e)
	      (print " \"" e "\""))
	    l)
  (print " )"))

(define (test-split r s)
  (print "split        \"" s "\" --> ")
  (stringlist-print (split r s))
  (print "\n")

  (print "split-nullok \"" s "\" --> ")
  (stringlist-print (split-nullok r s))
  (print "\n")

)

(test-split "[ \t\n]+" "abc def \taabbc" )
(test-split "[ \t\n]+" "abc    aabbc"    )
(test-split "[ \t\n]+" "abc"             )
(test-split "[ \t\n]+" " abc def"        )
(test-split "[ \t\n]+" "abc def "        )

; end of split.scm

; join.scm

(debug-enable 'debug 'backtrace)
(read-enable 'positions)

; join - a procedure like the perl function "join:"
; concatenate list of strings, putting a seperator string between each
; element of the list.
;
; by Steve Tell <steve@telltronics.org>

(define (join s l)
  (cond ((null? l)     "")
	((= 1 (length l))     (car l))
	(else (string-append (car l) s (join s (cdr l))))))

; The rest of this file contains a test harness for join

(define (print-to-port port . l)
    (for-each (lambda (elem) (display elem port)) l))

(define (print . l)
    (apply print-to-port (cons (current-output-port) l)))

(define (stringlist-print l)
  (print "(")
  (for-each (lambda (e)
	      (print " \"" e "\""))
	    l)
  (print " )"))

(define (test-join sep lst)
  (print "join \"" sep "\" ")
  (stringlist-print lst)
  (print " --> \"" (join sep lst) "\"\n")
)

(test-join "/" '("one" "two" "three"))
(test-join "::" '("one" "two" "three"))
(test-join ":" '("one"))
(test-join ":" '())

; end of join.scm


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]