Python is the winner?-)

Posted on

Please see some info here. I hope to translate this post into English.

;;
 
(use-modules (ice-9 rdelim) (ice-9 regex))
 
;; Command-line arguments.  The script expects the regex pattern as
;; element 1 and the input file name as element 2 of this list.
(define argv (program-arguments))

;; Stop early with a usage message instead of failing inside car/cdr
;; when the pattern or the file name is missing.
(if (< (length argv) 3)
    (begin
        (display "usage: guile -s SCRIPT PATTERN FILE" (current-error-port))
        (newline (current-error-port))
        (exit 1)))

;; Input file to scan; the port is opened once and consumed by
;; read-all-lines.
(define filename (caddr argv))
(define inputfile (open-input-file filename))

;; Record-selection pattern, kept both as text (for the final report)
;; and compiled (for matching).
(define pattern-string (cadr argv))
(define pattern (make-regexp pattern-string))


;; Running totals updated by read-all-lines and printed in the report.
(define records 0)   ; records seen
(define lines 0)     ; lines read
(define selected 0)  ; records with at least one matching line
 
;; Scan `inputfile` line by line and print every record that contains at
;; least one line matching `pattern`.
;;
;; A record is a run of lines terminated by an empty line; the empty
;; terminator is kept as part of the record and printed with it.  A line
;; that matches the pattern never terminates a record, even if it is
;; empty.  A trailing record that is not followed by an empty line is
;; still counted (and printed if it matched).
;;
;; Side effects: writes selected records to the current output port and
;; updates the global counters `lines`, `records` and `selected`.
;; An empty input file yields no lines and no records.
(define (read-all-lines)
    ;; Print one record.  Lines are accumulated newest-first (cons is
    ;; O(1), unlike appending to the tail), so restore file order here.
    (define (emit-record newest-first)
        (for-each (lambda (line) (display line) (newline))
                  (reverse newest-first))
        (set! selected (+ 1 selected)))

    (let loop ((stack '()) (good #f))
        (let ((line (read-line inputfile)))
            (cond
                ((eof-object? line)
                 ;; End of input: flush a record that had no blank-line
                 ;; terminator.  Nothing to do when no lines are pending.
                 (if (pair? stack)
                     (begin
                         (if good (emit-record stack))
                         (set! records (+ 1 records)))))
                (else
                 (set! lines (+ 1 lines))
                 (let ((stack (cons line stack)))
                     (cond
                         ;; Matching is checked first, so a matching
                         ;; line only marks the record as selected.
                         ((regexp-exec pattern line)
                          (loop stack #t))
                         ;; Empty line: the record is complete.
                         ((equal? "" line)
                          (if good (emit-record stack))
                          (set! records (+ 1 records))
                          (loop '() #f))
                         (else
                          (loop stack good)))))))))
 
;; Run the scan; selected records go to standard output.
(read-all-lines)

;; The whole input has been consumed, so release the file port.
(close-input-port inputfile)

(use-modules (ice-9 format))

;; Summary goes to standard error so that it does not mix with the
;; selected records when standard output is redirected to a file.
(format (current-error-port)
"~d records (~d lines) processed
~d records matched
Pattern was: '~a'
" records lines selected pattern-string)
 
;; vim: ts=2:

Scheme:

$ time guile -s fradlog_extract.scm 'Station-Id = \"4494.....\"' detail-20090519 > part-scheme
183764 records (4563405 lines) processed
447 records matched
Pattern was: 'Station-Id = "4494....."'
 
real    0m27.653s
user    0m27.550s
sys     0m0.110s

awk:

$ time awk -f fradlog_extract.awk pattern='Station-Id = \"4494.....\"' detail-20090519 > part-awk
183764 records (4563405 lines) processed
447 records matched
Pattern was: 'Station-Id = "4494....."'
 
real    0m21.680s
user    0m21.490s
sys     0m0.090s

Python:

$ time python fradlog_extract.py 'Station-Id = "4494....."' detail-20090519 > part-python
183764 records (4563405 lines) processed
447 records matched
Pattern was: 'Station-Id = "4494....."'
 
real    0m9.766s
user    0m9.670s
sys     0m0.060s

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.