Page 1 of 1
Need lisp function to handle text and parse a number
Posted: Thu Apr 01, 2010 11:39 am
by inexperienced
Hi!
I am starting to write a lisp function that will take a text input (this text is obtained by parsing tokens from a text chat app), and removes a specific set of non-numeric English characters and will return a number.
For example:
- given the input "2nd", the function will return 2
- given the input "2,3", the function will return 2.3 (note the comma is changed to a period).
- given "2/3", the function will return 0.666.
Is there already a function that does that handy?
Thanks
inx.
Re: Need lisp function to handle text and parse a number
Posted: Thu Apr 01, 2010 12:29 pm
by Jasper
CL has
parse-integer, but not parse-number weirdly. Not that hard though.
(read-from-string "3/5") and such work, you can write fractions that way in CL, of course there might be security concerns; allowing access to arbitrary expressions probably not a good idea, better something else.
There is
parse-number.. Was looking for the license.. saw the code.. ewww, seems ridiculously long to me... It does support any base =<16. And more with minor modification, presumably. Do i need to prove it?(That it is painfully long)
Re: Need lisp function to handle text and parse a number
Posted: Thu Apr 01, 2010 2:05 pm
by inexperienced
Hmmm..... I can't get the tarball for parse-number to open (I am working on a Windows machine).
Maybe it will be easiest just to write the text handling part myself, then use the usual cl number handlers.
thanks!
Re: Need lisp function to handle text and parse a number
Posted: Thu Apr 01, 2010 5:45 pm
by Jasper
It is 189 lines(versus 324), but it also does fractions, complex functions, and writing. Get
Alexandria, or from
libclCode: Select all
;Author Jasper den Ouden 2010
;Placed in public domain.
(cl:use-package :cl-user)
(defpackage :parse-num
(:use :common-lisp :alexandria)
(:export parse-positive-int parse-int
parse-positive-num parse-num
parse-positive-ratio parse-ratio
*char-nums* *num-chars*
num-string num-stream)
(:documentation "Reads nums from strings.
Documentation strings refuse to be tautological."))
(in-package :parse-num)
(defparameter *nums-chars*
(make-array 256 :element-type 'character :initial-element #\.)
"Array connecting numbers to characters.")
(defparameter *char-nums*
(make-array 256 :element-type 'fixnum :initial-element -1)
"Array connecting characters to numbers.")
(declaim (type (vector character 256) *nums-chars*)
(type (vector fixnum 256) *char-nums*))
(loop for ch across "0123456789abcdefghijklmnopqrstuvxyz"
for n from 0
do (setf (aref *char-nums* (char-int ch)) n
(aref *nums-chars* n) ch))
(defun char-i (ch &key junk-allowed)
(declare (type character ch) (type boolean junk-allowed))
(let ((result (aref *char-nums* (char-int ch))))
(unless junk-allowed
(assert (>= result 0) nil "Character not allowed in number ~a"
result))
(values result (< result 0))))
(defun parse-positive-int (string &key (start 0) (end (length string))
(base 10) junk-allowed)
"Parse integer. With base <35, change *num-chars* for more."
(declare (type string string)
(type fixnum start end base)
(type boolean junk-allowed))
(do*((i start (+ i 1))
(x 0 x))
((>= i end) (values x end))
(declare (type fixnum x))
(multiple-value-bind (+x junk)
(char-i (aref string i) :junk-allowed junk-allowed)
(when junk (return-from parse-positive-int (values x i)))
(setq x (+ (* x base) +x)))))
(defun combine-float (base)
(declare (type fixnum base))
(lambda (pre post postlen)
(declare (type fixnum pre post postlen))
(+ pre (* post (expt base postlen)))))
(defun parse-positive-num
(string &key (start 0) (end (length string))
is-dot (base 10) junk-allowed
(combine (combine-float base)))
"Raw version."
(declare (type string string)
(type (function (character) boolean) is-dot)
(type boolean junk-allowed)
(type fixnum base))
(values
(flet ((parse-i (&key (start start) (end end))
(parse-positive-int string :start start :end end
:base (floor base) :junk-allowed junk-allowed)))
;(declare (inline parse-i))
(if-let (i (position-if
(lambda (ch)
(or
(funcall is-dot ch)
(unless (<= 0 (char-i ch) (- base 1))
(assert junk-allowed nil "Junk in ~s" string)
t)))
string :start start))
(if (funcall is-dot (aref string i)) ;Is an real
(multiple-value-bind (result length)
(parse-i :start (+ i 1))
(funcall combine (parse-i :end i) result (- i -1 length)))
(parse-i :end i))
(parse-i)))
end))
(defun parse-num
(string &key (base 10) (start 0) (end (length string))
(dot #\.) (is-dot (curry #'char= dot)) junk-allowed
(combine (combine-float base)))
(assert (> (length string) start) nil "~s is shorter than ~s."
string start)
(flet ((parse-on (step)
(parse-positive-num string
:start (+ start step) :end end :is-dot is-dot
:base base :junk-allowed junk-allowed :combine combine)))
(case (aref string start)
(#\+ (parse-on 1))
(#\- (- (parse-on 1)))
(t (parse-on 0)))))
(defun combine-ratio ()
(lambda (pre post postlen)
(declare (ignore postlen) (type fixnum pre post))
(/ pre post)))
(defun parse-positive-ratio
(string &key (start 0) (end (length string)) (div #\/)
(is-div (curry #'char= div)) junk-allowed)
(parse-positive-num string :start start :end end :is-dot is-div
:junk-allowed junk-allowed :combine (combine-ratio)))
(defun parse-ratio
(string &key (start 0) (end (length string)) (div #\/)
(is-div (curry #'char= div)) junk-allowed)
(parse-num string :start start :end end :is-dot is-div
:junk-allowed junk-allowed :combine (combine-ratio)))
(defun lowest (num base range)
(expt base (if (/= num 0)
(- (floor (log (abs num) base)) range) 1)))
(defun num-stream
(num &key (base 10) (stream *standard-output*)
(div #\/) (dot #\.) (range 6) (lowest (lowest num base range))
(some nil))
"Write number to stream, default *standard-output*,
range is how many orders to round, lowest, is dependent on range."
(flet ((numstr (x &key (some* some))
(num-stream x :base base :stream stream
:div div :dot dot :lowest lowest :some some*)))
(when (unless (complexp num) (< num 0))
(write-char #\- stream)
(numstr (- num))
(return-from num-stream num))
(typecase num
(integer
(if (= num 0)
(unless some (write-char (aref *nums-chars* 0) stream))
(multiple-value-bind (rest this) (floor num base)
(declare (type fixnum this rest))
(numstr rest :some* t)
(write-char (aref *nums-chars* this) stream))))
(ratio
(numstr (numerator num))
(write-char div stream)
(numstr (denominator num)))
(complex
(write-string "#C(" stream)
(numstr (realpart num))
(write-char #\Space stream)
(numstr (imagpart num))
(write-char #\) stream))
(number
(multiple-value-bind (>1 <1) (floor (* (floor num lowest) lowest))
(numstr >1)
(write-char dot stream)
(numstr (do*((x <1 (* x base))
(f* nil t)
(f t (and f (= 0 (floor x 1)))))
((= (mod x 1) 0) (floor x))
(when (and f f*)
(write-char (aref *nums-chars* 0) stream))))))))
num)
(defun num-string
(num &key (base 10) (div #\/) (dot #\.)
(range 6) (lowest (lowest num base range)))
"Write number to string, see num-stream for more."
(with-output-to-string (stream)
(num-stream num :base base :div div :dot dot :stream stream
:range range :lowest lowest)))
(defun test (&key (fr -10.0) (to 10.0) (x (+ fr (random (- to fr))))
(base 10) (to-test #'parse-num))
"Tests parse-.. Best to enter symbols, otherwise you need to fill\
in if ratio!"
(let*((r (coerce (funcall to-test (num-string x :base base :lowest 1d-6
:lowest 1d-9)
:base base)
(type-of x))))
(assert (< (abs (- x r)) 1d-5) nil "Test failed! ~a != ~a" x r))
(values))
The test seems to have some trouble with rounding in principle it shouldn't round far from lowest, but it seems to do by a larger margin.. maybe need something else for (floor (* (floor num lowest) lowest)) Integers work fine though. Spend too much time on it though, but the code looks good.
Am wondering if the compiler will make variants of parse-num for the its default COMBINE argument and for the combine argument parse-fraction give, and if declaim ftype does so.. I mean:
Code: Select all
(defun sqr (x) (* x x))
(declaim (ftype (function (fixnum) fixnum) sqr))
(sqr 524.325) ;;not fixnum, still works. Did i just ask for a specific version?
Not specifically asking you, though.
Edit:
much better tested code than that vector shit i posted before... I found a bunch of bugs in that..