This repository has been archived by the owner on Jun 20, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
balaa.l
101 lines (89 loc) · 3.86 KB
/
balaa.l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
;;; balaa.l --- Thorough Arabic Stemming tester
;;;
;; Copyright (C) 2023 Salih Muhammed
;; Author: Salih Muhammed <[email protected]>
;; Created: 12 May 2023
;; URL: https://github.com/lugha/balaa
(load "samples.l")
(sb-posix:chdir "/home/l/gits/arch/balaa/stemmers/")
(defstruct stemmer
name
call
cmd)
(defun read-file-as-string (infile)
(with-open-file (instream infile :direction :input :if-does-not-exist nil)
(when instream
(let ((string (make-string (file-length instream))))
(read-sequence string instream)
string))))
(defparameter *my-timings* nil)
(defun run-python-stemmer (cmd word)
(setq word (format nil (read-file-as-string cmd) word))
(let* ((output-stream (make-string-output-stream))
(process (sb-ext:run-program "/usr/bin/python"
(list "-c" word)
:output output-stream)))
(process-wait process)
(let ((output-string (get-output-stream-string output-stream)))
(close output-stream)
(string-trim
'(#\Space #\Newline #\Backspace #\Tab
#\Linefeed #\Page #\Return #\Rubout)
output-string))))
(defun run-cli-stemmer (cmd word)
(let* ((output-stream (make-string-output-stream))
(process (sb-ext:run-program cmd
(list word)
:output output-stream)))
(process-wait process)
(let ((output-string (get-output-stream-string output-stream)))
(close output-stream)
(string-trim
'(#\Space #\Newline #\Backspace #\Tab
#\Linefeed #\Page #\Return #\Rubout)
output-string))))
(defun balaa ()
(let* ((stemmers (list
(make-stemmer :name "Mohamed Eldesouki"
:call 'run-python-stemmer
:cmd
"stemmers/arabic_processing_cog.py")
(make-stemmer :name "ntlk"
:call 'run-python-stemmer
:cmd
"stemmers/ntlk.py")
(make-stemmer :name "Snowball"
:call 'run-cli-stemmer
:cmd "/home/l/gits/balaa/stemmers/slight")
)))
(do* ((stemmer stemmers (cdr stemmer))
(s (car stemmer) (car stemmer))
(total 0 0)
(tright 0 0))
((null stemmer))
(format t "~%Testing: ~A~%" (stemmer-name s))
(do* ((lst *samples* (cdr lst))
(morph (car (car lst)) (car (car lst))))
((null lst))
(format t "Morpho: ~A~%~%" morph)
(do* ((word (cdr (car lst)) (cdr word))
(wrong 0))
((null word) (format t "~%~A:OUT-OF:~A~%~%" (- (length (cdr (car lst))) wrong)
(length (cdr (car lst)))))
(let* ((run (get-internal-run-time))
(real (get-internal-real-time)))
(multiple-value-prog1 (funcall (stemmer-call s) (stemmer-cmd s) (car (car word)))
(push (cons (- (get-internal-run-time) run)
(- (get-internal-real-time) real))
*my-timings*))
(let ((got (funcall (stemmer-call s) (stemmer-cmd s) (car (car word))))
(wanted (car (cdr (car word)))))
(setf total (+ total 1))
(if (not (string-equal got wanted))
(progn
(format t "Got ~A wanted ~A~%" got wanted)
(setf wrong (+ wrong 1)))
(setf tright (+ tright 1))))
(format t "Time: ~A~%" (/ (- (get-internal-real-time) real)
internal-time-units-per-second 0.001)))))
(format t "~%TOTAL ~A OUT-OF ~A~$" tright total))))