-
Notifications
You must be signed in to change notification settings - Fork 9
/
representation.lisp
231 lines (201 loc) · 9.27 KB
/
representation.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
;;; CommonLisp interface to WordNet
;;; 1995, Mark Nahabedian
;;; Artificial Intelligence Laboratory
;;; Massachusetts Institute of Technology
;;; Representation of WordNet data. Uses the lower layers defined in
;;; "wordnet-database-files" and "parse-wordnet-data" to extract data from the
;;; WordNet database files and then constructs an object oriented
;;; representation.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Representation
(in-package #:wordnet)
(defclass wordnet-object () ())
(defclass wordnet-index-entry (wordnet-object)
((word :initarg :word :reader index-entry-word)
(part-of-speech :initarg :part-of-speech
:reader part-of-speech)
(synset-offsets :initarg :synset-offsets
:reader index-entry-synset-offsets)))
(defmethod print-object ((object wordnet-index-entry) stream)
(print-unreadable-object (object stream :type t :identity t)
(format stream "~a ~a"
(ignore-errors (part-of-speech object))
(ignore-errors (index-entry-word object)))))
(defclass wordnet-synset-entry (wordnet-object)
((part-of-speech :initarg :part-of-speech :reader part-of-speech)
(offset :initarg :offset :reader synset-offset)
(words :initarg :words :initform nil :reader synset-words)
(raw-pointers :initarg :pointers :initform nil)
(pointers)
(gloss :initarg :gloss :initform nil :reader synset-gloss)))
(defmethod print-object ((object wordnet-synset-entry) stream)
(print-unreadable-object (object stream :type t :identity t)
(dolist (w (ignore-errors (synset-words object)))
(format stream "~a " (car w)))))
(defclass wordnet-noun-entry (wordnet-synset-entry) ())
(defclass wordnet-adjective-entry (wordnet-synset-entry) ())
(defclass wordnet-adverb-entry (wordnet-synset-entry) ())
(defclass wordnet-verb-entry (wordnet-synset-entry)
((verb-frames :initarg :verb-frames :initform nil)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; constructing the objects
;;; The objects we've made are kept in a cache so that if we ever get a request
;;; for the same object again, we can fetch the existing one rather than read
;;; the WordNet files again and make a new one.
(defvar *wordnet-index-cache*
(mapcar #'(lambda (pos) (list pos (make-hash-table :test #+Genera 'string=
#-Genera 'equal)))
(parts-of-speech)))
(defvar *wordnet-synset-cache*
(mapcar #'(lambda (pos)
(list pos (make-hash-table :test 'equal)))
(parts-of-speech)))
(defun cached-index-lookup (word part-of-speech)
"Looks up the entries for word (a string or a symbol) for the specified
part-of-speech."
(let* ((table (second (assoc part-of-speech *wordnet-index-cache*)))
(index-cache-entry (gethash word table)))
(unless index-cache-entry
(multiple-value-bind (word part-of-speech poly_cnt pointer-types synset-offsets)
(parse-index-file-entry
(index-entry-for-word part-of-speech word))
(declare (ignore poly_cnt pointer-types))
(when word
(setq index-cache-entry (make-instance 'wordnet-index-entry
:word word
:part-of-speech part-of-speech
:synset-offsets synset-offsets))
(setf (gethash word table) index-cache-entry))))
index-cache-entry))
(defun cached-data-lookup (synset-index part-of-speech)
(let* ((table (second (assoc part-of-speech *wordnet-synset-cache*)))
(synset-entry (gethash synset-index table)))
(unless synset-entry
(multiple-value-bind (part-of-speech words pointers gloss verb-frames)
(parse-data-file-entry (read-data-file-entry part-of-speech synset-index))
(setq synset-entry
(apply #'make-instance
(ecase part-of-speech
(:noun 'wordnet-noun-entry)
(:verb 'wordnet-verb-entry)
(:adjective 'wordnet-adjective-entry)
(:adverb 'wordnet-adverb-entry))
:offset synset-index
:part-of-speech part-of-speech
:words words
:pointers pointers
:gloss gloss
(when (eq part-of-speech :verb)
(list :verb-frames verb-frames)))))
(setf (gethash synset-index table) synset-entry))
synset-entry))
(defun index-entry-synsets (index-entry)
(when index-entry
(mapcar #'(lambda (offset)
(cached-data-lookup offset (part-of-speech index-entry)))
(index-entry-synset-offsets index-entry))))
(defun morphology-exception-lookup (word part-of-speech)
(parse-exception-file-entry (exception-entry-for-word part-of-speech word)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Relationships among words and synsets
;;; I don't have a good idea about what the best way to represent WordNet
;;; pointers is. Let's try this.
(defclass wordnet-pointer (wordnet-object)
((type :initarg :type
:reader wordnet-pointer-type)
(from-synset :initarg :from
:reader wordnet-pointer-from-synset)
(from-word-index :initarg :from-index
:reader wordnet-pointer-from-synset-index)
(to-synset :initarg :to
:reader wordnet-pointer-to-synset)
(to-word-index :initarg :to-index
:reader wordnet-pointer-to-synset-index)))
(defmethod print-object ((object wordnet-pointer) stream)
(print-unreadable-object (object stream :type t :identity t)
(format stream "~a" (ignore-errors (wordnet-pointer-type object)))))
(defmethod transitive-relation-p ((pointer wordnet-pointer))
(transitive-relation-p (wordnet-pointer-type pointer)))
(defmethod relation-direction ((pointer wordnet-pointer))
(relation-direction (wordnet-pointer-type pointer)))
(defmethod wordnet-pointer-from-word ((pointer wordnet-pointer))
(with-slots (from-synset from-word-index) pointer
(if (zerop from-word-index)
from-synset
(elt (synset-words from-synset) (1- from-word-index)))))
(defmethod wordnet-pointer-to-word ((pointer wordnet-pointer))
(with-slots (to-synset to-word-index) pointer
(if (zerop to-word-index)
to-synset
(elt (synset-words to-synset) (1- to-word-index)))))
(defmethod reify-pointers ((synset wordnet-synset-entry))
(with-slots (raw-pointers pointers) synset
(let ((new-pointers nil))
(dolist (p raw-pointers)
(destructuring-bind (pointer-type target part-of-speech
source-index target-index) p
(let ((to-synset (cached-data-lookup target part-of-speech)))
(unless to-synset
(error "Pointer ~s has invalid target" p))
(push (make-instance 'wordnet-pointer
:type pointer-type
:from synset
:from-index source-index
:to to-synset
:to-index target-index)
new-pointers))))
(setq pointers (nreverse new-pointers)))))
(defmethod wordnet-pointers ((synset wordnet-synset-entry))
(unless (slot-boundp synset 'pointers)
(reify-pointers synset))
(slot-value synset 'pointers))
(defmacro do-synset-pointers ((pointer-var synset &optional (pointer-types nil pt?))
&body body)
(assert (symbolp pointer-var))
`(dolist (,pointer-var (wordnet-pointers ,synset))
(when ,@(if pt?
`((member (wordnet-pointer-type ,pointer-var)
,pointer-types))
t)
,@body)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; fancy printing
(defun pretty-print-synset (stream synset &key (word-index 0) gloss word-sense-tags)
(flet ((call-with-text-face (stream face function)
#+Genera
(typecase stream
(clim-internals::output-protocol-mixin
(clim:with-text-face (stream face)
(funcall function stream)))
(t (scl:with-character-face (face stream)
(funcall function stream))))
#-Genera
(funcall function stream))
(print-pos (stream)
(write-string
(ecase (part-of-speech synset)
(:noun "n") (:verb "v") (:adjective "adj") (:adverb "adv"))
stream))
(print-word+sense (word+sense stream)
(if word-sense-tags
(write-string (first word+sense) stream)
(format stream "~a:~d" (first word+sense) (second word+sense)))))
(call-with-text-face stream :italic #'print-pos)
(write-char #\{ stream)
(do* ((words (synset-words synset) (cdr words))
(word+sense (car words) (car words))
(index 1 (1+ index)))
((null words))
(if (= index word-index)
(call-with-text-face stream :bold
#'(lambda (stream)
(print-word+sense word+sense stream)))
(print-word+sense word+sense stream))
(unless (null words)
(write-char #\space stream)))
(when gloss
(let ((gloss (synset-gloss synset)))
(when gloss
(format stream " | ~a" gloss))))
(write-char #\} stream))
synset)