radk: compute number of entries
modules/nani/radk.scm
| 17 | 17 | ;;; along with the Nani Project website. If not, see <http://www.gnu.org/licenses/>. | |
| 18 | 18 | ||
| 19 | 19 | (define-module (nani radk) | |
| 20 | + | #:use-module (ice-9 binary-ports) | |
| 20 | 21 | #:use-module (ice-9 match) | |
| 21 | 22 | #:use-module (ice-9 peg) | |
| 22 | 23 | #:use-module (ice-9 rdelim) | |
| 24 | + | #:use-module (nani parse-result) | |
| 23 | 25 | #:use-module (nani serialize) | |
| 24 | 26 | #:use-module (rnrs bytevectors) | |
| 25 | 27 | #:use-module (sxml simple) | |
… | |||
| 27 | 29 | get-kanji-stroke | |
| 28 | 30 | get-rad-kanji | |
| 29 | 31 | get-rad-stroke | |
| 30 | - | serialize-radk)) | |
| 32 | + | serialize-radk | |
| 33 | + | kanji-count)) | |
| 31 | 34 | ||
;; PEG pattern `comment': a "#" followed by any number of tab characters or
;; characters in the range U+0020..U+10FFFF, terminated by a newline.
;; Declared with capture type `none', so the matched text is not kept in the
;; parse result.
| 32 | 35 | (define-peg-pattern comment none (and "#" (* (or "\t" (range #\x20 #\x10ffff))) "\n")) | |
;; PEG pattern `space': a single space character, also declared `none'
;; (matched but not kept in the parse result).
| 33 | 36 | (define-peg-pattern space none " ") | |
… | |||
| 163 | 166 | ("???" "???") | |
| 164 | 167 | ("???" "???") | |
| 165 | 168 | (_ kanji))) | |
| 169 | + | ||
;; Read back a serialized radk file.
;;
;; FILE is the name of the file to read.  The first 12 bytes are skipped
;; (the original code bound them to a variable named `header' and never used
;; it), then three lists are read in this order:
;;   rad-kanji    -- list of (radical-string kanji-char ...)
;;   rad-stroke   -- list of (radical-string . stroke)
;;   kanji-stroke -- list of (kanji-string . stroke)
;; Returns (list rad-kanji rad-stroke kanji-stroke).
;;
;; NOTE(review): `parse-result-string', `parse-result-char' and
;; `parse-result-list' are not defined in this file; presumably they come
;; from (nani parse-result) -- confirm their exact contracts there.
(define (parse-result file)
  ;; Every element parser below performs several reads on the same port.
  ;; They use `let*' rather than `let': Scheme leaves the evaluation order of
  ;; `let' initialisers unspecified, and reading the fields in the wrong
  ;; order would silently corrupt the result.

  ;; Read one rad-kanji element: a radical string followed by a string whose
  ;; characters are the kanji; returns (radical kanji-char ...).
  (define (parse-result-rad-kanji-element port)
    (let* ((radical (parse-result-string port))
           (kanji-list (parse-result-string port)))
      (cons radical (string->list kanji-list))))

  ;; Read one (string . stroke) element.  The rad-stroke and kanji-stroke
  ;; lists use the same element layout, so they share this reader.
  (define (parse-result-stroke-element port)
    (let* ((key (parse-result-string port))
           (stroke (parse-result-char port)))
      (cons key stroke)))

  (define (parse-result-rad-kanji port)
    (parse-result-list port parse-result-rad-kanji-element))
  (define (parse-result-rad-stroke port)
    (parse-result-list port parse-result-stroke-element))
  (define (parse-result-kanji-stroke port)
    (parse-result-list port parse-result-stroke-element))

  (call-with-input-file file
    (lambda (port)
      ;; Consume the 12 leading bytes; their content is not needed here, so
      ;; they are no longer decoded into a string.
      (get-bytevector-n port 12)
      ;; `let*' keeps the three sections in their on-disk order.
      (let* ((rad-kanji (parse-result-rad-kanji port))
             (rad-stroke (parse-result-rad-stroke port))
             (kanji-stroke (parse-result-kanji-stroke port)))
        (list rad-kanji rad-stroke kanji-stroke)))))
| 199 | + | ||
;; Return the list of distinct kanji found in CONTENT.
;;
;; CONTENT is a list of entries of the form (radical kanji ...); the first
;; item of each entry is ignored.  Each kanji appears exactly once in the
;; result, in order of first occurrence, compared with `equal?'.  A kanji
;; repeated inside a single entry is also reported only once.
(define (get-kanji-list content)
  ;; Kanji already emitted.  Guile's `hash-ref'/`hash-set!' compare keys with
  ;; `equal?', the same predicate `member' uses, and avoid rescanning the
  ;; whole result for every kanji.
  (define seen (make-hash-table))

  ;; Push onto ACC (kept in reverse order) every kanji not seen before.
  (define (add-new kanji acc)
    (cond ((null? kanji) acc)
          ((hash-ref seen (car kanji))
           (add-new (cdr kanji) acc))
          (else
           (hash-set! seen (car kanji) #t)
           (add-new (cdr kanji) (cons (car kanji) acc)))))

  (let loop ((content content) (acc '()))
    (match content
      (() (reverse acc))
      (((_ kanji ...) rest ...)
       (loop rest (add-new kanji acc))))))
| 207 | + | ||
;; Return the number of kanji listed in the serialized radk file FILE.
;;
;; The file is read with `parse-result'; only its first section (the
;; rad-kanji list) is used, and the count is the length of the list that
;; `get-kanji-list' builds from it.
(define (kanji-count file)
  (match-let (((rad-kanji _ _) (parse-result file)))
    (let ((all-kanji (get-kanji-list rad-kanji)))
      (length all-kanji))))