radk: compute number of entries

Julien Lepiller, Wed Jun 03 20:44:44 +0200 2020

f1f570b

radk: compute number of entries

modules/nani/radk.scm

1717
;;; along with the Nani Project website.  If not, see <http://www.gnu.org/licenses/>.
1818
1919
(define-module (nani radk)
20+
  #:use-module (ice-9 binary-ports)
2021
  #:use-module (ice-9 match)
2122
  #:use-module (ice-9 peg)
2223
  #:use-module (ice-9 rdelim)
24+
  #:use-module (nani parse-result)
2325
  #:use-module (nani serialize)
2426
  #:use-module (rnrs bytevectors)
2527
  #:use-module (sxml simple)

2729
            get-kanji-stroke
2830
            get-rad-kanji
2931
            get-rad-stroke
30-
            serialize-radk))
32+
            serialize-radk
33+
            kanji-count))
3134
3235
;; A comment line: a "#", then any number of tabs or characters in the
;; range U+0020..U+10FFFF, terminated by a newline.  The `none' capture
;; type means the matched text is not kept in the parse result.
(define-peg-pattern comment none (and "#" (* (or "\t" (range #\x20 #\x10ffff))) "\n"))
3336
;; A single literal space character; matched but not captured (`none').
(define-peg-pattern space none " ")

163166
    ("???" "???")
164167
    ("???" "???")
165168
    (_ kanji)))
169+
170+
;; Read the serialized radical/kanji data stored in FILE.
;;
;; Returns a three-element list (RAD-KANJI RAD-STROKE KANJI-STROKE):
;;   - RAD-KANJI:    list of (radical . kanji-chars), where kanji-chars is
;;                   the second string of the element split into characters;
;;   - RAD-STROKE:   list of (radical . stroke) pairs;
;;   - KANJI-STROKE: list of (kanji . stroke) pairs.
;;
;; The on-disk encoding of strings, chars and lists is delegated to
;; parse-result-string, parse-result-char and parse-result-list, which are
;; not defined in this part of the file (presumably (nani parse-result)).
;;
;; Every element reader performs two successive reads on the same port, so
;; the bindings use `let*': plain `let' leaves the evaluation order of its
;; initialisers unspecified, which could read the fields in the wrong order.
(define (parse-result file)
  (define (parse-result-rad-kanji-element port)
    (let* ((radical (parse-result-string port))
           (kanji-list (parse-result-string port)))
      (cons radical (string->list kanji-list))))
  (define (parse-result-rad-kanji port)
    (parse-result-list port parse-result-rad-kanji-element))

  (define (parse-result-rad-stroke-element port)
    (let* ((radical (parse-result-string port))
           (stroke (parse-result-char port)))
      (cons radical stroke)))
  (define (parse-result-rad-stroke port)
    (parse-result-list port parse-result-rad-stroke-element))

  (define (parse-result-kanji-stroke-element port)
    (let* ((kanji (parse-result-string port))
           (stroke (parse-result-char port)))
      (cons kanji stroke)))
  (define (parse-result-kanji-stroke port)
    (parse-result-list port parse-result-kanji-stroke-element))

  (call-with-input-file file
    (lambda (port)
      ;; The 12-byte header is consumed (and decoded as UTF-8) so that the
      ;; port is positioned on the first list; its value is not used here.
      (let* ((header (utf8->string (get-bytevector-n port 12)))
             (rad-kanji (parse-result-rad-kanji port))
             (rad-stroke (parse-result-rad-stroke port))
             (kanji-stroke (parse-result-kanji-stroke port)))
        (list rad-kanji rad-stroke kanji-stroke)))))
199+
200+
;; Return the list of distinct kanji found in CONTENT, in order of first
;; appearance.
;;
;; CONTENT is a list of entries of the form (HEAD KANJI ...); HEAD is
;; ignored.  Kanji are compared with `equal?' (hash-ref / hash-set!).
;;
;; A hash table records the kanji already seen, so a kanji repeated inside
;; a single entry is kept only once, and the whole traversal is linear
;; instead of re-scanning and re-copying the accumulated list at each step.
(define (get-kanji-list content)
  (define seen (make-hash-table))

  ;; Push onto ACC (in reverse order) every element of KANJI not seen yet.
  (define (add-new kanji acc)
    (match kanji
      (() acc)
      ((k rest ...)
       (if (hash-ref seen k)
           (add-new rest acc)
           (begin
             (hash-set! seen k #t)
             (add-new rest (cons k acc)))))))

  (let loop ((acc '()) (content content))
    (match content
      (() (reverse acc))
      (((_ kanji ...) content ...)
       (loop (add-new kanji acc) content)))))
207+
208+
;; Number of kanji entries in the serialized data stored in FILE: the
;; length of the list that get-kanji-list builds from the first of the
;; three lists returned by parse-result.
(define (kanji-count file)
  (match-let (((radical-table _ _) (parse-result file)))
    (let ((all-kanji (get-kanji-list radical-table)))
      (length all-kanji))))