Improve dictionary generation time

Julien Lepiller — Thu Jul 29 14:37:12+0200 2021

73bd8f0

Improve dictionary generation time

modules/nani/result/jibiki.scm

281281
      ('exemples #f))))
282282
283283
(define (create-parser frq)
284+
  (define results '())
284285
  (ssax:make-parser
285286
    NEW-LEVEL-SEED
286287
    (lambda (elem-gi attributes namespaces expected-content seed)

288289
    
289290
    FINISH-ELEMENT
290291
    (lambda (elem-gi attributes namespaces parent-seed seed)
291-
      (if (equal? elem-gi 'volume)
292-
          (filter (lambda (a) a) seed)
293-
          (let* ((seed (reverse seed))
294-
                 (element (sxml->element seed elem-gi frq)))
295-
            (cons element parent-seed))))
292+
      (cond
293+
        ((equal? elem-gi 'volume)
294+
         results)
295+
        ((equal? elem-gi 'article)
296+
         (set! results (cons seed results))
297+
         #f)
298+
        (else
299+
         (let* ((seed (reverse seed))
300+
                (element (sxml->element seed elem-gi frq)))
301+
           (cons element parent-seed)))))
296302
    
297303
    CHAR-DATA-HANDLER
298304
    (lambda (string1 string2 seed)

modules/nani/result/jmdict.scm

408408
    ('entry (sxml->result lst frq))))
409409
410410
(define (create-parser frq)
411+
  (define results '())
411412
  (ssax:make-parser
412413
    NEW-LEVEL-SEED
413414
    (lambda (elem-gi attributes namespaces expected-content seed)

421422
422423
    FINISH-ELEMENT
423424
    (lambda (elem-gi attributes namespaces parent-seed seed)
424-
      (if (equal? elem-gi 'JMdict)
425-
          seed
426-
          (let* ((seed (reverse seed))
427-
                 (element (sxml->element seed elem-gi frq)))
428-
            (cons element parent-seed))))
425+
      (cond
426+
        ((equal? elem-gi 'JMdict)
427+
         results)
428+
        ((equal? elem-gi 'entry)
429+
         (set! results (cons seed results))
430+
         #f)
431+
        (else
432+
         (let* ((seed (reverse seed))
433+
                (element (sxml->element seed elem-gi frq)))
434+
           (cons element parent-seed)))))
429435
430436
    CHAR-DATA-HANDLER
431437
    (lambda (string1 string2 seed)

modules/nani/result/result.scm

199199
    (when (not (result? result)) (throw 'not-result result))
200200
    (+ ((list-size (huffman-string-size kanji-huffman-code)) (result-kanjis result))
201201
       ((list-size (reading-size reading-huffman-code)) (result-readings result))
202-
       ((list-size (meaning-size meaning-huffman-code)) (result-meanings result)))))
202+
       ((list-size (meaning-size meaning-huffman-code)) (result-meanings result))
203+
       (char-size (result-score result)))))
203204
204205
;; creating tries
205206
(define (make-key key)

317318
      (trie-node-size reading-trie))
318319
    (format #t "Number of nodes in meanings: ~a~%"
319320
      (trie-node-size meaning-trie))
321+
    (format #t "First trie is at ~a~%" pos-kanji)
320322
    ((serialize-list (serialize-result kanji-huffman-code reading-huffman-code
321323
                                       meaning-huffman-code)
322324
                     #:size? #f)

385387
      (cond
386388
        ((> (result-score a) (result-score b)) #t)
387389
        ((= (result-score a) (result-score b))
388-
         (string>? (get-string a) (get-string b)))))))
390+
         (string<? (get-string a) (get-string b)))
391+
        ((< (result-score a) (result-score b)) #f)))))

modules/nani/result/wadoku.scm

450450
      ('gramGrp (gram->info lst)))))
451451
452452
(define (create-parser frq)
453+
  (define results '())
453454
  (ssax:make-parser
454455
    NEW-LEVEL-SEED
455456
    (lambda (elem-gi attributes namespaces expected-content seed)

457458
    
458459
    FINISH-ELEMENT
459460
    (lambda (elem-gi attributes namespaces parent-seed seed)
460-
      (if (equal? elem-gi 'entries)
461-
          seed
462-
          (let* ((seed (reverse seed))
463-
                 (element (sxml->element seed elem-gi frq)))
464-
            (cons element parent-seed))))
461+
      (cond
462+
        ((equal? elem-gi 'entries)
463+
         results)
464+
        ((equal? elem-gi 'entry)
465+
         (set! results (cons seed results))
466+
         #f)
467+
        (else
468+
         (let* ((seed (reverse seed))
469+
                (element (sxml->element seed elem-gi frq)))
470+
           (cons element parent-seed)))))
465471
    
466472
    CHAR-DATA-HANDLER
467473
    (lambda (string1 string2 seed)