Suffix Tree
在负暄琐话里看到关于 suffix tree 的帖子,感觉很牛逼,于是用 scheme 实现了一个,写了一个多小时。唉,以我现在的水平,用 scheme 写点东西还是很费劲的... 话说用 suffix tree 可以在线性时间内实现很多字符串匹配和查找的功能,无边强大~
(define (gen-suffix x) ; x is a list (if (null? (cdr x)) (list x) (cons x (gen-suffix (cdr x)))))
(define (compare-two stringcons) ;stringcons is a cons ;; anyone is empty? (if (or (null? (car stringcons)) (null? (cdr stringcons))) '() ; one of them is empty (if (equal? (car (car stringcons)) ; none of them is empty (car (cdr stringcons))) (cons (car (car stringcons)) (compare-two (cons (cdr (car stringcons)) (cdr (cdr stringcons))))) '() )))
(define (next-to-common stringcons common) (if (null? common) (cons (if (null? (car stringcons)) '() (car (car stringcons))) (if (null? (cdr stringcons)) '() (car (cdr stringcons)))) (next-to-common (cons (cdr (car stringcons)) (cdr (cdr stringcons))) (cdr common))))
(define (make-tri index node childs) (cons index (cons node childs)))
(define (node tri) (car (cdr tri))) (define (index tri) (car tri)) (define (childs tri) (cdr (cdr tri))) (define (find-index char kids) (if (null? kids) '() (if (equal? char (index (car kids))) (car kids) (find-index char (cdr kids)))))
(define (replace-by-index idx kids newkid) (if (null? kids) '() (if (equal? (index (car kids)) idx) (cons newkid (cdr kids)) (cons (car kids) (replace-by-index idx (cdr kids) newkid)))))
(define (insert-string string tri) ;; tri: (list index node child1 chind2 ...) (let* ((common (compare-two (cons string (node tri)))) (nextchar (next-to-common (cons string (node tri)) common))) (if (equal? common (node tri)) (if (null? (find-index (car nextchar) (childs tri))) (make-tri (index tri) (node tri) (cons (make-tri (car nextchar) string '()) (childs tri))) (make-tri (index tri) (node tri) (replace-by-index (car nextchar) (childs tri) (insert-string string (find-index (car nextchar) (childs tri)))))) (make-tri (index tri) common (list (make-tri (cdr nextchar) (node tri) (childs tri)) (make-tri (car nextchar) string '()))))))
(define (insert-string-list strlist tri) (if (null? strlist) tri (insert-string (car strlist) (insert-string-list (cdr strlist) tri))))
(define (gen-suffix-tree string) (insert-string-list (gen-suffix string) (make-tri '() '() '()))) 运行一下: > (gen-suffix-tree (string->list "bananas$")) (() () (#\b (#\b #\a #\n #\a #\n #\a #\s #\$)) (#\n (#\n #\a) (#\s (#\n #\a #\s #\$)) (#\n (#\n #\a #\n #\a #\s #\$))) (#\a (#\a) (#\s (#\a #\s #\$)) (#\n (#\a #\n #\a) (#\s (#\a #\n #\a #\s #\$)) (#\n (#\a #\n #\a #\n #\a #\s #\$)))) (#\s (#\s #\$)) (#\$ (#\$))) 很乱很正确~~
一些链接:
- Suffix tree @ wikipedia
- 讲解生成方法的视频,在 mac os 里用一个未知软件做的
- Tries 和 suffix trees