本文原創(chuàng),轉(zhuǎn)載請注明出處
<?php
/**
* Trie樹
*
* @author iamlaobie@gmail.com
* @since 2012-08-19
*/
class Trie
{
/**
* 樹的存儲
*/
private $_tree = array();
/**
* 插入一個(gè)新詞
*
* @param $word 詞
* @param $data 詞附加屬性
*/
public function insert($word, $data = array())
{
$cur = & $this->_tree;
for($i = 0, $len = strlen($word); $i < $len; $i++){
$ascValue = ord($word[$i]);
if(!isset($cur[$ascValue])){
$cur[$ascValue] = array();
}
if($i == $len - 1){
$data['text'] = $word;
$cur[$ascValue]['$'] = $data;
break;
}else{
$cur = & $cur[$ascValue];
}
}
}
/**
* 查找詞
*/
public function find($word)
{
$cur = & $this->_tree;
for($i = 0, $len = strlen($word); $i < $len; $i++){
$ascValue = ord($word[$i]);
if(!isset($cur[$ascValue])){
return null;
}
if($i == $len - 1){
if(isset($cur[$ascValue]['$'])){
return $cur[$ascValue]['$'];
}
}else{
$cur = $cur[$ascValue];
}
}
return null;
}
/**
* 獲取以$word為前綴的詞
*
* @param $depth 后綴長度
*/
public function suffixes($word, $depth = 1)
{
$cur = & $this->_tree;
$words = array();
for($i = 0, $len = strlen($word); $i < $len; $i++){
$ascValue = ord($word[$i]);
if(!isset($cur[$ascValue])){
return null;
}
if($i == $len - 1){
return $this->traverse($cur, $depth);
}else{
$cur = $cur[$ascValue];
}
}
return null;
}
/**
* 遍歷
*/
public function traverse($tree = null, $depth = 1, $words = array(), $recuDepth = 0)
{
if($tree === null){
$tree = $this->_tree;
}
$recuDepth += 1;
unset($tree['$']);
//中文utf8編碼長度為3
$realDepth = $depth * 3;
foreach($tree as $ascValue => $subTree){
if(isset($subTree['$'])){
$words[] = $subTree['$'];
}
if(!empty($subTree) && $recuDepth <= $realDepth){
$words = $this->traverse($subTree, $depth, $words, $recuDepth);
}
}
return $words;
}
/**
* 打印樹
*/
public function tree()
{
print_r($this->_tree);
}
}
//測試
$trie = new Trie();
$trie->insert("中國人", array('a' => 1));
$trie->insert("中國通", array('a' => 1));
$trie->insert("中國人地", array('a' => 1));
$trie->insert("中國人民", array('a' => 1));
$trie->insert("中方通", array('a' => 2));
$trie->insert("abc", array('a' => 2));
$trie->insert("外方", array('a' => 2));
var_dump($trie->suffixes('中國人', 1));