-
Notifications
You must be signed in to change notification settings - Fork 5
/
testAhoCorasick.php
154 lines (146 loc) · 3.63 KB
/
testAhoCorasick.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
<?php
include("AhoCorasick.class.php");
/**
 * Small application wrapper around the AhoCorasick matcher.
 *
 * Offers three entry points (single keyword, keyword array, keyword file +
 * text file). All of them report invalid input by echoing a short message
 * and returning null, and otherwise return the array of matched keywords.
 */
class ACAppClass{
    /**
     * Whether findWordsInArray() echoes the elapsed run time.
     * false: do not show; true: show. Defaults to true.
     */
    private $showtimeFlag = true;

    /**
     * Constructor: turns run-time reporting on.
     *
     * Declared as __construct() because a method named after its class is no
     * longer treated as a constructor from PHP 8.0 on, which would leave the
     * flag uninitialised.
     */
    public function __construct(){
        $this->showtimeFlag = true;
    }

    /**
     * Search a string for a single keyword.
     *
     * @param string $word keyword; surrounding whitespace is trimmed
     * @param string $text string to search in
     * @return array|null  result of findWordsInArray(), or null (after echoing
     *                     a message) when the keyword is empty
     */
    public function findSingleWord($word, $text){
        $word = trim($word);
        if(strlen($word)==0){
            echo "Key word's content is empty.";
            return;
        }
        return $this->findWordsInArray(array($word), $text);
    }

    /**
     * Search a string for several keywords.
     *
     * @param array  $words keywords; each one is trimmed before being added
     * @param string $text  string to search in
     * @return array|null   de-duplicated outputs collected from the search
     *                      results, or null (after echoing a message) when the
     *                      array is empty or contains an empty keyword
     */
    public function findWordsInArray($words, $text){
        if(count($words)==0){
            echo "Array of keywords is empty.";
            return;
        }
        if($this->showtimeFlag){
            $starttime = $this->getmicrotime();
        }
        $tree = new AhoCorasick();
        // foreach instead of an index loop so that arrays with string or
        // non-sequential keys are handled as well.
        foreach($words as $word){
            $word = trim($word);
            if($word===""){
                echo "Key word's content is empty.";
                return;
            }
            $tree->add($word);
        }
        $tree->prepare();
        $res = array();
        $obj = $tree->search($text);
        while($obj->hasNext()){
            $result = $obj->next();
            $res = array_unique(array_merge($res, $result->getOutputs()));
        }
        if($this->showtimeFlag){
            $endtime = $this->getmicrotime();
            // Round the difference so float noise from subtracting two large
            // millisecond timestamps does not leak into the output.
            echo "<br>run time is: ".round($endtime-$starttime, 3)."ms<br>";
        }
        return $res;
    }

    /**
     * Search the content of a text file for the keywords listed in a key file.
     *
     * The key file holds one keyword per line. The text file's lines are
     * trimmed and joined without a separator, so characters on either side of
     * a line break become adjacent in the searched string.
     *
     * @param string $keyfile  path of the file containing the keywords
     * @param string $textfile path of the file containing the text to search
     * @return array|null      result of findWordsInArray(), or null (after
     *                         echoing a message) when a file is missing or
     *                         unreadable, or the key file has no keywords
     */
    public function findWordsInFile($keyfile, $textfile){
        if(!is_file($keyfile) || !is_file($textfile)){
            echo "Can not find the file.";
            return;
        }
        $keywords = $this->readTrimmedLines($keyfile);
        if($keywords===false){
            echo "Can not find the file.";
            return;
        }
        // A key file consisting only of whitespace yields no keywords.
        if(count($keywords)==0){
            echo "File's content is empty.";
            return;
        }
        $textLines = $this->readTrimmedLines($textfile);
        if($textLines===false){
            echo "Can not find the file.";
            return;
        }
        return $this->findWordsInArray($keywords, implode("", $textLines));
    }

    /**
     * Read a file and return its non-empty lines with whitespace trimmed.
     *
     * @param string $path file to read
     * @return array|false list of trimmed, non-empty lines; false when the
     *                     file could not be read
     */
    private function readTrimmedLines($path){
        $lines = file($path, FILE_IGNORE_NEW_LINES);
        if($lines===false){
            return false;
        }
        $kept = array();
        foreach($lines as $line){
            $line = trim($line);
            if($line!==""){
                $kept[] = $line;
            }
        }
        return $kept;
    }

    /**
     * Current Unix timestamp in milliseconds.
     *
     * @return float milliseconds since the Unix epoch, rounded to 3 decimals
     */
    public function getmicrotime(){
        // microtime(true) is seconds as a float; scale the whole value so the
        // seconds and the fractional part end up in the same unit.
        return round(microtime(true) * 1000, 3);
    }
}
// Demo run: search a sample string for a fixed keyword list and dump the matches.
$keywords = array(
    '逗比',
    '逗死人',
    'hello world',
    'hello hetou',
    'hello1',
    'hello',
);
$app = new ACAppClass();
$matches = $app->findWordsInArray($keywords, 'hello死人');
var_dump($matches);
?>