-
Notifications
You must be signed in to change notification settings - Fork 0
/
WikipediaReader.php
73 lines (59 loc) · 1.76 KB
/
WikipediaReader.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
<?php
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
/**
 * Streaming XML reader used as a base class for walking a wiki XML file.
 *
 * The class opens the given source with XMLReader and offers a generic
 * traversal loop (run()) that dispatches every element start / element end
 * to the overridable hooks openElement() / closeElement(). The default hooks
 * only advance the cursor; concrete processing is presumably supplied by
 * subclasses (none are visible in this file).
 *
 * @author ed
 */
class WikipediaReader extends XMLReader {

    /**
     * Counter initialised to 0 by the constructor. It is never updated in
     * this class; presumably subclasses maintain it.
     */
    protected $nb_pages;

    /** Path or URI that was handed to the constructor. */
    protected $file;

    /**
     * Opens the XML source and resets the counter.
     *
     * @param string $wikifile Path or URI of the XML document to read.
     *
     * @throws RuntimeException If XMLReader::open() reports failure.
     */
    public function __construct($wikifile) {
        // XMLReader::open() signals failure through its return value; without
        // this check the problem would only surface later, inside run().
        if ($this->open($wikifile) === false) {
            throw new RuntimeException('Unable to open XML source: ' . $wikifile);
        }

        $this->file = $wikifile;
        $this->nb_pages = 0;
    }

    /**
     * Releases the underlying reader when the object is destroyed.
     */
    public function __destruct() {
        $this->close();
    }

    /**
     * Returns the text content of the current node.
     *
     * readString() is normally provided by XMLReader, but not always: the
     * native method is only available when PHP is built against libxml
     * 20620 or later
     * (cf. http://www.php.net/manual/fr/xmlreader.readstring.php).
     * This override rebuilds it on top of expand().
     *
     * @return string Text content of the current node, or an empty string
     *                when the node cannot be expanded (the value the native
     *                method returns on failure).
     */
    #[\ReturnTypeWillChange]
    public function readString() {
        $node = $this->expand();

        // expand() yields false on error; reading a property on it would
        // raise a warning and return null instead of a string.
        if ($node === false) {
            return '';
        }

        return $node->textContent;
    }

    /**
     * Tells whether an edit comment matches one of the bot markers.
     *
     * A comment matches when the word "bot" or "robot" (case-insensitive),
     * followed by a space, appears either at the very start of the comment
     * or right after a space. This covers the former separate patterns
     * "^robot ", "^bot ", " bot ", " robot " and "^Med - bot " (the last one
     * always contains " bot ").
     *
     * @param string $comment Edit comment to inspect.
     *
     * @return bool True when the comment matches a bot marker.
     */
    protected function isRobot($comment) {
        return preg_match('/(?:^| )(?:ro)?bot /i', $comment) === 1;
    }

    /**
     * Delegates to utils::isIP(), which is defined outside this file.
     *
     * @param string $ip Value to test.
     *
     * @return mixed Whatever utils::isIP() returns (presumably a boolean).
     */
    protected function isIP($ip) {
        return utils::isIP($ip);
    }

    /**
     * Hook called by run() when the cursor is on an element start.
     *
     * The default implementation ignores the element and moves on with
     * next(), which skips the element's subtree.
     *
     * @param string $element Qualified name of the element.
     *
     * @return bool False stops the loop in run().
     */
    protected function openElement($element) {
        return $this->next();
    }

    /**
     * Hook called by run() when the cursor is on an element end.
     *
     * The default implementation simply moves on with next().
     *
     * @param string $element Qualified name of the element.
     *
     * @return bool False stops the loop in run().
     */
    protected function closeElement($element) {
        return $this->next();
    }

    /**
     * Walks the document and dispatches nodes to the hooks.
     *
     * Reads the first node, then loops: element starts go to openElement(),
     * element ends go to closeElement(), every other node type is skipped
     * with next(). The loop ends as soon as a hook or next() returns a
     * falsy value.
     */
    public function run() {
        $ok = $this->read();

        while ($ok) {
            if ($this->nodeType === XMLReader::ELEMENT) {
                $ok = $this->openElement($this->name);
            } elseif ($this->nodeType === XMLReader::END_ELEMENT) {
                $ok = $this->closeElement($this->name);
            } else {
                $ok = $this->next();
            }
        }
    }
}
?>