-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaol_24_uid_create.php
46 lines (37 loc) · 977 Bytes
/
aol_24_uid_create.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
<?php
// This program adds additional URLs to the uid table (`uid_clean` by default):
// every distinct `url` of the source table that the uid table does not
// contain yet is inserted into it.
// (The `uid_clean` table was originally created from the `url_clean` table, with todo.)
// We just enlarge the URL set, so that we can crawl the URLs' corresponding
// web pages to train our doc model.
//
// Parameters (as returned by ParameterParser from the command line):
//   TB  - source table to read URLs from      (default: "tmp")
//   UTB - uid table that receives the new URLs (default: "uid_clean")
include_once("kit_lib.php");
$para = ParameterParser($argc, $argv);
//source table
if (!isset($para["TB"])){
    $para["TB"] = "tmp";
}
//uid table
if (!isset($para["UTB"])){
    $para["UTB"] = "uid_clean";
}
// Identifiers cannot be escaped like string values; doubling any backtick
// keeps a table name from terminating the quoted identifier in the queries.
$source_table = str_replace("`", "``", $para["TB"]);
$uid_table = str_replace("`", "``", $para["UTB"]);

$database_cnn = "b95119";
// $b95119_cnn is not defined in this file; presumably kit_lib.php opens it.
// Stop here on failure instead of running the queries against whatever
// database happens to be selected.
mysql_select_db($database_cnn, $b95119_cnn)
    or die("cannot select database ".$database_cnn."\n".mysql_error($b95119_cnn));

// Collect every distinct URL of the source table that is missing from the
// uid table.
// NOTE: with NOT IN, a NULL `url` in the uid table makes this return no rows.
$sql = sprintf("
SELECT distinct `url`
FROM `%s`
WHERE `url` NOT
IN (
SELECT `url`
FROM `%s`
)",
    $source_table, $uid_table);
$result = mysql_query($sql) or die($sql."\n".mysql_error());

// The result set is buffered by mysql_query, so inserting into the uid table
// while iterating does not affect the rows being read.
while ($row = mysql_fetch_row($result)){
    // Escape with the driver rather than by hand: besides quotes it also
    // handles backslashes and the other characters special to MySQL strings.
    // No link argument is given so it uses the same connection as mysql_query.
    $url = mysql_real_escape_string($row[0]);
    $insert_sql = sprintf(
        "insert into `%s` (`url`)
        values ('%s')",
        $uid_table, $url);
    mysql_query($insert_sql) or die($insert_sql."\n".mysql_error());
}
?>