|
  
- UID
- 1
- 帖子
- 3792
- 精华
- 66
- 积分
- 8334
- 威望
- 8334 度
- 论坛币
- 12595 元
- 阅读权限
- 200
- 在线时间
- 1526 小时
- 注册时间
- 2007-1-15
- 最后登录
- 2009-1-7
|
1楼
发表于 2007-4-11 13:53
| 只看该作者
phpchina新闻小偷聚合器
<?
/*
phpchina聚合器
聚合phpchina的文章,并且每次运行不添加重复数据。
在 cdb_threads 表里面增加两个字段
tinyint(1):isCollection
char(255):collectionURL
适用于 Discuz!5.0 版本,当然稍微改动一下可以放到任何系统里。
http://www.phpmy.net,langwan<langwanluo@gmail.com>
*/
// Scraping every category and article can take far longer than the default limit.
set_time_limit(0);

// Shared secret: the script only runs when requested with ?pagePasswd=<this value>.
$pagePasswd = 'http://www.zybbs.net'; // change this
if (!isset($_GET['pagePasswd']) || !is_string($_GET['pagePasswd']) || $pagePasswd !== $_GET['pagePasswd']) {
    exit('拒绝访问');
}

// Forum account and board that imported threads are attributed to (read by importBBS()).
$userID = 1;
$userName = 'admin';
$fid = 6;

// Database credentials.
$dbHost = 'localhost';
$dbName = 'dbName'; // change this
$dbUser = 'dbUser'; // change this
$dbPass = 'dbPass'; // change this

// Stop immediately if the database is unreachable instead of scraping for nothing.
if (!mysql_connect($dbHost, $dbUser, $dbPass) || !mysql_select_db($dbName)) {
    exit(mysql_error());
}
mysql_query("SET character_set_connection=gbk, character_set_results=gbk, character_set_client=binary");
mysql_query("SET sql_mode=''");

// Markers delimiting the article list and the pager on a category page.
$blockStart = '<div id="newslist">';
$blockEnd = '<!--新闻(标题)-->';
$pageStart = '<td class="xspace-current">';
$pageEnd = '></a></td></tr></table>';

// Markers on an article page (the field markers are read by getContent()).
$TitleStart = '<div id="net">';
$TitleEnd = '</div>';
$TimeStart = '发布时间: ';
$AuthorStart = ' 作者: ';
$sourceStart = ' 信息来源: ';
$sourceEnd = '</div>';
$contentStart = '<div id="nec">';
$contentEnd = '<center><input type="image" onclick=copyToClipBoard()';

// Category pages to aggregate.
$urlArray = array(
    'http://www.phpchina.com/category_71.html',
    'http://www.phpchina.com/category_69.html',
    'http://www.phpchina.com/category_72.html',
    'http://www.phpchina.com/category_81.html'
);

foreach ($urlArray as $url) {
    $subjectArray = array();

    // The most recent thread already imported from this category marks where to stop.
    $sql = "SELECT * FROM cdb_threads WHERE isCollection = '1' AND collectionURL = '"
        . mysql_real_escape_string($url) . "' ORDER BY dateline DESC LIMIT 1";
    $res = mysql_query($sql);
    $row = $res ? mysql_fetch_array($res) : false;

    // getSubject() compares 'subject' with each scraped title and copies 'url'
    // onto every item so it ends up stored as the thread's collectionURL.
    $stopSubject = array(
        'subject' => $row ? $row['subject'] : null,
        'url' => $url,
    );

    $data = @file_get_contents($url);
    if ($data === false) {
        // Category page unreachable: skip it, the next run will retry.
        continue;
    }

    // The category URL itself is the first list page; the pager links follow it.
    // (Assigning index 0 afterwards would append it to the END of the iteration order.)
    $pageArray = array_merge(array($url), array_values(getPage(getBlock($data, $pageStart, $pageEnd))));

    foreach ($pageArray as $page) {
        $data = @file_get_contents($page);
        if ($data === false) {
            continue;
        }
        $found = getSubject(getBlock($data, $blockStart, $blockEnd), $stopSubject);
        if (!is_array($found) || count($found) == 0) {
            continue;
        }
        $subjectArray = array_merge($subjectArray, $found);

        // getSubject() ends its result with an empty-link sentinel once it reaches
        // the already-imported title; no further pages are needed then.
        $last = end($found);
        if ($last['link'] == '') {
            break;
        }
    }

    foreach ($subjectArray as $v) {
        if ($v['link'] == '') {
            continue; // sentinel entry, not an article
        }
        $content = getContent($v);
        if (!empty($content)) {
            importBBS($content);
        }
    }
}
/**
 * Inserts one scraped article as a new thread plus its first post and updates
 * the forum counters.
 *
 * Uses the globals $fid, $userID and $userName and the currently open mysql
 * connection. SQL errors are echoed, as before.
 *
 * @param array $content Article as returned by getContent(): keys 'subject',
 *                       'content', 'createTime' and 'url'. An empty array
 *                       (failed fetch) is ignored.
 * @return void
 */
function importBBS($content) {
    global $fid, $userID, $userName;

    // A failed download yields an empty array; do not create a blank thread for it.
    if (empty($content) || !isset($content['subject']) || !isset($content['content'])) {
        return;
    }

    // Every value comes from a remote page, so all of them are escaped, not only the body.
    $subject = mysql_real_escape_string($content['subject']);
    $message = mysql_real_escape_string($content['content']);
    $sourceUrl = mysql_real_escape_string(isset($content['url']) ? $content['url'] : '');
    $author = mysql_real_escape_string($userName);
    // strtotime() may have returned false for an unparsable date; store 0 in that case.
    $createTime = isset($content['createTime']) ? (int) $content['createTime'] : 0;
    $forumId = (int) $fid;
    $authorId = (int) $userID;

    $sql = "INSERT INTO cdb_threads(
        fid, author, subject,
        lastpost, lastposter,
        authorid, dateline, isCollection, collectionURL
    ) VALUES (
        '$forumId', '$author', '$subject',
        '$createTime', '$author',
        '$authorId', '$createTime', '1', '$sourceUrl'
    )";
    if (!mysql_query($sql)) {
        echo mysql_error();
        // Without a thread row there is no tid to attach a post to.
        return;
    }
    $tid = mysql_insert_id();

    $sql = "INSERT INTO cdb_posts(
        fid, tid, subject,
        first, author, authorid,
        dateline, message, htmlon, bbcodeoff, smileyoff
    ) VALUES (
        '$forumId', '$tid', '$subject',
        '1', '$author', '$authorId',
        '$createTime', '$message', '1', 1, 1
    )";
    $postInserted = mysql_query($sql);
    if (!$postInserted) {
        echo mysql_error();
    }

    // Count only what was actually written.
    $sql = $postInserted
        ? "UPDATE cdb_forums SET threads=threads+1, posts=posts+1 WHERE fid='$forumId'"
        : "UPDATE cdb_forums SET threads=threads+1 WHERE fid='$forumId'";
    mysql_query($sql);
}
/**
 * Downloads one article page and extracts its fields.
 *
 * The page is cut sequentially at the global markers $TimeStart, $AuthorStart,
 * $sourceStart, $sourceEnd, $contentStart and $contentEnd. Each cut is made at
 * the FIRST occurrence of the marker only, so a marker that shows up again
 * later (for example a "</div>" inside the article body) no longer truncates
 * the following fields.
 *
 * @param array $url List item from getSubject(): 'link' (article address),
 *                   'subject' (title) and 'url' (category address).
 * @return array Keys 'createTime' (strtotime() result), 'author', 'source',
 *               'content', 'subject' and 'url'; an empty array when the page
 *               cannot be fetched or a required marker is missing.
 */
function getContent($url) {
    global $TimeStart, $AuthorStart, $sourceStart, $sourceEnd, $contentStart, $contentEnd;

    $ret = array();
    // A failed fetch is an expected condition here and is reported via the empty result.
    $data = @file_get_contents($url['link']);
    if (empty($data)) {
        return $ret;
    }

    // Skip everything up to the publication-time label.
    $parts = explode($TimeStart, $data, 2);
    if (count($parts) < 2) {
        return $ret;
    }

    // Time text runs up to the author label.
    $parts = explode($AuthorStart, $parts[1], 2);
    if (count($parts) < 2) {
        return $ret;
    }
    $timeText = $parts[0];

    // Author runs up to the source label.
    $parts = explode($sourceStart, $parts[1], 2);
    if (count($parts) < 2) {
        return $ret;
    }
    $author = $parts[0];

    // Source runs up to the end of the info line.
    $parts = explode($sourceEnd, $parts[1], 2);
    if (count($parts) < 2) {
        return $ret;
    }
    $source = $parts[0];

    // Body starts after the content marker ...
    $parts = explode($contentStart, $parts[1], 2);
    if (count($parts) < 2) {
        return $ret;
    }
    // ... and ends at the end marker, or at the end of the page if that marker is absent.
    $parts = explode($contentEnd, $parts[1], 2);

    $ret['createTime'] = strtotime($timeText);
    $ret['author'] = $author;
    $ret['source'] = $source;
    $ret['content'] = $parts[0];
    $ret['subject'] = $url['subject'];
    $ret['url'] = $url['url'];
    return $ret;
}
/**
 * Returns the text between the first occurrence of $start and the first
 * occurrence of $end that follows it.
 *
 * @param string $data  Text to search (a failed download, i.e. false, is treated as empty).
 * @param string $start Marker preceding the block.
 * @param string $end   Marker following the block.
 * @return string The enclosed text; '' when $start is not found; everything
 *                after $start when $end is not found.
 */
function getBlock($data, $start, $end) {
    // Limit 2 keeps the whole remainder after the first $start, even if $start repeats.
    $afterStart = explode($start, (string) $data, 2);
    if (count($afterStart) < 2) {
        return '';
    }
    $beforeEnd = explode($end, $afterStart[1], 2);
    return $beforeEnd[0];
}
/**
 * Extracts the pager links (anchors with target="_self") from a pager fragment.
 *
 * @param string $data HTML fragment containing the pager.
 * @return array Link addresses keyed 1..N in document order (index 0 is left
 *               free for the caller); empty when nothing matches.
 */
function getPage($data) {
    $ret = array();
    $linkArray = array();
    // No "e" modifier: it only ever had a meaning for preg_replace() and makes
    // the pattern fail to compile on newer PHP versions.
    $pattern = '/<a href=\"([^ ]+)\" target="_self">([^<>]+)<\/a>/is';
    if (preg_match_all($pattern, (string) $data, $linkArray, PREG_SET_ORDER)) {
        $i = 1;
        foreach ($linkArray as $v) {
            $ret[$i++] = $v[1];
        }
    }
    return $ret;
}
/**
 * Extracts the article links (anchors with target="_blank" class="link12")
 * from a list fragment, stopping at an already-imported title.
 *
 * @param string     $data        HTML fragment containing the article list.
 * @param array|null $stopSubject 'subject': title at which to stop (null or
 *                                missing = never stop); 'url': value copied
 *                                into every returned item's 'url' key.
 * @return array List of items with keys 'link', 'subject' and 'url'. When the
 *               stop title is reached, the list ends with a sentinel item
 *               whose 'link' and 'subject' are ''. Always an array, also when
 *               nothing matches.
 */
function getSubject($data, $stopSubject) {
    $ret = array();
    $stopTitle = isset($stopSubject['subject']) ? $stopSubject['subject'] : null;
    $sourceUrl = isset($stopSubject['url']) ? $stopSubject['url'] : null;

    $linkArray = array();
    // No "e" modifier: it only ever had a meaning for preg_replace() and makes
    // the pattern fail to compile on newer PHP versions.
    $pattern = '/<a href=\"([^ ]+)\" target="_blank" class="link12">([^<>]+)<\/a>/is';
    if (!preg_match_all($pattern, (string) $data, $linkArray, PREG_SET_ORDER)) {
        return $ret;
    }

    foreach ($linkArray as $v) {
        // Strict comparison so numeric-looking titles ("007" vs "7") are not confused.
        if ($stopTitle !== null && (string) $stopTitle === $v[2]) {
            // Sentinel telling the caller that older entries are already imported.
            $ret[] = array('link' => '', 'subject' => '');
            return $ret;
        }
        $ret[] = array('link' => $v[1], 'subject' => $v[2], 'url' => $sourceUrl);
    }
    return $ret;
}
?> |
|