提供一款免费的 PHP 问问采集代码。如果你正想采集问问,但又不知道怎么写采集程序,这里为你提供一款经典的问问采集程序,代码如下:
-
<?php
/**
 * Soso Wenwen question/answer collector.
 *
 * Each request processes at most ONE search result and then redirects the
 * browser back to this script with updated counters, so a whole crawl is a
 * chain of short requests.
 *
 * Request parameters:
 *   POST ask - keyword typed into the form; starts a new crawl.
 *   GET  sp  - search term carried between redirects.
 *   GET  pg  - search result page number.
 *   GET  rs  - index of the result on the current page (0-9).
 *   GET  jl  - running record counter shown in the progress output.
 *
 * The following variables are read but not defined in this file; they are
 * presumably provided by stole_config.php / conn.php / keyword.php:
 *   $table_prefix, $min_t1, $min_t2, $cj_word, $same_title, $art_author,
 *   $keyword, $t_table, $t_art_title, $t_art_content, $t_art_time,
 *   $t_artx_author, $t_catx_id, $cat_id.
 *
 * NOTE(review): this script relies on the legacy mysql_* extension, which
 * was removed in PHP 7. The connection is opened outside this file, so the
 * driver cannot be changed here; values are escaped with
 * mysql_real_escape_string() instead of being interpolated raw.
 */

session_start();

header("content-type:text/html;charset=utf-8");

require("stole_config.php");
require("conn.php");
require("keyword.php");

/**
 * Runs a "select count(*) ..." query and returns the count as an integer.
 *
 * Returns 0 when the query fails or yields no row, so a failed lookup is
 * treated as "no matching rows".
 */
function wenwen_count_rows($sql)
{
    $result = mysql_query($sql);
    if (!$result) {
        return 0;
    }
    $row = mysql_fetch_array($result);

    return $row ? (int) $row[0] : 0;
}

// Superglobals are case-sensitive: $_POST / $_GET, not $_post / $_get.
if (!empty($_POST['ask']) && is_string($_POST['ask'])) {
    // NOTE(review): the "s" prefix is kept exactly as found; confirm the
    // remote endpoint does not expect a different case.
    $sp = "s" . urlencode(trim($_POST['ask']));
} else {
    $sp = (isset($_GET['sp']) && is_string($_GET['sp'])) ? urlencode($_GET['sp']) : '';
}

// Counters are cast to int so they are safe to echo and to put in the redirect.
$jl = isset($_GET['jl']) ? intval($_GET['jl']) : 0;
if ($jl < 1) {
    $jl = 1;
}
$pg = isset($_GET['pg']) ? intval($_GET['pg']) : 0;
$rs = isset($_GET['rs']) ? intval($_GET['rs']) : 0;

// Ten results per page: after index 9 move on to the next page.
if ($rs > 9) {
    $rs = 0;
    $pg++;
}

// Stop after page 51.
if ($pg > 51) {
    // The search term originates from the request, so escape it for HTML.
    echo "采集完毕! 总共采集 " . htmlspecialchars(urldecode($sp), ENT_QUOTES, 'UTF-8') . " " . $jl . "条记录";
    exit();
}

if ($sp) {
    $str = @file_get_contents("http://wenwen.soso.com/z/search.e?sp={$sp}&pg={$pg}");

    // Extract the result list, then every question link inside it. The
    // quantifiers are lazy: a greedy ".*" with the "s" flag would swallow
    // everything up to the LAST ".htm" and yield a single match, while the
    // code below needs one match per result so it can index them by $rs.
    $urllist = array(1 => array());
    if ($str !== false
        && preg_match('#<ol class="result_list">(.*?)</ol>#ius', $str, $asklist)
    ) {
        preg_match_all('#<a target="_blank" href="/z/(q.*?\.htm)#ius', $asklist[1], $urllist);
    }

    // No link at this index (fetch failed or fewer results on the page):
    // skip processing but still advance the counters below.
    $t = isset($urllist[1][$rs]) ? $urllist[1][$rs] : null;

    if ($t !== null) {
        // Source id used to detect questions that were already collected.
        $suid = "ww{$t}";
        $suid_sql = mysql_real_escape_string($suid);

        $sct = wenwen_count_rows("select count(*) from {$table_prefix}c_article where suid='{$suid_sql}' ");

        if ($sct == 0) {
            $html = @file_get_contents("http://wenwen.soso.com/z/{$t}");
            // A failed fetch yields false; the cast turns it into an empty page.
            $html = str_replace(array("</pre>", "<pre>"), "", (string) $html);
            $html = str_replace("<br/><br/><br/>", "<br/><br/>", $html);

            // Question title: first <h3> after the question container.
            $art_title = preg_match('#<div class="question_main">.*?<h3>(.*?)</h3>#ius', $html, $ask_title)
                ? $ask_title[1]
                : '';

            // Answer body: first answer container, kept only when it is
            // longer than $min_t1 bytes.
            $art_content = '';
            if (preg_match('#<div class="answer_con">(.*?)</div>#ius', $html, $answer)
                && strlen($answer[1]) > $min_t1
            ) {
                $art_content = $answer[1];
            }
            $art_content = trim($art_content);

            // Strip anchor tags but keep their link text ('$2' is the second
            // capture group).
            $art_content = (string) preg_replace('#(<a .*?>)(.*?)</a>#ius', '$2', $art_content);

            // The title must contain at least one configured keyword.
            // $cj_word is converted from GBK so it can be compared with the
            // UTF-8 page text. Empty entries are ignored.
            $word_allow = false;
            foreach (explode(",", (string) iconv("gbk", "utf-8", $cj_word)) as $word) {
                if ($word !== '' && strpos($art_title, $word) !== false) {
                    $word_allow = true;
                    break;
                }
            }

            if ($word_allow) {
                if (strlen($art_content) > $min_t2) {
                    echo "<font color=red>添加中............................</font><br>";
                    echo $art_title . "<br>";

                    // Titles are compared and stored in GBK.
                    $art_title = iconv('utf-8', 'gbk', $art_title);

                    // Make a duplicate title unique by appending a suffix
                    // and the number of existing rows with that title.
                    $title_ct = wenwen_count_rows(
                        "select count(*) from {$table_prefix}c_article where art_title ='"
                        . mysql_real_escape_string($art_title) . "' "
                    );
                    if ($title_ct > 0) {
                        $art_title .= "{$same_title}{$title_ct}";
                    }

                    // NOTE(review): replacing every space with <br> is kept
                    // as found; the search string may originally have been a
                    // newline - confirm.
                    $art_content = iconv('utf-8', 'gbk', str_replace(" ", "<br>", $art_content));
                    $art_content = strtr($art_content, $keyword);

                    // NOTE(review): "y" is a two-digit year; confirm that
                    // "Y-m-d" was not intended.
                    $art_time = date("y-m-d");

                    // Escape every value: the title and content come from a
                    // remote page and must not be trusted inside SQL.
                    $v_title = mysql_real_escape_string($art_title);
                    $v_content = mysql_real_escape_string($art_content);
                    $v_time = mysql_real_escape_string($art_time);
                    $v_author = mysql_real_escape_string($art_author);

                    // Record in the collector's own table.
                    $sql = "insert into {$table_prefix}c_article(art_title,art_content,art_time,art_author,suid) values('{$v_title}','{$v_content}','{$v_time}','{$v_author}','{$suid_sql}')";
                    mysql_query($sql);

                    // Copy into the configured target table, with or without
                    // a category column.
                    if (empty($t_catx_id)) {
                        $sql2 = "insert into {$t_table}({$t_art_title},{$t_art_content},{$t_art_time},{$t_artx_author}) values('{$v_title}','{$v_content}','{$v_time}','{$v_author}')";
                    } else {
                        $v_cat = mysql_real_escape_string($cat_id);
                        $sql2 = "insert into {$t_table}({$t_art_title},{$t_art_content},{$t_art_time},{$t_artx_author},{$t_catx_id}) values('{$v_title}','{$v_content}','{$v_time}','{$v_author}','{$v_cat}')";
                    }
                    mysql_query($sql2);

                    $jl++;
                } else {
                    echo "长度不够";
                }
            } else {
                echo "主题不符合要求";
            }
        } else {
            echo "已经存在";
        }
    }

    $rs++;

    // Persist progress so an interrupted crawl can be inspected.
    $f_tt = urldecode($sp) . "--页数" . $pg . " 记录数 " . $jl;
    file_put_contents("ss.txt", $f_tt);

    // Continue with the next result. $sp is url-encoded and the counters
    // are integers, so the values are safe inside the quoted URL.
    echo "<script>location.href='wenwen.php?jl=" . $jl . "&sp=" . $sp . "&pg=" . $pg . "&rs=" . $rs . " ';</script>";

    exit();
}

?>
<!-- Collector UI: header banner, plugin navigation links and the keyword form that posts to wenwen.php. -->
<link href="style.css" rel="stylesheet" type="text/css" />
<table width="700" border="0" align="center" cellspacing="1" bgcolor="#cccccc">
  <tr>
    <td height="50" align="center" bgcolor="#00cc00"><h1><a href="http://www.111cn.net">荐礼啦</a>知道问问采集插件</h1></td>
  </tr>
</table>
<table width="700" border="0" align="center" cellspacing="1" bgcolor="#cccccc" style="margin-top:6px; margin-bottom:6px;">
  <tr>
    <td height="30" align="center" bgcolor="#ffffff"><a href="cj_config.php">采集设置</a> <a href="uninstall.php" onclick="return confirm('您确定要卸载采集插件吗');">卸载采集</a> <a href="cj_view.php">查看采集记录</a> <a href="cj_help.php">采集帮助</a> <a href="baidu.php" target="_blank">知道采集</a> <a href="wenwen.php" target="_blank">问问采集</a></td>
  </tr>
</table>
<!-- The "ask" field is the keyword read by the script as the POST parameter of the same name. -->
<form action="wenwen.php" method="post">
  <table width="628" height="49" border="0" align="center">
    <tr>
      <td width="413" align="right"><input name="ask" type="text" id="ask" size="50"></td>
      <td width="205"><input type="submit" name="button" id="button" value="问问采集" style=" padding-left:15px; padding-right:15px; height:25px; line-height:25px;"></td>
    </tr>
  </table>
</form>
-
(责任编辑:最模板) |