<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>最优良人 &#187; 采集</title>
	<atom:link href="http://www.zui88.com/view-tag/%e9%87%87%e9%9b%86/feed" rel="self" type="application/rss+xml" />
	<link>http://www.zui88.com/blog</link>
	<description>中山php&#124;最优网络</description>
	<lastBuildDate>Mon, 13 May 2013 04:56:43 +0000</lastBuildDate>
	<language>zh-CN</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.1.4</generator>
		<item>
		<title>php采集程序，提取网页超链接，邮箱或其他特定内容</title>
		<link>http://www.zui88.com/blog/view-366.html</link>
		<comments>http://www.zui88.com/blog/view-366.html#comments</comments>
		<pubDate>Wed, 30 May 2012 08:48:29 +0000</pubDate>
		<dc:creator>lin</dc:creator>
				<category><![CDATA[后端程序]]></category>
		<category><![CDATA[php]]></category>
		<category><![CDATA[采集]]></category>

		<guid isPermaLink="false">http://www.zui88.com/blog/?p=366</guid>
		<description><![CDATA[以下代码从上一篇文章修改而来，专门用于提取网页所有超链接，邮箱或其他特定内容 &#60;?php function fetch_urlpage_contents($url){ $c=file_get_contents($url); return $c; } //获取匹配内容 function fetch_match_contents($begin,$end,$c) { $begin=change_match_string($begin); $end=change_match_string($end); $p = "#{$begin}(.*){$end}#iU";//i表示忽略大小写，U禁止贪婪匹配 if(preg_match_all($p,$c,$rs)) { return $rs;} else { return "";} }//转义正则表达式字符串 function change_match_string($str){ //注意，以下只是简单转义 $old=array("/","$",'?'); $new=array("\/","\$",'\?'); $str=str_replace($old,$new,$str); return $str; } //采集网页 function pick($url,$ft,$th) { $c=fetch_urlpage_contents($url); foreach($ft as $key =&#62; $value) { $rs[$key]=fetch_match_contents($value["begin"],$value["end"],$c); if(is_array($th[$key])) { foreach($th[$key] as $old =&#62; $new) { [...]]]></description>
			<content:encoded><![CDATA[<p>以下代码从上一篇文章修改而来，专门用于提取网页所有超链接，邮箱或其他特定内容</p>
<p>&lt;?php</p>
<p>function fetch_urlpage_contents($url){<br />
$c=file_get_contents($url);<br />
return $c;<br />
}<br />
//获取匹配内容<br />
function fetch_match_contents($begin,$end,$c)<br />
{<br />
$begin=change_match_string($begin);<br />
$end=change_match_string($end);<br />
$p = "#{$begin}(.*){$end}#iU";//i表示忽略大小写，U禁止贪婪匹配<br />
if(preg_match_all($p,$c,$rs))<br />
{<br />
return $rs;}<br />
else { return "";}<br />
}//转义正则表达式字符串<br />
function change_match_string($str){<br />
//注意，以下只是简单转义<br />
$old=array("/","$",'?');<br />
$new=array("\/","\$",'\?');<br />
$str=str_replace($old,$new,$str);<br />
return $str;<br />
}</p>
<p>//采集网页<br />
function pick($url,$ft,$th)<br />
{<br />
$c=fetch_urlpage_contents($url);<br />
foreach($ft as $key =&gt; $value)<br />
{<br />
$rs[$key]=fetch_match_contents($value["begin"],$value["end"],$c);<br />
if(is_array($th[$key]))<br />
{ foreach($th[$key] as $old =&gt; $new)<br />
{<br />
$rs[$key]=str_replace($old,$new,$rs[$key]);<br />
}<br />
}<br />
}<br />
return $rs;<br />
}</p>
<p>$url="http://www.zui88.com"; //要采集的地址<br />
$ft["a"]["begin"]='&lt;a'; //截取的开始点<br />
$ft["a"]["end"]='&gt;'; //截取的结束点</p>
<p>$rs=pick($url,$ft,$th); //开始采集</p>
<p>print_r($rs["a"]);</p>
<p>?&gt;</p>
]]></content:encoded>
			<wfw:commentRss>http://www.zui88.com/blog/view-366.html/feed</wfw:commentRss>
		<slash:comments>190</slash:comments>
		</item>
		<item>
		<title>php小偷程序，简单的php采集代码</title>
		<link>http://www.zui88.com/blog/view-364.html</link>
		<comments>http://www.zui88.com/blog/view-364.html#comments</comments>
		<pubDate>Wed, 30 May 2012 08:08:32 +0000</pubDate>
		<dc:creator>lin</dc:creator>
				<category><![CDATA[后端程序]]></category>
		<category><![CDATA[php]]></category>
		<category><![CDATA[采集]]></category>

		<guid isPermaLink="false">http://www.zui88.com/blog/?p=364</guid>
		<description><![CDATA[&#60;?php function fetch_urlpage_contents($url){ $c=file_get_contents($url); return $c; } //获取匹配内容 function fetch_match_contents($begin,$end,$c) { $begin=change_match_string($begin); $end=change_match_string($end); $p = "{$begin}(.*){$end}"; if(eregi($p,$c,$rs)) { return $rs[1];} else { return "";} }//转义正则表达式字符串 function change_match_string($str){ //注意，以下只是简单转义 //$old=array("/","$"); //$new=array("\/","\$"); $str=str_replace($old,$new,$str); return $str; } //采集网页 function pick($url,$ft,$th) { $c=fetch_urlpage_contents($url); foreach($ft as $key =&#62; $value) { $rs[$key]=fetch_match_contents($value["begin"],$value["end"],$c); if(is_array($th[$key])) { foreach($th[$key] as $old =&#62; $new) { $rs[$key]=str_replace($old,$new,$rs[$key]); [...]]]></description>
			<content:encoded><![CDATA[<p>&lt;?php</p>
<p>function fetch_urlpage_contents($url){<br />
$c=file_get_contents($url);<br />
return $c;<br />
}<br />
//获取匹配内容<br />
function fetch_match_contents($begin,$end,$c)<br />
{<br />
$begin=change_match_string($begin);<br />
$end=change_match_string($end);<br />
$p = "{$begin}(.*){$end}";<br />
if(eregi($p,$c,$rs))<br />
{<br />
return $rs[1];}<br />
else { return "";}<br />
}//转义正则表达式字符串<br />
function change_match_string($str){<br />
//注意，以下只是简单转义<br />
//$old=array("/","$");<br />
//$new=array("\/","\$");<br />
$str=str_replace($old,$new,$str);<br />
return $str;<br />
}</p>
<p>//采集网页<br />
function pick($url,$ft,$th)<br />
{<br />
$c=fetch_urlpage_contents($url);<br />
foreach($ft as $key =&gt; $value)<br />
{<br />
$rs[$key]=fetch_match_contents($value["begin"],$value["end"],$c);<br />
if(is_array($th[$key]))<br />
{ foreach($th[$key] as $old =&gt; $new)<br />
{<br />
$rs[$key]=str_replace($old,$new,$rs[$key]);<br />
}<br />
}<br />
}<br />
return $rs;<br />
}</p>
<p>$url="http://www.zui88.com"; //要采集的地址<br />
$ft["title"]["begin"]="&lt;title&gt;"; //截取的开始点<br />
$ft["title"]["end"]="&lt;/title&gt;"; //截取的结束点<br />
$th["title"]["中山"]="广东"; //截取部分的替换</p>
<p>$ft["body"]["begin"]="&lt;body&gt;"; //截取的开始点<br />
$ft["body"]["end"]="&lt;/body&gt;"; //截取的结束点<br />
$th["body"]["中山"]="广东"; //截取部分的替换</p>
<p>$rs=pick($url,$ft,$th); //开始采集</p>
<p>echo $rs["title"];<br />
echo $rs["body"]; //输出<br />
?&gt;</p>
]]></content:encoded>
			<wfw:commentRss>http://www.zui88.com/blog/view-364.html/feed</wfw:commentRss>
		<slash:comments>289</slash:comments>
		</item>
		<item>
		<title>dede采集的文章在栏目列表页无法显示</title>
		<link>http://www.zui88.com/blog/view-213.html</link>
		<comments>http://www.zui88.com/blog/view-213.html#comments</comments>
		<pubDate>Fri, 26 Aug 2011 15:20:43 +0000</pubDate>
		<dc:creator>lin</dc:creator>
				<category><![CDATA[后端程序]]></category>
		<category><![CDATA[dede]]></category>
		<category><![CDATA[采集]]></category>

		<guid isPermaLink="false">http://www.zui88.com/blog/?p=213</guid>
		<description><![CDATA[利用dedecms的采集功能，大批量采集数据之后，没办法一个一个审核，所以在sql命令行运行了 update `dede_archives` set `arcrank` = 0 批量审核文章，然后运行 update `dede_archives` set `ismake` = -1 把所有文章改成动态浏览 更新首页html之后在最新更新可以看到新发布的文章，但是在栏目列表页不显示最新的数据，原来dede还有一个文章的索引表dede_arctiny，只要运行下面sql把审核状态置为0就正常了 update `dede_arctiny` set `arcrank` = 0 效果如 最优资讯中山新闻频道]]></description>
			<content:encoded><![CDATA[<p>利用dedecms的采集功能，大批量采集数据之后，没办法一个一个审核，所以在sql命令行运行了</p>
<p>update `dede_archives` set `arcrank` = 0 </p>
<p>批量审核文章，然后运行</p>
<p>update `dede_archives` set `ismake` = -1</p>
<p>把所有文章改成动态浏览</p>
<p>更新首页html之后在最新更新可以看到新发布的文章，但是在栏目列表页不显示最新的数据，原来dede还有一个文章的索引表dede_arctiny，只要运行下面sql把审核状态置为0就正常了</p>
<p>update `dede_arctiny` set `arcrank` = 0</p>
<p>效果如 <a href="http://www.zui88.com/news/list-8.html"><span style="color: #ff4b33;">最优资讯中山新闻频道</span></a></p>
]]></content:encoded>
			<wfw:commentRss>http://www.zui88.com/blog/view-213.html/feed</wfw:commentRss>
		<slash:comments>412</slash:comments>
		</item>
	</channel>
</rss>
