`

[转] 贴Snoopy.class.php代码学习参考

 
阅读更多


Snoopy.class.php 是一个用于 HTTP 协议访问操作的类库，主要在 MagpieRSS 中用于抓取远程文件。我之前转载的一篇文章对它做过简单的介绍；今天无聊，把代码贴出来，供大家参考学习。

<?php

/*************************************************

Snoopy - the PHP net client
Author: Monte Ohrt <monte@ispi.net>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 1.01

 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

You may contact the author of Snoopy by e-mail at:
monte@ispi.net

Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510

The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.net/

************************************************
*/

/**
 * Snoopy - a small HTTP client that fetches pages, follows redirects and
 * frames, submits forms, and extracts links, form elements or plain text.
 *
 * Plain http is spoken over a socket opened with fsockopen(); https is
 * delegated to an external cURL binary ($curl_path).
 *
 * The class keeps its PHP 4 style surface (public "var" properties, methods
 * without visibility keywords, underscore-prefixed "private" members) so that
 * existing callers continue to work unchanged.
 */
class Snoopy
{
    /**** Public variables ****/

    /* user definable vars */

    var $host = "www.php.net";      // host name we are connecting to
    var $port = 80;                 // port we are connecting to
    var $proxy_host = "";           // proxy host to use
    var $proxy_port = "";           // proxy port to use
    var $proxy_user = "";           // proxy user to use
    var $proxy_pass = "";           // proxy password to use

    var $agent = "Snoopy v1.2.3";   // agent we masquerade as
    var $referer = "";              // referer info to pass
    var $cookies = array();         // array of cookies to pass
                                    // $cookies["username"] = "joe";

    var $rawheaders = array();      // array of raw headers to send
                                    // $rawheaders["Content-type"] = "text/html";

    var $maxredirs = 5;             // http redirection depth maximum. 0 = disallow
    var $lastredirectaddr = "";     // contains address of last redirected address
    var $offsiteok = true;          // allows redirection off-site
    var $maxframes = 0;             // frame content depth maximum. 0 = disallow
    var $expandlinks = true;        // expand links to fully qualified URLs.
                                    // this only applies to fetchlinks(),
                                    // submitlinks(), and submittext()

    var $passcookies = true;        // pass set cookies back through redirects
                                    // NOTE: this currently does not respect
                                    // dates, domains or paths.

    var $user = "";                 // user for http authentication
    var $pass = "";                 // password for http authentication

    // http accept types
    var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

    var $results = "";              // where the content is put

    var $error = "";                // error messages sent here
    var $response_code = "";        // response code returned from server
    var $headers = array();         // headers returned from server sent here
    var $maxlength = 500000;        // max return data length (body)
    var $read_timeout = 0;          // timeout on read operations, in seconds
                                    // set to 0 to disallow timeouts

    var $timed_out = false;         // if a read operation timed out
    var $status = 0;                // http request status

    var $temp_dir = "/tmp";         // temporary directory that the webserver
                                    // has permission to write to.
                                    // under Windows, this should be C:\temp

    var $curl_path = "/usr/local/bin/curl";
                                    // Snoopy will use cURL for fetching
                                    // SSL content if a full system path to
                                    // the cURL binary is supplied here.
                                    // set to false if you do not have
                                    // cURL installed. See http://curl.haxx.se
                                    // for details on installing cURL.
                                    // Snoopy does *not* use the cURL
                                    // library functions built into php.

    /**** Private variables ****/

    var $_maxlinelen = 4096;        // max line length (headers)

    var $_httpmethod = "GET";       // default http request method
    var $_httpversion = "HTTP/1.0"; // default http request version
    var $_submit_method = "POST";   // default submit method
    var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
    var $_mime_boundary = "";       // MIME boundary for multipart/form-data submit type
    var $_redirectaddr = false;     // will be set if page fetched is a redirect
    var $_redirectdepth = 0;        // increments on an http redirect
    var $_frameurls = array();      // frame src urls
    var $_framedepth = 0;           // increments on frame depth

    var $_isproxy = false;          // set if using a proxy server
    var $_fp_timeout = 30;          // timeout for socket connection

    /**
     * Fetch the contents of a web page (http or https).
     *
     * Follows redirects up to $maxredirs and frame sources up to $maxframes.
     * The body ends up in $this->results (a string, or an array of strings
     * when frames were fetched).
     *
     * @param string $URI the location of the page to fetch
     * @return bool false when the protocol is unsupported, cURL is missing for
     *              https, or the socket connection failed; true otherwise.
     *              As in the original code, a request that times out or a cURL
     *              run that fails still yields true - inspect $error, $status
     *              and $timed_out.
     */
    function fetch($URI)
    {
        $URI_PARTS = $this->_parse_uri($URI);

        if (!$this->_send_request($URI, $URI_PARTS, $this->_httpmethod))
            return false;

        // url was redirected, check if we've hit the max depth
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
            // only follow redirect if it's on this site, or offsiteok is true
            if ($this->_redirect_allowed($this->_redirectaddr)) {
                $this->_redirectdepth++;
                $this->lastredirectaddr = $this->_redirectaddr;
                $this->fetch($this->_redirectaddr);
            }
        }

        $this->_fetch_frames();

        return true;
    }

    /**
     * Submit an http form.
     *
     * @param string       $URI       the location to post the data
     * @param array|string $formvars  form vars, format: $formvars["var"] = "val";
     * @param array|string $formfiles files to submit,
     *                                format: $formfiles["var"] = "/dir/filename.ext";
     * @return bool same contract as fetch(); output lands in $this->results
     */
    function submit($URI, $formvars = "", $formfiles = "")
    {
        $postdata = $this->_prepare_post_body($formvars, $formfiles);
        $URI_PARTS = $this->_parse_uri($URI);

        if (!$this->_send_request($URI, $URI_PARTS, $this->_submit_method, $this->_submit_type, $postdata))
            return false;

        // url was redirected, check if we've hit the max depth
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
            // a redirect target without any scheme is relative to the posted host
            if (!preg_match("|^[a-z][a-z0-9+.\\-]*://|i", $this->_redirectaddr))
                $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"] . "://" . $URI_PARTS["host"]);

            // only follow redirect if it's on this site, or offsiteok is true
            if ($this->_redirect_allowed($this->_redirectaddr)) {
                $this->_redirectdepth++;
                $this->lastredirectaddr = $this->_redirectaddr;
                if (strpos($this->_redirectaddr, "?") > 0)
                    // the redirect has changed the request method from post to get
                    $this->fetch($this->_redirectaddr);
                else
                    $this->submit($this->_redirectaddr, $formvars, $formfiles);
            }
        }

        $this->_fetch_frames();

        return true;
    }

    /**
     * Fetch the links from a web page.
     *
     * @param string $URI where you are fetching from
     * @return bool result of fetch(); on success $this->results is an array of
     *              URLs (one array per document when frames were fetched)
     */
    function fetchlinks($URI)
    {
        if (!$this->fetch($URI))
            return false;

        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        if (is_array($this->results)) {
            // expand per document; expanding the outer array at once would
            // hand preg_replace() an array of arrays
            foreach ($this->results as $x => $document) {
                $this->results[$x] = $this->_striplinks($document);
                if ($this->expandlinks)
                    $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        } else {
            $this->results = $this->_striplinks($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $URI);
        }

        return true;
    }

    /**
     * Fetch the form elements from a web page.
     *
     * @param string $URI where you are fetching from
     * @return bool result of fetch(); on success $this->results holds the
     *              form markup
     */
    function fetchform($URI)
    {
        if (!$this->fetch($URI))
            return false;

        if (is_array($this->results)) {
            foreach ($this->results as $x => $document)
                $this->results[$x] = $this->_stripform($document);
        } else
            $this->results = $this->_stripform($this->results);

        return true;
    }

    /**
     * Fetch the text from a web page, stripping the markup.
     *
     * @param string $URI where you are fetching from
     * @return bool result of fetch(); on success $this->results holds the text
     */
    function fetchtext($URI)
    {
        if (!$this->fetch($URI))
            return false;

        if (is_array($this->results)) {
            foreach ($this->results as $x => $document)
                $this->results[$x] = $this->_striptext($document);
        } else
            $this->results = $this->_striptext($this->results);

        return true;
    }

    /**
     * Grab links from a form submission.
     *
     * @param string       $URI       where you are submitting to
     * @param array|string $formvars  see submit()
     * @param array|string $formfiles see submit()
     * @return bool result of submit(); on success $this->results is an array
     *              of the links from the response
     */
    function submitlinks($URI, $formvars = "", $formfiles = "")
    {
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        if (is_array($this->results)) {
            foreach ($this->results as $x => $document) {
                $this->results[$x] = $this->_striplinks($document);
                if ($this->expandlinks)
                    $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        } else {
            $this->results = $this->_striplinks($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $URI);
        }

        return true;
    }

    /**
     * Grab text from a form submission.
     *
     * NOTE(review): as in the original, $expandlinks also runs _expandlinks()
     * over the stripped text, which prefixes the text with the base URL; the
     * property comment documents this, so it is kept - confirm it is wanted.
     *
     * @param string       $URI       where you are submitting to
     * @param array|string $formvars  see submit()
     * @param array|string $formfiles see submit()
     * @return bool result of submit(); on success $this->results holds the text
     */
    function submittext($URI, $formvars = "", $formfiles = "")
    {
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        if (is_array($this->results)) {
            foreach ($this->results as $x => $document) {
                $this->results[$x] = $this->_striptext($document);
                if ($this->expandlinks)
                    $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        } else {
            $this->results = $this->_striptext($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $URI);
        }

        return true;
    }

    /**
     * Set the form submission content type to multipart/form-data.
     */
    function set_submit_multipart()
    {
        $this->_submit_type = "multipart/form-data";
    }

    /**
     * Set the form submission content type to
     * application/x-www-form-urlencoded.
     */
    function set_submit_normal()
    {
        $this->_submit_type = "application/x-www-form-urlencoded";
    }

    /*======================================================================*\
        Private functions
    \*======================================================================*/

    /**
     * Split a URI with parse_url(), record embedded credentials in
     * $user/$pass, and guarantee the scheme/host/path/query keys exist so
     * callers never index a missing element.
     *
     * @param string $URI
     * @return array parse_url() components with string defaults
     */
    function _parse_uri($URI)
    {
        $parts = parse_url($URI);
        if (!is_array($parts))
            $parts = array(); // seriously malformed URI: falls through to "invalid protocol"

        if (!empty($parts["user"]))
            $this->user = $parts["user"];
        if (!empty($parts["pass"]))
            $this->pass = $parts["pass"];

        foreach (array("scheme", "host", "path", "query") as $key) {
            if (!isset($parts[$key]))
                $parts[$key] = '';
        }

        return $parts;
    }

    /**
     * Build the request target (path plus query string) for a direct,
     * non-proxied request.
     *
     * @param array $URI_PARTS components from _parse_uri()
     * @return string
     */
    function _request_path($URI_PARTS)
    {
        $path = $URI_PARTS["path"];
        if ($URI_PARTS["query"] !== '')
            $path .= "?" . $URI_PARTS["query"];
        return $path;
    }

    /**
     * Dispatch one request over the transport matching the URI scheme.
     *
     * @param string $URI          full URI
     * @param array  $URI_PARTS    components from _parse_uri()
     * @param string $http_method  request method
     * @param string $content_type request body content type, if any
     * @param string $body         request body, if any
     * @return bool false when no request could be attempted (bad scheme,
     *              cURL unavailable, connection failure)
     */
    function _send_request($URI, $URI_PARTS, $http_method, $content_type = "", $body = "")
    {
        switch (strtolower($URI_PARTS["scheme"])) {
            case "http":
                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];
                if (!$this->_connect($fp))
                    return false;

                // using proxy, send entire URI; no proxy, send only the path
                $target = $this->_isproxy ? $URI : $this->_request_path($URI_PARTS);
                $this->_httprequest($target, $fp, $URI, $http_method, $content_type, $body);
                $this->_disconnect($fp);
                return true;

            case "https":
                if (!$this->curl_path) {
                    $this->error = "https requires cURL, but no cURL path is configured";
                    return false;
                }
                if (function_exists("is_executable") && !is_executable($this->curl_path)) {
                    $this->error = "https requires cURL, but the configured cURL binary is not executable";
                    return false;
                }
                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                $target = $this->_isproxy ? $URI : $this->_request_path($URI_PARTS);
                $this->_httpsrequest($target, $URI, $http_method, $content_type, $body);
                return true;

            default:
                // not a valid protocol
                $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . '"' . "\n";
                return false;
        }
    }

    /**
     * Decide whether a redirect target may be followed: always when
     * $offsiteok is set, otherwise only when its host equals the current
     * host (exact, case-insensitive - a prefix match would also accept
     * "host.evil.example").
     *
     * @param string $addr absolute redirect address
     * @return bool
     */
    function _redirect_allowed($addr)
    {
        if ($this->offsiteok)
            return true;

        $target = parse_url($addr);
        return is_array($target)
            && isset($target["host"])
            && strcasecmp($target["host"], $this->host) == 0;
    }

    /**
     * Fetch the frame sources collected by the last request, as long as the
     * frame depth limit allows.
     */
    function _fetch_frames()
    {
        if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
            $frameurls = $this->_frameurls;
            $this->_frameurls = array();

            foreach ($frameurls as $frameurl) {
                if ($this->_framedepth >= $this->maxframes)
                    break;
                $this->fetch($frameurl);
                $this->_framedepth++;
            }
        }
    }

    /**
     * Strip the hyperlinks from an html document.
     *
     * @param string $document document to strip
     * @return array the href values: quoted ones first, then unquoted ones;
     *               an empty array when there are none
     */
    function _striplinks($document)
    {
        preg_match_all(
            '~<\s*a\s.*?href\s*=\s*         # find <a href=
              (["\'])?                      # find single or double quote
              (?(1) (.*?)\1 | ([^\s>]+))    # if quote found, match up to next matching
                                            # quote, otherwise match up to next space
            ~isx',
            $document,
            $links
        );

        // catenate the non-empty matches from the conditional subpattern
        $match = array();
        foreach (array(2, 3) as $group) {
            foreach ($links[$group] as $val) {
                if (!empty($val))
                    $match[] = $val;
            }
        }

        // return the links
        return $match;
    }

    /**
     * Strip the form elements from an html document.
     *
     * @param string $document document to strip
     * @return string the form, input, select, textarea and option tags
     *                (options with their text), joined by CRLF
     */
    function _stripform($document)
    {
        preg_match_all(
            '~</?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=</?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))~Usi',
            $document,
            $elements
        );

        // catenate the matches
        return implode("\r\n", $elements[0]);
    }

    /**
     * Strip the text from an html document: removes scripts and tags,
     * collapses whitespace that follows a line break, and decodes a fixed
     * list of common entities to single-byte (Latin-1 / Windows-1252)
     * characters.
     *
     * @param string $document document to strip
     * @return string the resulting text
     */
    function _striptext($document)
    {
        // the entities are listed one by one; search and replace are
        // parallel arrays and must stay the same length
        $search = array("'<script[^>]*?>.*?</script>'si",   // strip out javascript
                        "'<[/!]*?[^<>]*?>'si",              // strip out html tags
                        "'([\r\n])[\s]+'",                  // strip out white space
                        "'&(quot|#34|#034|#x22);'i",        // replace html entities
                        "'&(amp|#38|#038|#x26);'i",         // added hexadecimal values
                        "'&(lt|#60|#060|#x3c);'i",
                        "'&(gt|#62|#062|#x3e);'i",
                        "'&(nbsp|#160|#xa0);'i",
                        "'&(iexcl|#161);'i",
                        "'&(cent|#162);'i",
                        "'&(pound|#163);'i",
                        "'&(copy|#169);'i",
                        "'&(reg|#174);'i",
                        "'&(deg|#176);'i",
                        "'&(#39|#039|#x27);'",
                        "'&(euro|#8364);'i",                // europe
                        "'&a(uml|UML);'",                   // german
                        "'&o(uml|UML);'",
                        "'&u(uml|UML);'",
                        "'&A(uml|UML);'",
                        "'&O(uml|UML);'",
                        "'&U(uml|UML);'",
                        "'&szlig;'i",
                        );
        $replace = array("",
                         "",
                         "\\1",
                         "\"",
                         "&",
                         "<",
                         ">",
                         " ",
                         chr(161),
                         chr(162),
                         chr(163),
                         chr(169),
                         chr(174),
                         chr(176),
                         chr(39),
                         chr(128),
                         chr(228),  // a umlaut
                         chr(246),  // o umlaut
                         chr(252),  // u umlaut
                         chr(196),  // A umlaut
                         chr(214),  // O umlaut
                         chr(220),  // U umlaut
                         chr(223),  // sharp s
                         );

        return preg_replace($search, $replace, $document);
    }

    /**
     * Expand each link into a fully qualified URL.
     *
     * Links that already carry a scheme (http:, https:, mailto:, ...) are
     * left alone, root-relative links are resolved against the site root and
     * everything else against the directory of $URI.
     *
     * @param string|array $links the link(s) to qualify
     * @param string       $URI   the full URI to get the base from
     * @return string|array the expanded link(s), same shape as $links
     */
    function _expandlinks($links, $URI)
    {
        // the query string plays no part in the base
        $parts = parse_url(preg_replace("/\\?.*$/s", "", (string) $URI));
        if (!is_array($parts))
            $parts = array();

        $root = "";
        if (isset($parts["scheme"]) && isset($parts["host"])) {
            $root = $parts["scheme"] . "://" . $parts["host"];
            if (!empty($parts["port"]))
                $root .= ":" . $parts["port"];
        }

        // Drop a trailing "file.ext" and a trailing slash from the path only;
        // applied to the whole URI, "http://example.com" would lose its host.
        $dir = isset($parts["path"]) ? $parts["path"] : "";
        $dir = preg_replace("|/[^/.]+\\.[^/.]+$|", "", $dir);
        $dir = preg_replace("|/$|", "", $dir);
        $base = $root . $dir;

        $search = array("|^http://" . preg_quote($this->host, "|") . "(?=/)|i",
                        "|^(/)|i",
                        "|^(?![a-z][a-z0-9+.\\-]*:)|i",
                        "|/\\./|",
                        "|/[^/]+/\\.\\./|",
                        );

        // backslashes and dollars in the URL must not be read as backreferences
        $replace = array("",
                         addcslashes($root . "/", '\\$'),
                         addcslashes($base . "/", '\\$'),
                         "/",
                         "/",
                         );

        return preg_replace($search, $replace, $links);
    }

    /**
     * Build the request header lines shared by the socket and cURL
     * transports (without the request line and without line terminators).
     *
     * @param string $content_type request body content type, "" for none
     * @param string $body         request body, "" for none
     * @return array list of "Name: value" strings
     */
    function _request_headers($content_type, $body)
    {
        $headers = array();

        if (!is_array($this->rawheaders))
            $this->rawheaders = (array) $this->rawheaders;

        if (!empty($this->agent))
            $headers[] = "User-Agent: " . $this->agent;
        if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
            $host = "Host: " . $this->host;
            if (!empty($this->port))
                $host .= ":" . $this->port;
            $headers[] = $host;
        }
        if (!empty($this->accept))
            $headers[] = "Accept: " . $this->accept;
        if (!empty($this->referer))
            $headers[] = "Referer: " . $this->referer;

        if (!empty($this->cookies)) {
            if (!is_array($this->cookies))
                $this->cookies = (array) $this->cookies;

            $pairs = array();
            foreach ($this->cookies as $cookieKey => $cookieVal)
                $pairs[] = $cookieKey . "=" . urlencode((string) $cookieVal);
            $headers[] = "Cookie: " . implode("; ", $pairs);
        }

        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey . ": " . $headerVal;

        if (!empty($content_type)) {
            $line = "Content-type: $content_type";
            if ($content_type == "multipart/form-data")
                $line .= "; boundary=" . $this->_mime_boundary;
            $headers[] = $line;
        }
        if (strlen((string) $body) > 0)
            $headers[] = "Content-length: " . strlen((string) $body);
        if (!empty($this->user) || !empty($this->pass))
            $headers[] = "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass);

        return $headers;
    }

    /**
     * Record one response header line: picks up Location/URI redirects and
     * the status line, then appends the raw line to $this->headers.
     *
     * @param string $header raw header line, including its line ending
     * @param string $scheme scheme used to qualify a relative Location
     */
    function _process_response_header($header, $scheme)
    {
        // if a header begins with Location: or URI:, set the redirect
        if (preg_match("/^(?:Location|URI):\\s*(.*)/i", chop($header), $matches) && $matches[1] !== '') {
            $location = $matches[1];
            // look for a leading scheme to see if the host name is included
            if (!preg_match("|^[a-z][a-z0-9+.\\-]*://|i", $location)) {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme . "://" . $this->host . ":" . $this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $location))
                    $this->_redirectaddr .= "/" . $location;
                else
                    $this->_redirectaddr .= $location;
            } else
                $this->_redirectaddr = $location;
        }

        if (preg_match("|^HTTP/|", $header)) {
            if (preg_match("|^HTTP/[^\\s]*\\s(.*?)\\s|", $header, $status))
                $this->status = $status[1];
            $this->response_code = $header;
        }

        $this->headers[] = $header;
    }

    /**
     * Store a response body: detects a meta-refresh redirect, collects frame
     * sources while the frame depth allows, and places the body in
     * $this->results (appended when results is already an array).
     *
     * @param string $results response body
     * @param string $URI     full URI that was requested
     * @param string $scheme  scheme used to qualify frame sources
     */
    function _store_results($results, $URI, $scheme)
    {
        // check if there is a redirect meta tag
        if (preg_match('~<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*["\']?\d+;[\s]*URL[\s]*=[\s]*([^"\']*?)["\']?>~i', $results, $match))
            $this->_redirectaddr = $this->_expandlinks($match[1], $URI);

        // have we hit our frame depth and is there frame src to fetch?
        if (($this->_framedepth < $this->maxframes) && preg_match_all('~<frame\s+.*src[\s]*=[\'"]?([^\'">]+)~i', $results, $match)) {
            // appending with [] to a string is a fatal error on current PHP
            if (!is_array($this->results))
                $this->results = array();
            $this->results[] = $results;
            foreach ($match[1] as $src)
                $this->_frameurls[] = $this->_expandlinks($src, $scheme . "://" . $this->host);
        }
        // have we already fetched framed content?
        elseif (is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;
    }

    /**
     * Go get the http data from the server over an open socket.
     *
     * @param string   $url          request target (path, or full URI via proxy)
     * @param resource $fp           the current open file pointer
     * @param string   $URI          the full URI
     * @param string   $http_method  request method
     * @param string   $content_type body content type, if any
     * @param string   $body         body contents to send, if any (POST)
     * @return bool false when a read timed out ($status is set to -100)
     */
    function _httprequest($url, $fp, $URI, $http_method, $content_type = "", $body = "")
    {
        // cookies of the previous response travel along on a redirect
        if ($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $URI_PARTS = parse_url($URI);
        $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "http";
        if (empty($url))
            $url = "/";
        $body = (string) $body;

        $headers = $http_method . " " . $url . " " . $this->_httpversion . "\r\n";
        foreach ($this->_request_headers($content_type, $body) as $line)
            $headers .= $line . "\r\n";

        // add proxy auth headers
        if (!empty($this->proxy_user))
            $headers .= 'Proxy-Authorization: Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass) . "\r\n";

        $headers .= "\r\n";

        // set the read timeout if needed
        if ($this->read_timeout > 0)
            stream_set_timeout($fp, $this->read_timeout);
        $this->timed_out = false;

        fwrite($fp, $headers . $body, strlen($headers . $body));

        $this->_redirectaddr = false;
        $this->headers = array();

        while (($currentHeader = fgets($fp, $this->_maxlinelen)) !== false) {
            if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
                $this->status = -100;
                return false;
            }

            // blank line: end of the header section
            if ($currentHeader == "\r\n")
                break;

            $this->_process_response_header($currentHeader, $scheme);
        }

        $results = '';
        while (true) {
            $_data = fread($fp, $this->maxlength);
            if ($_data === false || strlen($_data) == 0)
                break;
            $results .= $_data;
        }

        if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
            $this->status = -100;
            return false;
        }

        $this->_store_results($results, $URI, $scheme);

        return true;
    }

    /**
     * Go get the https data from the server using the external cURL binary.
     *
     * Every value placed on the command line is passed through
     * escapeshellarg(), because headers, body and URI may carry untrusted
     * data.
     *
     * @param string $url          request target (unused by cURL, kept for
     *                             signature parity with _httprequest)
     * @param string $URI          the full URI
     * @param string $http_method  request method (cURL derives it from -d)
     * @param string $content_type body content type, if any
     * @param string $body         body contents to send, if any (POST)
     * @return bool false when the temp file or the cURL run failed
     */
    function _httpsrequest($url, $URI, $http_method, $content_type = "", $body = "")
    {
        // cookies of the previous response travel along on a redirect
        if ($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $URI_PARTS = parse_url($URI);
        $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "https";
        $body = (string) $body;

        // the GET ... request line is not needed for curl
        $cmdline_params = "";
        foreach ($this->_request_headers($content_type, $body) as $header)
            $cmdline_params .= " -H " . escapeshellarg($header);

        if (strlen($body) > 0)
            $cmdline_params .= " -d " . escapeshellarg($body);

        if ($this->read_timeout > 0)
            $cmdline_params .= " -m " . (int) ceil($this->read_timeout);

        $headerfile = tempnam($this->temp_dir, "sno");
        if ($headerfile === false) {
            $this->error = "Error: could not create a temporary file for the cURL response headers.";
            return false;
        }

        // forget the previous response before running cURL so a failed run
        // cannot leave a stale redirect behind
        $this->_redirectaddr = false;
        $this->headers = array();

        $results = array();
        $return = 0;
        exec(
            escapeshellarg($this->curl_path) . " -D " . escapeshellarg($headerfile)
                . $cmdline_params . " " . escapeshellarg($URI),
            $results,
            $return
        );

        if ($return) {
            if (file_exists($headerfile))
                unlink($headerfile);
            $this->error = "Error: cURL could not retrieve the document, error $return.";
            return false;
        }

        $results = implode("\r\n", $results);

        $result_headers = file($headerfile);
        if (!is_array($result_headers))
            $result_headers = array();
        unlink($headerfile);

        foreach ($result_headers as $currentHeader)
            $this->_process_response_header($currentHeader, $scheme);

        $this->_store_results($results, $URI, $scheme);

        return true;
    }

    /**
     * Set cookies for a redirection: copies every Set-Cookie name/value
     * from the last response headers into $this->cookies.
     */
    function setcookies()
    {
        if (!is_array($this->headers))
            return;

        foreach ($this->headers as $header) {
            // stop at ';' or the line ending so the value carries no CRLF
            if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header, $match))
                $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }

    /**
     * Checks whether a read timeout has occurred.
     *
     * @param resource $fp file pointer
     * @return bool true when the stream reports a timeout
     */
    function _check_timeout($fp)
    {
        if ($this->read_timeout > 0) {
            $fp_status = stream_get_meta_data($fp);
            if (!empty($fp_status["timed_out"])) {
                $this->timed_out = true;
                return true;
            }
        }
        return false;
    }

    /**
     * Make a socket connection to the target host, or to the proxy when
     * both $proxy_host and $proxy_port are set.
     *
     * @param resource $fp receives the file pointer (by reference)
     * @return bool true on success; on failure $status holds the error
     *              number and $error a message
     */
    function _connect(&$fp)
    {
        if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
            $this->_isproxy = true;

            $host = $this->proxy_host;
            $port = $this->proxy_port;
        } else {
            $this->_isproxy = false;

            $host = $this->host;
            $port = $this->port;
        }

        $this->status = 0;

        $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
        if ($fp) {
            // socket connection succeeded
            return true;
        }

        // socket connection failed
        $this->status = $errno;
        switch ($errno) {
            case -3:
                $this->error = "socket creation failed (-3)";
                break;
            case -4:
                $this->error = "dns lookup failure (-4)";
                break;
            case -5:
                $this->error = "connection refused or timed out (-5)";
                break;
            default:
                $this->error = "connection failed (" . $errno . ")";
        }
        return false;
    }

    /**
     * Disconnect a socket connection.
     *
     * @param resource $fp file pointer
     * @return bool result of fclose()
     */
    function _disconnect($fp)
    {
        return fclose($fp);
    }

    /**
     * Prepare the post body according to the current submit type.
     *
     * @param array|string $formvars  form variables; "" means none
     * @param array|string $formfiles form upload files; "" means none
     * @return string post body ("" when there is nothing to send)
     */
    function _prepare_post_body($formvars, $formfiles)
    {
        // settype("", "array") would produce array(""), i.e. a bogus "0=" field
        if ($formvars === "" || $formvars === null)
            $formvars = array();
        if ($formfiles === "" || $formfiles === null)
            $formfiles = array();
        settype($formvars, "array");
        settype($formfiles, "array");
        $postdata = '';

        if (count($formvars) == 0 && count($formfiles) == 0)
            return $postdata;

        switch ($this->_submit_type) {
            case "application/x-www-form-urlencoded":
                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_val)
                            $postdata .= urlencode((string) $key) . "[]=" . urlencode((string) $cur_val) . "&";
                    } else
                        $postdata .= urlencode((string) $key) . "=" . urlencode((string) $val) . "&";
                }
                break;

            case "multipart/form-data":
                $this->_mime_boundary = "Snoopy" . md5(uniqid(microtime()));

                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_val) {
                            $postdata .= "--" . $this->_mime_boundary . "\r\n";
                            $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                            $postdata .= "$cur_val\r\n";
                        }
                    } else {
                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                        $postdata .= "$val\r\n";
                    }
                }

                foreach ($formfiles as $field_name => $file_names) {
                    settype($file_names, "array");
                    foreach ($file_names as $file_name) {
                        if (!is_readable($file_name))
                            continue;

                        // file_get_contents() copes with empty files, unlike
                        // fread() with a zero length
                        $file_content = file_get_contents($file_name);
                        if ($file_content === false)
                            continue;
                        $base_name = basename($file_name);

                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                        $postdata .= "$file_content\r\n";
                    }
                }
                $postdata .= "--" . $this->_mime_boundary . "--\r\n";
                break;
        }

        return $postdata;
    }
}

?>

分享到:
评论

相关推荐

    Snoopy.class.php类及中文说明

    Snoopy.class.php类及中文说明

    2014最新Snoopy.class.php

    Snoopy 是一个非常强大的PHP类,下面是详细介绍Snoopy.class.php源文件的内容。希望对大家有所帮助 Snoopy是一个php类,用来模拟浏览器的功能,可以获取网页内容,发送表单。 Snoopy的一些特点: 1抓取网页的内容 ...

    Snoopy.class.rar_Snoopy.class.php

    php网络源码,提供php网络服务,很方便使用

    Snoopy.class.php

    $snoopyx = new Snoopy ; $snoopyx-&gt;fetch($furl) ; if($snoopyx-&gt;results !="") { $handle = fopen($filename, 'w') ; fwrite($handle, $snoopyx-&gt;results) ; //把抓取得内容写到 临时文件中 fclose($...

    Snoopy.class.php——登陆采集利器

    Snoopy是一个php类,用来模拟浏览器的功能,可以获取网页内容,发送表单。使用Snoopy来模拟登陆,然后采集登陆后的页面非常管用! Snoopy的特点: 1、抓取网页的内容 fetch 2、抓取网页的文本内容 (去除HTML标签) ...

    94.纯色史努比Snoopy.docx

    94.纯色史努比Snoopy.docx

    93.彩色史努比Snoopy.docx

    93.彩色史努比Snoopy.docx

    Snoopy-1.2.4

    Snoopy是一个php类,用来模拟浏览器的功能,可以获取网页内容,发送表单。Snoopy正确运行需要你的服务器的PHP版本在4以上,并且支持PCRE(Perl Compatible Regular Expressions),基本的LAMP服务都支持。

    基于Snoopy的PHP近似完美获取网站编码的代码

    先要到网上下载Snoopy.class.php 调用方法: 复制代码 代码如下: &lt;?php require ‘lib/Snoopy.class.php’; require ‘lib/WebCrawl.class.php’;//包含下面代码 $go=new WebCrawl(‘http://www.baidu.com’); ...

    php中Snoopy类用法实例

    本文实例讲述了php中...You need the snoopy.class.php from http://snoopy.sourceforge.net/ */ include("snoopy.class.php"); $snoopy = new Snoopy; // need an proxy?: //$snoopy-&gt;proxy_host = "my.proxy.host

    snoopy.rar_The Client

    Snoopy - the PHP net client php 远程调用方法 使用curl实现

    网页采集类Snoopy类及中文说明.rar

    Snoopy.class.php,实现网页下载, 网页采集类,Snoopy的一些功能特点: 抓取网页的内容 fetch() 抓取网页的文本内容 (去除HTML标签) fetchtext() 抓取网页的链接,表单 fetchlinks() fetchform() 支持代理...

    php 下载远程图片

    php下载图片 php下载远程图片 Snoopy.class.php 类

    HA-WPE.Pro0.9F-Snoopy.zip

    强大的网络封包编辑器,wpe可以截取网络上的信息, 修改封包数据,是外挂制作的常用工具 。

    Snoopy:一个用来模拟浏览器的一些简单功能的php类库.zip

    Snoopy是一个用来模拟浏览器的一些简单功能的php类,可以获取网页内容,发送表单等操作,Snoopy正确运行需要你的服务器的PHP版本在4以上,并且支持PCRE(Perl Compatible ...需要的朋友可以参考下,方便大家学习php的代码。

    PHP正则+Snoopy抓取框架实现的抓取淘宝店信誉功能实例

    include Snoopy.class.php; $snoopy = new Snoopy; $snoopy-&gt;fetch(http://rate.taobao.com/user-rate-f01d9cb1245a22fcea470c11665de90e.htm?spm=0.0.0.37.TOtKhZ); $html=$snoopy-&gt;results; //echo $htm

    PHP采集类Snoopy抓取图片实例

    用了两天php的Snoopy这个类,发现很好用。获取请求网页里面的所有链接,直接使用fetchlinks就可以,获取所有文本信息使用...include ‘snoopy/Snoopy.class.php’;   $snoopy = new Snoopy();   $sourceURL =

    Snoopy:一个用来模拟浏览器的一些简单功能的php类库

    Snoopy是一个用来模拟浏览器的一些简单功能的php类,可以获取网页内容,发送表单等操作,Snoopy正确运行需要你的服务器的PHP版本在4以上,并且支持PCRE(Perl Compatible Regular Expressions),基本的LAMP服务都支持,由于...

Global site tag (gtag.js) - Google Analytics