167 lines
5.2 KiB
PHP
Raw Normal View History

<?
//include_once('/var/www/default/_includes/timer.php');
/** Parse une page Html et retourne son contenu dans un tableau :
** "code" => Code r<EFBFBD>ponse Serveur
** "header" => Headers du serveur
** "body" => Page HTML
**/
function parse_response($this_response) {
// Split response into header and body sections
list($response_headers, $response_body) = explode("\r\n\r\n", $this_response, 2);
$response_header_lines = explode("\r\n", $response_headers);
// First line of headers is the HTTP response code
$http_response_line = array_shift($response_header_lines);
if(preg_match('@^HTTP/[0-9]\.[0-9] ([0-9]{3})@',$http_response_line, $matches)) { $response_code = $matches[1]; }
// put the rest of the headers in an array
$response_header_array = array();
$nbRMID=0;
foreach($response_header_lines as $header_line)
{
list($header,$value) = explode(': ', $header_line, 2);
if ($header=='Set-cookie' && substr($value,0,5)=='RMID=' && $nbRMID<5)//{
$nbRMID++;
// echo ("Je gicle le RMID n<>$nbRMID\r\n");}
else
$response_header_array[$header] .= $value."\n";
}
return array('code' => $response_code, 'header' => $response_header_array, 'body' => $response_body);
}
/** R<EFBFBD>cup<EFBFBD>re une page HTML en fonction des param<EFBFBD>tres :
** $url Url distante de la page <EFBFBD> r<EFBFBD>cup<EFBFBD>rer
** $strCookies Chaine de caract<EFBFBD>re contenant les cookies
** $postData Tableau des donn<EFBFBD>es <EFBFBD> passer en POST uniquement
** $referer Referer <EFBFBD> indiquer lors de l'appel de la page
** $debug Activer le d<EFBFBD>bogage (True/False)
**
** ... et retourne son contenu dans un tableau :
** "code" => Code r<EFBFBD>ponse Serveur
** "header" => Headers du serveur
** "body" => Page HTML
**/
function getUrl($url, $strCookies='', $postData='', $referer='', $debug=false, $host='', $proxy='', $timeout=0, $acceptRedir=0) {
$ch = curl_init();
if ($host=='') {
$tabTmp = parse_url($url);
$hostUrl = $tabTmp['host'];
$this_header = array('Host: '. $hostUrl);
} else
$this_header = array('Host: '. $host);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1);
if ($proxy<>'') curl_setopt($ch, CURLOPT_PROXY, $proxy);
//curl_setopt($ch, CURLOPT_PROXYUSERPWD, 'username:password'); // Pas n<>cessaire en authentification NT
if (((int)$timeout)<>0) curl_setopt($ch, CURLOPT_TIMEOUT, (int)$timeout);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
if ($acceptRedir>0) {
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_MAXREDIRS, $acceptRedir);
}
$user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_REFERER, $referer);
// Add each cookie that has been returned in the response
// If cookies need to be added/deleted or value changed, then add code here
if ($strCookies!='') {
//die('"'.$strCookies.'"');
//echo $strCookies."\r\n";
$cookies = explode("\n", $strCookies);
// Create the basic header
foreach($cookies as $this_cookie) {
if (trim($this_cookie)<>'')
array_push($this_header, 'Cookie: '.$this_cookie);
}
}
if ($postData!='') {
if (is_array($postData))
$post_data=$postData;
$o="";
foreach ($post_data as $k=>$v)
{
$o.= "$k=".utf8_encode($v)."&";
}
$post_data=substr($o,0,-1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
//if in_array('',$this_header
/*array_push($this_header, "Content-type: application/x-www-form-urlencoded");
array_push($this_header, "Content-Length: 44");*/
}
curl_setopt($ch, CURLOPT_HTTPHEADER, $this_header);
//print_r($this_header);
$page=curl_exec($ch);
$response = parse_response($page);
if ($debug){
$url2=str_replace('http://', '', $url);
$url2=str_replace('/', '_', $url2);
$url2=str_replace('?', '(param)', $url2);
$url2=str_replace('&', '(et)', $url2);
$fp=fopen('insee/'. date('Ymd-His') .'-'. microtime_float(true) .'-'. $url2 . '.html', 'a');
fwrite($fp, $url."\r\n");
fwrite($fp, $page);
fclose($fp);
//echo strip_tags(html_entity_decode($response['body']), '<td>');
}
//print_r(curl_getinfo($ch));
curl_close($ch);
return $response;
}
/** Recherche un texte dans une page HTML
**
**/
function getTextInHtml($pageHtml, $strToFind, $strDeb, $strEnd, $include_strDeb=false, $include_strEnd=false, $ltrim=true, $rtrim=true, &$fin, $nbOcc=1) {
$tabRet=array();
$deb=$nbOccTrouve=0;
while( is_int(($deb=strpos($pageHtml,$strToFind,$fin))) ) {
$deb++;
$deb2 = strpos($pageHtml,$strDeb, $deb);
$fin = strpos($pageHtml,$strEnd, $deb2);
if (!$include_strDeb)
$deb2+=strlen($strDeb);
$s_temp = substr($pageHtml, $deb2, ($fin-$deb2));
if ($ltrim) $s_temp=ltrim($s_temp);
if ($rtrim) $s_temp=rtrim($s_temp);
if ($nbOcc==1) return $s_temp;
//echo $s_temp."\r\n";
//$a_temp = explode('" class="basic">', $s_temp);
$tabUrl[$nbOccTrouve]=$s_temp;
$nbOccTrouve++;
if ($nbOcc==$nbOccTrouve) {
// echo "j'ai trouv<75> le nb demand<6E>, je sort\r\n";
break;
};
}
return $tabUrl;
/*<span class="mongrasvert">
<li>Le type de voie a <EFBFBD>t<EFBFBD> modifi<EFBFBD><br>
<li>L'orthographe du mot directeur a <EFBFBD>t<EFBFBD> modifi<EFBFBD>e<br>
<li>Le code postal a <EFBFBD>t<EFBFBD> forc<EFBFBD> <EFBFBD> partir du d<EFBFBD>partement et de la localit<EFBFBD><br> </span>
*/
}
?>