301 lines
10 KiB
PHP
301 lines
10 KiB
PHP
<?
|
|
|
|
/**
 * Parses a raw HTTP response (status line + headers + body) and returns an array:
 *   "code"   => HTTP status code as a 3-digit string, or null when the status
 *               line is not recognised
 *   "header" => array of header name => value; every value is terminated by "\n"
 *               and repeated headers are concatenated into one string
 *   "body"   => everything after the first blank line ('' when there is none)
 *
 * The first five "Set-cookie: RMID=..." headers are discarded (the header name
 * comparison is case-sensitive, as before).
 *
 * Interim 1xx header blocks (e.g. "100 Continue", which cURL emits before the
 * final response on large POSTs) are skipped so the final response is parsed.
 *
 * @param string $this_response Raw response as returned by cURL with CURLOPT_HEADER enabled
 * @return array
 */
function parse_response($this_response) {
    $remaining = (string)$this_response;

    do {
        // Split response into header and body sections
        $parts = explode("\r\n\r\n", $remaining, 2);
        $response_header_lines = explode("\r\n", $parts[0]);
        $response_body = isset($parts[1]) ? $parts[1] : '';

        // First line of headers is the HTTP status line; the minor version is
        // optional so that "HTTP/2 200" is accepted as well as "HTTP/1.1 200 OK"
        $response_code = null;
        $http_response_line = array_shift($response_header_lines);
        if (preg_match('@^HTTP/[0-9](?:\.[0-9])? ([0-9]{3})@', $http_response_line, $matches)) {
            $response_code = $matches[1];
        }

        // A 1xx block immediately followed by another status line is an interim
        // response: parse what follows instead
        $is_interim = $response_code !== null
            && $response_code[0] === '1'
            && strncmp($response_body, 'HTTP/', 5) === 0;
        $remaining = $response_body;
    } while ($is_interim);

    // Put the rest of the headers in an array
    $response_header_array = array();
    $nbRMID = 0;
    foreach ($response_header_lines as $header_line) {
        // Split on the first colon; the value may or may not be preceded by blanks
        $pair = explode(':', $header_line, 2);
        $header = $pair[0];
        $value = isset($pair[1]) ? ltrim($pair[1], " \t") : '';

        // Drop the first five RMID cookies
        if ($header == 'Set-cookie' && substr($value, 0, 5) == 'RMID=' && $nbRMID < 5) {
            $nbRMID++;
            continue;
        }

        if (!isset($response_header_array[$header])) {
            $response_header_array[$header] = '';
        }
        $response_header_array[$header] .= $value . "\n";
    }

    return array('code' => $response_code, 'header' => $response_header_array, 'body' => $response_body);
}
|
|
|
|
/**
 * Fetches a page with cURL and returns the parsed response.
 *
 * Parameters:
 *   $url        Remote URL of the page to fetch
 *   $strCookies Cookies to send, one per line ("\n"-separated); each non-blank
 *               line is sent as its own "Cookie:" header
 *   $postData   POST data: an array of key => value pairs (values go through
 *               utf8_encode) or an already-built body string; '' means no POST
 *   $referer    Referer to send with the request
 *   $debug      When true, dumps the URL and the raw response into a file under /tmp
 *   $host       Value of the Host header (defaults to the host part of $url)
 *   $proxy      Proxy to use ('' for none)
 *   $timeout    Timeout in seconds applied to both connect and transfer (0 = cURL defaults)
 *   $nbRetry    Number of additional attempts after a failed one
 *
 * Returns an array:
 *   "code"   => server response code (408 on cURL timeout, 400 on any other cURL error)
 *   "header" => server headers (or curl_errno / curl_error on failure)
 *   "body"   => HTML page (or an error message on failure)
 */
function getUrl($url, $strCookies='', $postData='', $referer='', $debug=false, $host='', $proxy='', $timeout=0, $nbRetry=0) {

    $ch = curl_init();

    if ($host == '')
        $this_header = array('Host: ' . parse_url($url, PHP_URL_HOST));
    else
        $this_header = array('Host: ' . $host);

    curl_setopt($ch, CURLOPT_URL, $url);
    // Headers are kept in the output so that parse_response() can split them off
    curl_setopt($ch, CURLOPT_HEADER, 1);
    if ($proxy <> '') curl_setopt($ch, CURLOPT_PROXY, $proxy);

    if (((int)$timeout) <> 0) {
        curl_setopt($ch, CURLOPT_TIMEOUT, (int)$timeout);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, (int)$timeout);
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    // NOTE(review): peer certificate verification is disabled for every https
    // URL, which allows man-in-the-middle attacks. Left unchanged because
    // existing callers may depend on it - confirm and enable verification.
    if (preg_match('/^https/i', $url))
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

    $user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    curl_setopt($ch, CURLOPT_REFERER, $referer);

    // One "Cookie:" header per non-blank line of $strCookies
    if ($strCookies != '') {
        $cookies = explode("\n", $strCookies);
        foreach ($cookies as $this_cookie) {
            if (trim($this_cookie) <> '')
                array_push($this_header, 'Cookie: ' . $this_cookie);
        }
    }

    if ($postData != '') {
        if (is_array($postData)) {
            // NOTE(review): keys and values are not URL-encoded, and
            // utf8_encode() is deprecated as of PHP 8.2. Kept as-is because
            // callers may already pass pre-encoded values - confirm.
            $pairs = array();
            foreach ($postData as $k => $v) {
                $pairs[] = "$k=" . utf8_encode($v);
            }
            $post_data = implode('&', $pairs);
        } else {
            // A ready-made body string is sent untouched (it used to be dropped)
            $post_data = (string)$postData;
        }

        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
    }

    curl_setopt($ch, CURLOPT_HTTPHEADER, $this_header);

    // One initial attempt plus $nbRetry retries; stop at the first success.
    $maxAttempts = max(0, (int)$nbRetry) + 1;
    // The timeout message differs between the single-shot and the retry mode
    $timeoutBody = ($maxAttempts === 1)
        ? 'Connexion impossible au site du partenaire/Timeout'
        : 'Connexion impossible au site du partenaire';

    $page = false;
    $response = null;
    for ($numTry = 0; $numTry < $maxAttempts; $numTry++) {
        $page = curl_exec($ch);
        if ($page !== false) {
            $response = parse_response($page);
            break;
        }

        $errno = curl_errno($ch);
        $error = curl_error($ch);
        $errorHeader = array('curl_errno' => $errno, 'curl_error' => $error);
        if ($errno == 28) // TIMEOUT
            $response = array('code' => 408, 'header' => $errorHeader, 'body' => $timeoutBody);
        else
            $response = array('code' => 400, 'header' => $errorHeader, 'body' => 'Erreur Curl : ' . $error);
    }

    if ($debug) {
        // Build a file name out of the URL
        $url2 = str_replace('http://', '', $url);
        $url2 = str_replace('/', '_', $url2);
        $url2 = str_replace('?', '(param)', $url2);
        $url2 = str_replace('&', '(et)', $url2);

        // microtime_float() is a helper that is not defined in this file; fall
        // back to the built-in when it is not available
        $stamp = function_exists('microtime_float') ? microtime_float(true) : microtime(true);

        $fp = fopen('/tmp/curl-' . date('Ymd-His') . '-' . $stamp . '-' . $url2 . '.html', 'a');
        // The dump is best-effort: a failed open (e.g. file name too long) must
        // not break the request itself
        if ($fp) {
            fwrite($fp, $url . "\r\n");
            fwrite($fp, (string)$page);
            fclose($fp);
        }
    }

    curl_close($ch);
    return $response;
}
|
|
|
|
/**
 * Searches for text inside an HTML page.
 *
 * Starting at byte offset $fin, locates $strToFind, then the next $strDeb after
 * it, then the next $strEnd after that $strDeb, and extracts what lies between
 * the two markers.
 *
 * Parameters:
 *   $pageHtml        Page to search
 *   $strToFind       Anchor text that must precede the markers
 *   $strDeb          Start marker
 *   $strEnd          End marker
 *   $include_strDeb  Keep the start marker in the extracted text
 *   $include_strEnd  Keep the end marker in the extracted text
 *   $ltrim / $rtrim  Trim leading / trailing whitespace of the extracted text
 *   $fin             (by reference) offset where the search starts; updated to
 *                    the position of the last end marker found, so the caller
 *                    can resume from there. Left untouched when nothing is found.
 *   $nbOcc           Number of occurrences wanted; any value other than 1 that
 *                    is never reached (e.g. 0) collects every occurrence
 *
 * Returns:
 *   - $nbOcc == 1: the extracted string, or null when nothing was found
 *   - otherwise:   an array of extracted strings (empty when nothing was found)
 */
function getTextInHtml($pageHtml, $strToFind, $strDeb, $strEnd, $include_strDeb=false, $include_strEnd=false, $ltrim=true, $rtrim=true, &$fin=0, $nbOcc=1) {
    $tabUrl = array();
    $nbOccTrouve = 0;

    $pageHtml = (string)$pageHtml;
    $pageLen = strlen($pageHtml);
    $offset = (int)$fin;

    while (true) {
        // An offset outside the page makes strpos() fail (ValueError on PHP 8)
        if ($offset > $pageLen || $offset < -$pageLen) break;

        $deb = strpos($pageHtml, $strToFind, $offset);
        if ($deb === false || $deb + 1 > $pageLen) break;

        // The start marker is looked up one byte past the beginning of the anchor
        $deb2 = strpos($pageHtml, $strDeb, $deb + 1);
        if ($deb2 === false) break;

        // Look for the end marker after the start marker, so that it cannot
        // match inside the start marker itself
        $contentStart = $deb2 + strlen($strDeb);
        $end = strpos($pageHtml, $strEnd, $contentStart);
        // A missing marker ends the search instead of restarting at offset 0
        if ($end === false) break;

        $fin = $end;

        $from = $include_strDeb ? $deb2 : $contentStart;
        $to = $include_strEnd ? $end + strlen($strEnd) : $end;
        $s_temp = (string)substr($pageHtml, $from, $to - $from);

        if ($ltrim) $s_temp = ltrim($s_temp);
        if ($rtrim) $s_temp = rtrim($s_temp);

        if ($nbOcc == 1) return $s_temp;

        $tabUrl[$nbOccTrouve] = $s_temp;
        $nbOccTrouve++;

        // Requested number of occurrences reached
        if ($nbOcc == $nbOccTrouve) break;

        // Resume from the end marker that was just found
        $offset = $end;
    }

    if ($nbOcc == 1) return null;

    return $tabUrl;
}
|
|
|
|
/**
 * Reads basic information from a PDF file by scanning it line by line.
 *
 * Returns false when the file cannot be opened or when no "%%EOF" marker is
 * found in the last line(s) read; otherwise returns an array:
 *   "file"     => path as given
 *   "fileName" => base name of the path
 *   "version"  => PDF version taken from the "%PDF-x.y" first line (only set when present)
 *   "pdfEOF"   => true
 *   "pdfSize"  => file size in bytes
 *   "nbPages"  => number of lines containing a "/Type /Page" entry ("/Pages" excluded)
 *
 * @param string $f Path of the PDF file
 * @return array|false
 */
function getPdfInfo($f){
    $tabInfo = array(
        'file' => $f,
        'fileName' => basename($f),
    );

    // Binary mode so the content is read untranslated on every platform
    $handle = @fopen($f, 'rb');
    if (!$handle) {
        return false;
    }

    $i = $nbPages = 0;
    $buffer = $prev_buffer = '';
    while (!feof($handle)) {
        $prev_buffer = $buffer;
        $buffer = fgets($handle, 4096);
        if ($buffer === false) {
            // Nothing more to read (or a read error): stop instead of spinning
            $buffer = '';
            break;
        }

        if ($i == 0 && preg_match("/^\%PDF\-(.*)\s/U", $buffer, $matches))
            $tabInfo['version'] = $matches[1];
        elseif (preg_match("/Type\s*\/Page[^s]/", $buffer))
            ++$nbPages;
        $i++;
    }

    // The handle is released on every path, including the failure below
    fclose($handle);

    // The marker is expected in one of the last two reads
    if (!preg_match("/\%\%EOF/", $prev_buffer) && !preg_match("/\%\%EOF/", $buffer)) {
        return false;
    }

    $tabInfo['pdfEOF'] = true;
    $tabInfo['pdfSize'] = filesize($f);
    $tabInfo['nbPages'] = 0 + $nbPages;
    return $tabInfo;
}
|
|
|
|
|
|
/**
 * Minimal WHOIS availability check for a domain name.
 *
 * Return codes:
 *   0 => Info, the name is taken
 *   1 => Info, the name is free
 *   2 => Info, the name is pending
 *   3 => Warning, too many requests
 *   4 => Error, a search pattern (regexp) must be specified for the extension
 *   5 => Error, the returned response was empty (also returned for an
 *        extension that has no WHOIS server configured here)
 *
 * @param string $domain Domain name without its extension
 * @param string $tdl    Extension (TLD) without the dot, e.g. "fr", "com", "net"
 * @param bool   $debug  When true, returns the raw WHOIS response instead of a code
 * @return int|string
 */
function simpleWhois($domain, $tdl, $debug=false) {
    // Extension-specific information
    $info = array(
        'fr' => array(
            'host' => 'whois.nic.fr',
            'regexp' => 'No\s*entries\s*found',
            'pending' => 'status\s*:\s*REDEMPTION'
        ),
        'com' => array(
            'host' => 'whois.crsnic.net',
            'regexp' => 'No\s*match\s*for'
        ),
        'net' => array(
            'host' => 'whois.crsnic.net',
            'regexp' => 'No\s*match\s*for'
        ),
    );

    // Output initialisation
    $output = '';

    // Only query when a WHOIS server is known for this extension
    if (isset($info[$tdl]['host'])) {
        // The 5 aborts the connection attempt when there is no answer after 5 seconds
        $req = fsockopen($info[$tdl]['host'], 43, $errno, $errstr, 5);
        if ($req) {
            // A WHOIS query is a single line terminated by CRLF
            fputs($req, $domain . '.' . $tdl . "\r\n");
            while (!feof($req)) {
                $line = fgets($req, 4096);
                // A failed read (e.g. socket timeout) does not set EOF: stop here
                if ($line === false) break;
                $output .= $line;
            }
            fclose($req);
        }
    }

    // Debug mode
    if ($debug) return $output;

    // Empty response
    if (empty($output)) return 5;

    // Too many requests
    if (preg_match('/(Too\s+many\s+requests|Your\s+connection\s+limit\s+exceeded|daily\s+whois\s+limit\s+exceeded|Maximum\s+queries|WHOIS\s+LIMIT\s+EXCEEDED|referral\s+host\s+not\s+responding|Excessive\s+querying)/i', $output)) return 3;

    // Pending
    if (!empty($info[$tdl]['pending']) && preg_match('/' . $info[$tdl]['pending'] . '/i', $output)) return 2;

    // Missing search pattern
    if (empty($info[$tdl]['regexp'])) return 4;

    // Free
    if (preg_match('/' . $info[$tdl]['regexp'] . '/i', $output)) return 1;

    // Taken
    return 0;
}
|
|
|
|
?>
|