2010-10-14 14:06:05 +00:00
< ?
/**
 * Parse a raw HTTP response (status line, headers, blank line, body) and
 * return its parts in an array:
 *   'code'   => status code taken from the first line, as a string of three
 *               digits, or null when the first line is not an HTTP status line
 *   'header' => array keyed by header name; the values of repeated headers
 *               are concatenated, each value being terminated by "\n"
 *   'body'   => everything after the first blank line ('' when there is none)
 *
 * The first five 'Set-cookie' headers whose value starts with 'RMID=' are
 * dropped from the 'header' array.
 */
function parse_response($this_response) {
    // Split the response into its header section and its body at the first blank line.
    $sections = explode("\r\n\r\n", $this_response, 2);
    $response_header_lines = explode("\r\n", $sections[0]);
    $response_body = isset($sections[1]) ? $sections[1] : '';

    // The first header line is the status line, e.g. "HTTP/1.1 200 OK" or "HTTP/2 200".
    $response_code = null;
    $http_response_line = array_shift($response_header_lines);
    if (preg_match('@^HTTP/[0-9](?:\.[0-9])? ([0-9]{3})@', $http_response_line, $matches)) {
        $response_code = $matches[1];
    }

    // Collect the remaining headers, grouped by name.
    $response_header_array = array();
    $nbRMID = 0;
    foreach ($response_header_lines as $header_line) {
        // A line without ': ' is kept under its full text with an empty value.
        list($header, $value) = array_pad(explode(': ', $header_line, 2), 2, '');

        // NOTE(review): the header name is compared case-sensitively, so only
        // the exact spelling 'Set-cookie' is filtered -- confirm this is intended.
        if ($header == 'Set-cookie' && substr($value, 0, 5) == 'RMID=' && $nbRMID < 5) {
            $nbRMID++;
            continue;
        }

        if (!isset($response_header_array[$header])) {
            $response_header_array[$header] = '';
        }
        $response_header_array[$header] .= $value . "\n";
    }

    return array('code' => $response_code, 'header' => $response_header_array, 'body' => $response_body);
}
2010-10-15 07:25:50 +00:00
/**
 * Fetch a remote page with cURL.
 *
 * Parameters:
 *   $url         Remote URL of the page to fetch
 *   $strCookies  Cookies to send, separated by "\n"; every non-blank line is
 *                sent as its own "Cookie:" request header
 *   $postData    Data to POST: either an array of field => value pairs or an
 *                already-built request body string; '' sends no POST data
 *   $referer     Referer to send with the request
 *   $debug       When true, the URL and the raw response are appended to a
 *                dump file under /tmp
 *   $host        Value of the "Host:" request header; defaults to the host
 *                part of $url
 *   $proxy       Proxy passed to CURLOPT_PROXY; '' means no proxy
 *   $timeout     Seconds, applied to both the connection and the whole
 *                transfer; 0 leaves the cURL defaults untouched
 *   $nbRetry     Number of extra attempts made after a failed transfer
 *
 * Returns an array:
 *   'code'   => server response code
 *   'header' => server headers
 *   'body'   => HTML page
 * as produced by parse_response(). When every attempt fails, 'code' is 408
 * for a cURL timeout (errno 28) and 400 otherwise, 'header' holds
 * 'curl_errno' and 'curl_error', and 'body' holds an error message.
 */
function getUrl($url, $strCookies = '', $postData = '', $referer = '', $debug = false, $host = '', $proxy = '', $timeout = 0, $nbRetry = 0) {
    $ch = curl_init();

    // The Host header is sent explicitly so that $host can override the host part of $url.
    $this_header = array('Host: ' . ($host == '' ? parse_url($url, PHP_URL_HOST) : $host));

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 1);          // keep the response headers in the output for parse_response()
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);  // return the response instead of printing it

    if ($proxy <> '') {
        // No CURLOPT_PROXYUSERPWD here: not needed with NT authentication.
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
    }

    $timeout = (int) $timeout;
    if ($timeout <> 0) {
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    }

    if (preg_match('/^https/i', $url)) {
        // NOTE(review): peer certificate verification is switched off for HTTPS
        // URLs, which exposes the transfer to man-in-the-middle attacks. Left
        // unchanged because existing callers may depend on it.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    }

    $user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    curl_setopt($ch, CURLOPT_REFERER, $referer);

    // One "Cookie:" header per non-blank line of $strCookies.
    if ($strCookies != '') {
        foreach (explode("\n", $strCookies) as $this_cookie) {
            if (trim($this_cookie) <> '') {
                $this_header[] = 'Cookie: ' . $this_cookie;
            }
        }
    }

    if ($postData != '') {
        if (is_array($postData)) {
            // NOTE(review): keys and values are not URL-encoded here; values are
            // only converted with utf8_encode(), so callers must pre-encode any
            // reserved characters.
            $pairs = array();
            foreach ($postData as $k => $v) {
                $pairs[] = "$k=" . utf8_encode($v);
            }
            $post_data = implode('&', $pairs);
        } else {
            // A string is sent as the request body as-is.
            $post_data = (string) $postData;
        }
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
    }

    curl_setopt($ch, CURLOPT_HTTPHEADER, $this_header);

    // One initial attempt plus $nbRetry retries; stop at the first success.
    $maxTries = max(0, (int) $nbRetry) + 1;
    // The timeout message differs depending on whether retries were requested.
    $timeoutBody = ($maxTries === 1)
        ? 'Connexion impossible au site du partenaire/Timeout'
        : 'Connexion impossible au site du partenaire';

    $page = false;
    $response = null;
    for ($numTry = 0; $numTry < $maxTries; $numTry++) {
        $page = curl_exec($ch);
        if ($page !== false) {
            $response = parse_response($page);
            break;
        }

        $curlInfo = array('curl_errno' => curl_errno($ch), 'curl_error' => curl_error($ch));
        if (curl_errno($ch) == 28) { // cURL error 28: operation timed out
            $response = array('code' => 408, 'header' => $curlInfo, 'body' => $timeoutBody);
        } else {
            $response = array('code' => 400, 'header' => $curlInfo, 'body' => 'Erreur Curl : ' . curl_error($ch));
        }
    }

    if ($debug) {
        // Build a file-system friendly name out of the URL.
        $url2 = str_replace('http://', '', $url);
        $url2 = str_replace('/', '_', $url2);
        $url2 = str_replace('?', '(param)', $url2);
        $url2 = str_replace('&', '(et)', $url2);
        $fp = fopen('/tmp/curl-' . date('Ymd-His') . '-' . microtime_float(true) . '-' . $url2 . '.html', 'a');
        if ($fp) {
            fwrite($fp, $url . "\r\n");
            fwrite($fp, (string) $page);
            fclose($fp);
        }
    }

    curl_close($ch);
    return $response;
}
/**
 * Search for a piece of text in an HTML page.
 *
 * Starting at offset $fin, the function looks for the marker $strToFind.
 * From just after the first character of that marker it looks for the next
 * $strDeb, then for the next $strEnd located after $strDeb, and extracts the
 * text between the two delimiters.
 *
 * Parameters:
 *   $pageHtml        Text to search in
 *   $strToFind       Marker that precedes the wanted text
 *   $strDeb          Opening delimiter
 *   $strEnd          Closing delimiter
 *   $include_strDeb  When true, the extracted text starts with $strDeb
 *   $include_strEnd  Accepted but currently ignored: $strEnd is never included
 *   $ltrim, $rtrim   Strip leading / trailing whitespace from the extracted text
 *   $fin             (by reference) offset where the search starts; updated to
 *                    the position of the last $strEnd that was matched
 *   $nbOcc           1 returns the first extracted text as a string; any other
 *                    value returns an array of at most $nbOcc texts (an array
 *                    of all of them when the count is never reached, e.g. 0)
 *
 * Returns null when nothing could be extracted.
 */
function getTextInHtml($pageHtml, $strToFind, $strDeb, $strEnd, $include_strDeb = false, $include_strEnd = false, $ltrim = true, $rtrim = true, &$fin = 0, $nbOcc = 1) {
    $found = array();
    $length = strlen($pageHtml);
    $offset = (int) $fin;

    // An empty marker can never locate anything meaningful.
    if ((string) $strToFind === '') {
        return null;
    }

    // abs() keeps the offset inside the range strpos() accepts (negative offsets count from the end).
    while (abs($offset) <= $length && ($deb = strpos($pageHtml, $strToFind, $offset)) !== false) {
        $startPos = strpos($pageHtml, $strDeb, $deb + 1);
        if ($startPos === false) {
            break;
        }
        // Look for the closing delimiter after the opening one, so that
        // delimiters sharing characters (e.g. 'href="' and '"') work.
        $contentPos = $startPos + strlen($strDeb);
        $endPos = strpos($pageHtml, $strEnd, $contentPos);
        if ($endPos === false) {
            // Stop here: a failed lookup must not restart the scan from offset 0.
            break;
        }

        // Expose the position to the caller and resume the scan from there.
        $fin = $endPos;
        $offset = $endPos;

        $from = $include_strDeb ? $startPos : $contentPos;
        $s_temp = substr($pageHtml, $from, $endPos - $from);
        if ($ltrim) {
            $s_temp = ltrim($s_temp);
        }
        if ($rtrim) {
            $s_temp = rtrim($s_temp);
        }

        if ($nbOcc == 1) {
            return $s_temp;
        }

        $found[] = $s_temp;
        if ($nbOcc == count($found)) {
            break;
        }
    }

    return $found ? $found : null;
}
/**
 * Gather basic information about a PDF file by scanning it line by line.
 *
 * Returns false when the file cannot be opened or when no "%%EOF" marker is
 * found in the last lines read. Otherwise returns an array:
 *   'file'     => the path given in $f
 *   'fileName' => base name of $f
 *   'version'  => version from the "%PDF-x.y" header (only set when the first
 *                 line carries that header)
 *   'pdfEOF'   => true
 *   'pdfSize'  => file size in bytes
 *   'nbPages'  => number of lines containing a "/Type /Page" entry
 *                 ("/Type /Pages" entries are not counted)
 */
function getPdfInfo($f) {
    $tabInfo = array(
        'file'     => $f,
        'fileName' => basename($f),
    );

    // Failure to open is reported through the return value, hence the silenced warning.
    $handle = @fopen($f, 'r');
    if (!$handle) {
        return false;
    }

    $lineNo = 0;
    $nbPages = 0;
    $buffer = '';
    $prev_buffer = '';
    while (!feof($handle)) {
        $prev_buffer = $buffer;
        $buffer = fgets($handle, 4096);
        if ($buffer === false) {
            // Nothing more to read; $prev_buffer holds the last line of the file.
            break;
        }

        if ($lineNo == 0 && preg_match('/^%PDF-(.*)\s/U', $buffer, $matches)) {
            $tabInfo['version'] = $matches[1];
        } elseif (preg_match('/Type\s*\/Page[^s]/', $buffer)) {
            ++$nbPages;
        }
        $lineNo++;
    }
    // Close the handle before any return so it is released on every path.
    fclose($handle);

    // The end-of-file marker must appear in one of the last two reads.
    $hasEof = strpos((string) $prev_buffer, '%%EOF') !== false
        || strpos((string) $buffer, '%%EOF') !== false;
    if (!$hasEof) {
        return false;
    }

    $tabInfo['pdfEOF'] = true;
    $tabInfo['pdfSize'] = filesize($f);
    $tabInfo['nbPages'] = $nbPages;
    return $tabInfo;
}
/**
 * Query a WHOIS server (port 43) to check whether a domain name is available.
 *
 * Parameters:
 *   $domain  Domain name without its extension
 *   $tdl     Extension, without the dot; one of the keys of $info below
 *   $debug   When true, the raw server answer is returned instead of a code
 *
 * Return codes:
 *   0 => Info, the name is taken
 *   1 => Info, the name is free
 *   2 => Info, the name is pending
 *   3 => Warning, too many requests
 *   4 => Error, a search pattern (regexp) must be configured for the extension
 *   5 => Error, the answer was empty (also returned for an extension that is
 *        not configured, or when the connection fails)
 */
function simpleWhois($domain, $tdl, $debug = false) {
    // Settings per extension: WHOIS host, pattern meaning "free", optional pattern meaning "pending".
    $info = array(
        'fr' => array(
            'host'    => 'whois.nic.fr',
            'regexp'  => 'No\s*entries\s*found',
            'pending' => 'status\s*:\s*REDEMPTION',
        ),
        'com' => array(
            'host'   => 'whois.crsnic.net',
            'regexp' => 'No\s*match\s*for',
        ),
        'net' => array(
            'host'   => 'whois.crsnic.net',
            'regexp' => 'No\s*match\s*for',
        ),
    );
    $rules = isset($info[$tdl]) ? $info[$tdl] : array();

    // Send the query and read the whole answer.
    $output = '';
    if (!empty($rules['host'])) {
        // The last argument (5) aborts the connection attempt after 5 seconds.
        $req = fsockopen($rules['host'], 43, $errno, $errstr, 5);
        if ($req) {
            // A WHOIS query is a single line terminated by CRLF.
            fputs($req, $domain . '.' . $tdl . "\r\n");
            while (!feof($req)) {
                $line = fgets($req, 4096);
                if ($line === false) {
                    break;
                }
                $output .= $line;
            }
            fclose($req);
        }
    }

    // Debug mode: hand back the raw answer.
    if ($debug) {
        return $output;
    }
    // Empty answer
    if (empty($output)) {
        return 5;
    }
    // Too many requests
    if (preg_match('/(Too\s+many\s+requests|Your\s+connection\s+limit\s+exceeded|daily\s+whois\s+limit\s+exceeded|Maximum\s+queries|WHOIS\s+LIMIT\s+EXCEEDED|referral\s+host\s+not\s+responding|Excessive\s+querying)/i', $output)) {
        return 3;
    }
    // Pending
    if (!empty($rules['pending']) && preg_match('/' . $rules['pending'] . '/i', $output)) {
        return 2;
    }
    // Missing search pattern
    if (empty($rules['regexp'])) {
        return 4;
    }
    // Free
    if (preg_match('/' . $rules['regexp'] . '/i', $output)) {
        return 1;
    }
    // Taken
    return 0;
}
?>