From 8dac441775de69b824620ee469053417a56204fc Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Thu, 20 Dec 2012 14:18:11 +0000 Subject: [PATCH] getKbis fonctionnelle --- batch/getKbis.php | 170 ++++++++++++++++++++++++++-------------------- 1 file changed, 95 insertions(+), 75 deletions(-) diff --git a/batch/getKbis.php b/batch/getKbis.php index a9bdcd65..1205950a 100644 --- a/batch/getKbis.php +++ b/batch/getKbis.php @@ -67,24 +67,25 @@ function getPageHeader($start,$end,$header) } } -function getPage($url, $curl_data = '', $verbose=false) +function getPage($url, $curl_data = '', $override = null) { global $ckfile; - + //$user_agent = 'Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'; //$user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)'; $user_agent = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)'; + $verbose = false; $post = false; $fields = ''; if (is_array($curl_data) && count($curl_data)>0) { - foreach($curl_data as $key=>$value) { - $fields .= $key.'='.$value.'&'; + foreach($curl_data as $key=>$value) { + $fields .= $key.'='.$value.'&'; } rtrim($fields,'&'); $post = true; } - + $options = array( CURLOPT_RETURNTRANSFER => true, // return web page CURLOPT_HEADER => false, // don't return headers @@ -104,7 +105,12 @@ function getPage($url, $curl_data = '', $verbose=false) CURLOPT_COOKIEFILE => $ckfile, CURLOPT_COOKIEJAR => $ckfile, // Stockage du cookie de session ); - + + //Override define CURL option + if (is_array($override) && count($override)>0 ) { + $options = $override + $options; + } + $ch = curl_init($url); curl_setopt_array($ch,$options); $content = curl_exec($ch); @@ -118,7 +124,7 @@ function getPage($url, $curl_data = '', $verbose=false) $encoding = getPageHeader('text\/html; charset=', '', $header['content_type']); //$encoding = 'ISO-8859-1'; $content = iconv($encoding, 'UTF-8//IGNORE', $content); - + // $header['errno'] = $err; // $header['errmsg'] = $errmsg; // $header['content'] = $content; @@ -147,11 +153,11 @@ function infogreffeConnexion() $url = "http://www.infogreffe.fr/infogreffe/index.jsp"; $result = getPage($url); if (DEBUG) file_put_contents('kbis-connexion1.html', $result['content']); - + $url = "http://www.infogreffe.fr/infogreffe/popupLog.jsp?type=0&url=index.jsp"; $result = getPage($url); if (DEBUG) file_put_contents('kbis-connexion2.html', $result['content']); - + $url = "https://www.infogreffe.fr/infogreffe/login.do?redirect=index.jsp"; // 302 Moved Temporarily // => http://www.infogreffe.fr/infogreffe/index.jsp @@ -163,11 +169,11 @@ function infogreffeConnexion() ); $result = getPage($url, $data); if (DEBUG) file_put_contents('kbis-connexion3.html', $result['content']); - + /* We need to have
- + Abonné | Mon compte | Documents Commandés @@ -184,7 +190,7 @@ function infogreffeKbis($ref) $url = "http://www.infogreffe.fr/infogreffe/chargement.jsp?oups=".$ref."_0_V_0_"; $result = getPage($url); if (DEBUG) file_put_contents('kbis-afficheproduit.html', $result['content']); - + //Redirection javascript qui fait patienter sleep(1); /************************************************** @@ -193,33 +199,33 @@ function infogreffeKbis($ref) $url = "http://www.infogreffe.fr/infogreffe/serviceProduit.do?cdePro=".$ref."_0_V_0_"; $result = getPage($url); if (DEBUG) file_put_contents('kbis-serviceproduit.html', $result['content']); - + $output = $result['content']; - + //Modification du code HTML pour impression $output = removeTag('script', $output, true, true); - + $output = str_replace('/infogreffe/styles/infogreffe_base.css', '../styles/infogreffe_base.css', $output); $output = str_replace('/infogreffe/styles/infogreffe.css', '../styles/infogreffe.css', $output); - + $output = preg_replace( '/\/', '', $output ); - + //Récupération des informations dans le kbis //Numéro d'identification :509 536 371 R.C.S. PONTOISE - + preg_match('/([0-9]{3}\s[0-9]{3}\s[0-9]{3})\sR\.C\.S\./', $output, $matches); - + if (count($matches)>1){ $identifiant = str_replace(' ', '',$matches[1]); } else { $identifiant = 'unknown'; } $fichier = $identifiant . '-' . $ref . '.html'; - + $dir = DOC_WEB_LOCAL.'kbis/'.date('Ymd'); if (!file_exists($dir)) mkdir($dir); file_put_contents($dir . '/' . $fichier, $output); @@ -261,7 +267,7 @@ function parseRef($document) $doc->preserveWhiteSpace = false; @$doc->loadHTML($document); $xpath = new DOMXpath($doc); - + //Recherche des infos de la première commande $nodelist = $xpath->query("//a/img[@alt='visualiser']"); foreach ($nodelist as $n){ @@ -297,29 +303,29 @@ if (file_exists($ckfile)) unlink($ckfile); if ( $opts->commandes ) { $referer = ''; - + /************************************************** Connexion **************************************************/ infogreffeConnexion(); - + $url = "http://www.infogreffe.fr/infogreffe/jsp/information/monCompte.jsp"; $result = getPage($url); if (DEBUG) file_put_contents('kbis-moncompte.html', $result['content']); - + /************************************************** Documents commandés **************************************************/ $url = "http://www.infogreffe.fr/infogreffe/afficherMesAchats.do?refCde=N"; $result = getPage($url); if (DEBUG) file_put_contents('documents.html', $result['content']); - + //On parse le document pour trouver les références de commandes $refs = array(); - + $tmp = parseRef($result['content']); $refs[] = $tmp; - + //Liste des commandes $nodelist = $xpath->query("//a[@class='folded-fond-gris']"); foreach ($nodelist as $n){ @@ -327,7 +333,7 @@ if ( $opts->commandes ) preg_match("/javascript:reveal\(\'(.*)\'\)/", $href, $matches); $refs[] = array( 'ref' => $matches[1] ); } - + $listeRef = array(); foreach($refs as $item){ $listeRef[] = $item['ref']; @@ -341,12 +347,12 @@ if ( $opts->commandes ) $result = getPage($url); if (DEBUG) file_put_contents('documents-'.$ref['ref'].'.html', $result['content']); } - + echo "Téléchargement du kbis...\n"; infogreffeKbis($opts->visu); //Lancer WKHTMLTOPDF pour le PDF - + } elseif ( !$opts->visu ) { /************************************************** @@ -385,19 +391,19 @@ if ( $opts->siren ) echo "Erreur SIREN invalide\n"; exit; } $referer = ''; - + /************************************************** Connexion **************************************************/ infogreffeConnexion(); - + /************************************************** Affichage formulaire **************************************************/ $url = "http://www.infogreffe.fr/infogreffe/index.jsp"; $result = getPage($url); if (DEBUG) file_put_contents('kbis1.html', $result['content']); - + /************************************************** Soumission formulaire **************************************************/ @@ -405,32 +411,42 @@ if ( $opts->siren ) 'commune' => '', 'denomination' => '', 'departement' => '', + //'elargirSecondaire' => 'on', 'elargirRadie' => 'on', 'siren' => $opts->siren, ); $url = "http://www.infogreffe.fr/infogreffe/newRechercheEntreprise.xml"; $result = getPage($url, $fields); if (DEBUG) file_put_contents('kbis2.html', $result['content']); - - // 302 Moved Temporarily + + // 302 Moved Temporarily - But we always use this URL $url = 'http://www.infogreffe.fr/infogreffe/entrepRech.do'; - $redirectUrl = getUrl302($result['content']); - if ($redirectUrl !== false) { - $url = $redirectUrl; - } - + /************************************************** Affichage identite entreprise **************************************************/ //url defini plus haut - $result = getPage($url); + $result = getPage($url, '', array(CURLOPT_FOLLOWLOCATION => true)); if (DEBUG) file_put_contents('kbis3.html', $result['content']); - - $doc = new DOMDocument(); - $doc->strictErrorChecking = false; - $doc->preserveWhiteSpace = false; + + /* + * !! Attention !! Elargir aux radiés peut retourner plusieurs résultats + */ + $doc = new DOMDocument(); + $doc->strictErrorChecking = false; + $doc->preserveWhiteSpace = false; @$doc->loadHTML($result['content']); $xpath = new DOMXpath($doc); + $nodelist = $xpath->query("//div[@id='includeEntrepListe']"); + if ($nodelist->length>0) { + $entries = $xpath->query("//a[@class='company']"); + foreach ($entries as $n) { + $url = 'http://www.infogreffe.fr'. $n->getAttribute('href'); + break; + } + $result = getPage($url); + } + /* * Try to detect we can pass an order, else send message and exit * @@ -440,9 +456,14 @@ if ( $opts->siren ) * Si pas de résultats lors de la recherche alors * table[@class='liste-res-rech']/tbody/tr[1]/td[1]/div/span[1] */ - $nodelist = $xpath->query("//div[@id='libelleRcsGreffe']"); + $doc = new DOMDocument(); + $doc->strictErrorChecking = false; + $doc->preserveWhiteSpace = false; + @$doc->loadHTML($result['content']); + $xpath = new DOMXpath($doc); + $nodelist = $xpath->query("//div[@id='libelleRcsGreffe']"); if ($nodelist->length==0) { - $entries = $xpath->query("//table[@class='liste-res-rech']/tbody/tr[1]/td[1]/div/span"); + $entries = $xpath->query("//table[@class='liste-res-rech']/tbody/tr[1]/td[1]/div/span"); if ($entries->length>0) { echo trim($entries->item(0)->nodeValue); } else { @@ -451,7 +472,7 @@ if ( $opts->siren ) if (file_exists($ckfile)) unlink($ckfile); exit; } - + /* * Si les documents sont disponibles * Extrait RCS (Kbis) @@ -469,14 +490,14 @@ if ( $opts->siren ) if (file_exists($ckfile)) unlink($ckfile); exit; } - + // 302 Moved Temporarily //http://www.infogreffe.fr/infogreffe/entrepListe.do?index=rcs if ( $result['header']['http_code']=='302' && array_key_exists('redirect_url', $result['header']) ) { $url = $result['header']['redirect_url']; $result = getPage($url); if (DEBUG) file_put_contents('kbis3-1.html', $result['content']); - + // => /infogreffe/getEntrepDetail.do?docId=780308B042410000 $doc = new DOMDocument(); $doc->strictErrorChecking = false; @@ -491,32 +512,31 @@ if ( $opts->siren ) $result = getPage($url); if (DEBUG) file_put_contents('kbis3-2.html', $result['content']); } elseif ( $result['header']['http_code']=='302' ) { - $url = 'http://www.infogreffe.fr/weblogic/infogreffe/entrepListe.do?index=rcs'; - $result = getPage($url); + $url = 'http://www.infogreffe.fr/weblogic/infogreffe/entrepListe.do?index=rcs'; + $result = getPage($url); if (DEBUG) file_put_contents('kbis3-1.html', print_r($result,1)); - - $doc = new DOMDocument(); - $doc->strictErrorChecking = false; - $doc->preserveWhiteSpace = false; - @$doc->loadHTML($result['content']); - $xpath = new DOMXpath($doc); - $nodelist = $xpath->query("//a[@class='company']"); - foreach ($nodelist as $n) { - $url = 'http://www.infogreffe.fr'. $n->getAttribute('href'); - break; - } - $result = getPage($url); + + $doc = new DOMDocument(); + $doc->strictErrorChecking = false; + $doc->preserveWhiteSpace = false; + @$doc->loadHTML($result['content']); + $xpath = new DOMXpath($doc); + $nodelist = $xpath->query("//a[@class='company']"); + foreach ($nodelist as $n) { + $url = 'http://www.infogreffe.fr'. $n->getAttribute('href'); + break; + } + $result = getPage($url); if (DEBUG) file_put_contents('kbis3-2.html', $result['content']); } - + + $fields = array(); + //Recherche des infos pour la validation du formulaire $doc = new DOMDocument(); $doc->strictErrorChecking = false; $doc->preserveWhiteSpace = false; @$doc->loadHTML($result['content']); $xpath = new DOMXpath($doc); - - $fields = array(); - //Recherche des infos pour la validation du formulaire $nodelist = $xpath->query("//form[@name='FicheEntrepriseForm']/div[@id='conteneur']/input[@type='hidden']"); foreach ($nodelist as $n) { $key = $n->getAttribute('name'); @@ -536,7 +556,7 @@ if ( $opts->siren ) $fields['historiqueModificationsForm.tri'] = '0'; $fields['procedureCollectiveForm.formVisible'] = 'true'; $fields['dossierCompletForm.formVisible'] = 'true'; - + if (DEBUG) print_r($fields); /************************************************** Validation de la selection @@ -544,7 +564,7 @@ if ( $opts->siren ) $url = "http://www.infogreffe.fr/infogreffe/validerSelectionFicheEntreprise.xml"; $result = getPage($url, $fields); if (DEBUG) file_put_contents('kbis4.html', print_r($result,1)); - + /************************************************** Valider la commande **************************************************/ @@ -560,18 +580,18 @@ if ( $opts->siren ) $fields = array(); $result = getPage($url, $fields); if (DEBUG) file_put_contents('kbis6.html', $result['content']); - + $info = parseRef($result['content']); $ref = $info['ref']; - + /************************************************** Visualiser **************************************************/ $identifiant = infogreffeKbis($ref); - - //Suppression fichier + + //Suppression fichier if (file_exists($ckfile)) unlink($ckfile); - + if ($identifiant===false || $identifiant!=$opts->siren) { echo "ERREUR"; exit;