webservice/scripts/jobs/getKbis.php

603 lines
19 KiB
PHP
Raw Normal View History

2012-11-30 13:51:48 +00:00
#!/usr/bin/php
<?php
// Define path to application directory
defined('APPLICATION_PATH')
|| define('APPLICATION_PATH', realpath(dirname(__FILE__) . '/../../application'));
// Define application environment
define('APPLICATION_ENV', 'development');
// Ensure library/ is on include_path
set_include_path(implode(PATH_SEPARATOR, array(
realpath(APPLICATION_PATH . '/../library'),
get_include_path(),
)));
require_once realpath(dirname(__FILE__)).'/../config/config.php';
/** Zend_Application */
require_once 'Zend/Application.php';
// Create application, bootstrap, and run
$application = new Zend_Application(
APPLICATION_ENV,
APPLICATION_PATH . '/configs/application.ini'
);
try {
$opts = new Zend_Console_Getopt(
//Options
array(
'help|?' => "Affiche l'aide.",
'siren=s' => "Commander un kbis",
'commandes' => "Liste les commandes passees (Reference), si reference alors recupere le kbis",
'visu=s' => "Telechargement du kbis avec une reference (--commandes obligatoire)",
'debug' => "Mode debug",
)
);
$opts->parse();
} catch (Zend_Console_Getopt_Exception $e) {
echo $e->getUsageMessage();
exit;
}
$c = new Zend_Config($application->getOptions());
Zend_Registry::set('config', $c);
if ( isset($opts->debug) ) {
define('DEBUG', true);
} else {
define('DEBUG', false);
}
//Usage
if(count($opts->getOptions())==0 || isset($opts->help))
{
echo "Telecharge le kbis chez infogreffe.";
echo "\n\n";
echo $opts->getUsageMessage();
echo "\n";
exit;
}
function getPageHeader($start,$end,$header)
{
$pattern = '/'. $start .'(.*)'. $end .'/';
if (preg_match($pattern, $header, $result)) {
return $result[1];
} else {
return false;
}
}
function getPage($url, $curl_data = '', $override = null)
{
global $ckfile;
//$user_agent = 'Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1';
//$user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';
$user_agent = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)';
$verbose = false;
$post = false;
$fields = '';
if (is_array($curl_data) && count($curl_data)>0) {
foreach($curl_data as $key=>$value) {
$fields .= $key.'='.$value.'&';
}
rtrim($fields,'&');
$post = true;
}
$options = array(
CURLOPT_RETURNTRANSFER => true, // return web page
CURLOPT_HEADER => false, // don't return headers
CURLOPT_FOLLOWLOCATION => false, // follow redirects
CURLOPT_ENCODING => "", // handle all encodings
CURLOPT_USERAGENT => $user_agent, // who am i
CURLOPT_AUTOREFERER => true, // set referer on redirect
CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect
CURLOPT_TIMEOUT => 120, // timeout on response
CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
CURLOPT_POST => $post, // i am sending post data
CURLOPT_POSTFIELDS => $fields, // this are my post vars
CURLOPT_SSL_VERIFYHOST => 0, // don't verify ssl
CURLOPT_SSL_VERIFYPEER => false, //
CURLOPT_VERBOSE => $verbose , //
//CURLOPT_COOKIESESSION => true,
CURLOPT_COOKIEFILE => $ckfile,
CURLOPT_COOKIEJAR => $ckfile, // Stockage du cookie de session
);
//Override define CURL option
if (is_array($override) && count($override)>0 ) {
$options = $override + $options;
}
$ch = curl_init($url);
curl_setopt_array($ch,$options);
$content = curl_exec($ch);
$err = curl_errno($ch);
$errmsg = curl_error($ch) ;
$header = curl_getinfo($ch);
curl_close($ch);
//Rewrite encoding to UTF-8
//text/html; charset=ISO-8859-1
$encoding = getPageHeader('text\/html; charset=', '', $header['content_type']);
//$encoding = 'ISO-8859-1';
$content = iconv($encoding, 'UTF-8//IGNORE', $content);
// $header['errno'] = $err;
// $header['errmsg'] = $errmsg;
// $header['content'] = $content;
return array('header'=>$header, 'content'=>$content);
}
/**
* Retourne l'url après une page 302
* @param string $content
* @return string|boolean
*/
function getUrl302($content)
{
$url = false;
preg_match('/\<title\>(.*)\<\/title\>/', $content, $matches);
if ($matches[1]=='302 Moved Temporarily') {
preg_match('/\<a href="(.*)"\>/', $content, $matches);
$url = $matches[1];
}
return $url;
}
function infogreffeConnexion()
{
$url = "http://www.infogreffe.fr/infogreffe/index.jsp";
$result = getPage($url);
if (DEBUG) file_put_contents('kbis-connexion1.html', $result['content']);
$url = "http://www.infogreffe.fr/infogreffe/popupLog.jsp?type=0&url=index.jsp";
$result = getPage($url);
if (DEBUG) file_put_contents('kbis-connexion2.html', $result['content']);
$url = "https://www.infogreffe.fr/infogreffe/login.do?redirect=index.jsp";
// 302 Moved Temporarily
// => http://www.infogreffe.fr/infogreffe/index.jsp
$data = array(
'codeAbo'=>'2559',
'codeClt'=>'0041',
'log'=>'',
'pwd'=>'69873',
);
$result = getPage($url, $data);
if (DEBUG) file_put_contents('kbis-connexion3.html', $result['content']);
/*
We need to have
<div style="margin-top:2px; *margin-top:0px;" class="identBar">
<span class="name">Abonn&eacute;</span> |
<a href="/infogreffe/jsp/information/monCompte.jsp">Mon compte</a> |
<a href="/infogreffe/afficherMesAchats.do?refCde=N">Documents Command&eacute;s</a>
| <a href="/infogreffe/deconnexion.do">Se d&eacute;connecter</a>
</div>
*/
}
function infogreffeKbis($ref)
{
/**************************************************
afficheProduit
**************************************************/
$url = "http://www.infogreffe.fr/infogreffe/chargement.jsp?oups=".$ref."_0_V_0_";
$result = getPage($url);
if (DEBUG) file_put_contents('kbis-afficheproduit.html', $result['content']);
//Redirection javascript qui fait patienter
sleep(1);
/**************************************************
serviceProduit
**************************************************/
$url = "http://www.infogreffe.fr/infogreffe/serviceProduit.do?cdePro=".$ref."_0_V_0_";
$result = getPage($url);
if (DEBUG) file_put_contents('kbis-serviceproduit.html', $result['content']);
$output = $result['content'];
//Modification du code HTML pour impression
$output = removeTag('script', $output, true, true);
$output = str_replace('/infogreffe/styles/infogreffe_base.css', '../styles/infogreffe_base.css', $output);
$output = str_replace('/infogreffe/styles/infogreffe.css', '../styles/infogreffe.css', $output);
$output = preg_replace(
'/\<body onload="adapterDimensions(\'conteneur\');" class="simple preventSelection" style="background-color: rgb(241, 241, 241);">/',
'<body class="simple preventSelection" style="background-color: rgb(241, 241, 241);">',
$output
);
//Récupération des informations dans le kbis
//<td align="left" valign="top" class="label">Numéro d'identification :</td><td>509 536 371 R.C.S. PONTOISE</td>
preg_match('/([0-9]{3}\s[0-9]{3}\s[0-9]{3})\sR\.C\.S\./', $output, $matches);
if (count($matches)>1){
$identifiant = str_replace(' ', '',$matches[1]);
} else {
$identifiant = 'unknown';
}
$fichier = $identifiant . '-' . $ref . '.html';
$dir = $c->profil->path->secure.'/kbis/'.date('Ymd');
if (!file_exists($dir)) mkdir($dir);
file_put_contents($dir . '/' . $fichier, $output);
return ($identifiant!='unknown') ? $identifiant : false;
}
function removeTag($balise, $content, $endOfTag = true, $removeContent = true)
{
if( $endOfTag )
{
if( $removeContent)
$output = preg_replace(
'@<'.$balise.'[^>]*?>.*?</'.$balise.'>@si',
'',
$content
);
else
$output = preg_replace(
array('@<'.$balise.'[^>]*?>@', '@</'.$balise.'>@'),
'',
$content
);
}
else
{
$output = preg_replace(
'@<'.$balise.'[^>]*?>@',
'',
$content
);
}
return $output;
}
function parseRef($document)
{
$doc = new DOMDocument();
$doc->strictErrorChecking = false;
$doc->preserveWhiteSpace = false;
@$doc->loadHTML($document);
$xpath = new DOMXpath($doc);
//Recherche des infos de la première commande
$nodelist = $xpath->query("//a/img[@alt='visualiser']");
foreach ($nodelist as $n){
$href = $n->parentNode->getAttribute('href');
preg_match("/javascript:afficheProduit\(\'(.*)_0_V_0_\'/", $href, $matches);
$ref = $matches[1];
}
//Recherche Raison Sociale
$nodelist = $xpath->query("//span[@class='text-company']/a");
foreach ($nodelist as $n){
$rs = $n->nodeValue;
break;
}
//Recherche RCS
$nodelist = $xpath->query("//span[@class='text-rcs']");
foreach ($nodelist as $n){
$rcs = $n->nodeValue;
break;
}
return array(
'ref' => $ref,
'rs' => $rs,
'rcs' => $rcs,
);
}
//Define cookie file for storage
//@todo : Faire en sorte d'utiliser le cookie pendant un temps déterminé, ou nouvelle session à chaque fois
$ckfile = __DIR__.'/'.uniqid('cookie-');
if (file_exists($ckfile)) unlink($ckfile);
// Récupération de la liste des commandes et
if ( $opts->commandes )
{
$referer = '';
/**************************************************
Connexion
**************************************************/
infogreffeConnexion();
$url = "http://www.infogreffe.fr/infogreffe/jsp/information/monCompte.jsp";
$result = getPage($url);
if (DEBUG) file_put_contents('kbis-moncompte.html', $result['content']);
/**************************************************
Documents commandés
**************************************************/
$url = "http://www.infogreffe.fr/infogreffe/afficherMesAchats.do?refCde=N";
$result = getPage($url);
if (DEBUG) file_put_contents('documents.html', $result['content']);
//On parse le document pour trouver les références de commandes
$refs = array();
$tmp = parseRef($result['content']);
$refs[] = $tmp;
//Liste des commandes
$nodelist = $xpath->query("//a[@class='folded-fond-gris']");
foreach ($nodelist as $n){
$href = $n->getAttribute('href');
preg_match("/javascript:reveal\(\'(.*)\'\)/", $href, $matches);
$refs[] = array( 'ref' => $matches[1] );
}
$listeRef = array();
foreach($refs as $item){
$listeRef[] = $item['ref'];
}
if ( $opts->visu && in_array($opts->visu, $listeRef) )
{
//Pour toutes les commandes en dehors de la toute dernière
if ( array_search($opts->visu, $listeRef)!=0 ){
$url = "http://www.infogreffe.fr/infogreffe/chargerContenuCommande.do?refCde=".$opts->visu."&_=";
$result = getPage($url);
if (DEBUG) file_put_contents('documents-'.$ref['ref'].'.html', $result['content']);
}
echo "Téléchargement du kbis...\n";
infogreffeKbis($opts->visu);
//Lancer WKHTMLTOPDF pour le PDF
} elseif ( !$opts->visu ) {
/**************************************************
Reveal : Ajax pour mettre à jour le div
**************************************************/
// Parcourir le fichier précédent et relevé toute les références de commandes
// http://www.infogreffe.fr/infogreffe/chargerContenuCommande.do?refCde=XNDAE&_=
// Dans javascript la requete est réalisé en GET
$i = 0;
foreach ( $refs as $ref )
{
if ($i!=0){
$url = "http://www.infogreffe.fr/infogreffe/chargerContenuCommande.do?refCde=".$ref['ref']."&_=";
$result = getPage($url);
if (DEBUG) file_put_contents('documents-'.$ref['ref'].'.html', $result['content']);
//@todo : Recup des informations
$info = array();
$info = parseRef($document);
}
echo $ref['ref'];
if ( $info['ref']==$ref['ref'] ) {
if ( !empty($info['rs']) ) echo " ".$info['rs'];
if ( !empty($info['rcs']) ) echo " ".$info['rcs'];
}
echo "\n";
$i++;
}
}
}
// Commande d'un kbis
if ( $opts->siren )
{
//Vérification du siren
if (strlen($opts->siren)!=9) {
echo "Erreur SIREN invalide\n"; exit;
}
$referer = '';
/**************************************************
Connexion
**************************************************/
infogreffeConnexion();
/**************************************************
Affichage formulaire
**************************************************/
$url = "http://www.infogreffe.fr/infogreffe/index.jsp";
$result = getPage($url);
if (DEBUG) file_put_contents('kbis1.html', $result['content']);
/**************************************************
Soumission formulaire
**************************************************/
$fields = array(
'commune' => '',
'denomination' => '',
'departement' => '',
//'elargirSecondaire' => 'on',
'elargirRadie' => 'on',
'siren' => $opts->siren,
);
$url = "http://www.infogreffe.fr/infogreffe/newRechercheEntreprise.xml";
$result = getPage($url, $fields);
if (DEBUG) file_put_contents('kbis2.html', $result['content']);
// 302 Moved Temporarily - But we always use this URL
$url = 'http://www.infogreffe.fr/infogreffe/entrepRech.do';
/**************************************************
Affichage identite entreprise
**************************************************/
//url defini plus haut
$result = getPage($url, '', array(CURLOPT_FOLLOWLOCATION => true));
if (DEBUG) file_put_contents('kbis3.html', $result['content']);
/*
* !! Attention !! Elargir aux radiés peut retourner plusieurs résultats
*/
$doc = new DOMDocument();
$doc->strictErrorChecking = false;
$doc->preserveWhiteSpace = false;
@$doc->loadHTML($result['content']);
$xpath = new DOMXpath($doc);
$nodelist = $xpath->query("//div[@id='includeEntrepListe']");
if ($nodelist->length>0) {
$entries = $xpath->query("//a[@class='company']");
foreach ($entries as $n) {
$url = 'http://www.infogreffe.fr'. $n->getAttribute('href');
break;
}
$result = getPage($url);
}
/*
* Try to detect we can pass an order, else send message and exit
*
* Si la recherche a fonctionné
* div[@id="libelleRcsGreffe"], nodeValue = "552 144 503 R.C.S. PARIS"
*
* Si pas de résultats lors de la recherche alors
* table[@class='liste-res-rech']/tbody/tr[1]/td[1]/div/span[1]
*/
$doc = new DOMDocument();
$doc->strictErrorChecking = false;
$doc->preserveWhiteSpace = false;
@$doc->loadHTML($result['content']);
$xpath = new DOMXpath($doc);
$nodelist = $xpath->query("//div[@id='libelleRcsGreffe']");
if ($nodelist->length==0) {
$entries = $xpath->query("//table[@class='liste-res-rech']/tbody/tr[1]/td[1]/div/span");
if ($entries->length>0) {
echo trim($entries->item(0)->nodeValue);
} else {
echo "ERREUR";
}
if (file_exists($ckfile)) unlink($ckfile);
exit;
}
/*
* Si les documents sont disponibles
* <a onclick="reveal('kbis');return false" href="#" id="a_kbis" class="folded">Extrait RCS (Kbis)</a>
*
* Sinon rechercher la valeur du message
* "//div[@id='conteneur']/table/tbody/tr/td/table/tbody/tr[2]/td/table[2]/tbody/tr[last()-2]/td";
*/
$nodelist = $xpath->query("//a[@id='a_kbis']");
if ($nodelist->length==0) {
$div = $xpath->query("//div[@id='conteneur']");
$context = $div->item(0);
$entries = $xpath->query("table/tbody/tr/td/table/tbody/tr[2]/td/table[2]/tbody/tr[9]/td", $context);
$message = $entries->item(0)->nodeValue;
echo trim($message);
if (file_exists($ckfile)) unlink($ckfile);
exit;
}
// 302 Moved Temporarily
//http://www.infogreffe.fr/infogreffe/entrepListe.do?index=rcs
if ( $result['header']['http_code']=='302' && array_key_exists('redirect_url', $result['header']) ) {
$url = $result['header']['redirect_url'];
$result = getPage($url);
if (DEBUG) file_put_contents('kbis3-1.html', $result['content']);
// => /infogreffe/getEntrepDetail.do?docId=780308B042410000
$doc = new DOMDocument();
$doc->strictErrorChecking = false;
$doc->preserveWhiteSpace = false;
@$doc->loadHTML($result['content']);
$xpath = new DOMXpath($doc);
$nodelist = $xpath->query("//a[@class='company']");
foreach ($nodelist as $n) {
$url = 'http://www.infogreffe.fr'. $n->getAttribute('href');
break;
}
$result = getPage($url);
if (DEBUG) file_put_contents('kbis3-2.html', $result['content']);
} elseif ( $result['header']['http_code']=='302' ) {
$url = 'http://www.infogreffe.fr/weblogic/infogreffe/entrepListe.do?index=rcs';
$result = getPage($url);
if (DEBUG) file_put_contents('kbis3-1.html', print_r($result,1));
$doc = new DOMDocument();
$doc->strictErrorChecking = false;
$doc->preserveWhiteSpace = false;
@$doc->loadHTML($result['content']);
$xpath = new DOMXpath($doc);
$nodelist = $xpath->query("//a[@class='company']");
foreach ($nodelist as $n) {
$url = 'http://www.infogreffe.fr'. $n->getAttribute('href');
break;
}
$result = getPage($url);
if (DEBUG) file_put_contents('kbis3-2.html', $result['content']);
}
$fields = array();
//Recherche des infos pour la validation du formulaire
$doc = new DOMDocument();
$doc->strictErrorChecking = false;
$doc->preserveWhiteSpace = false;
@$doc->loadHTML($result['content']);
$xpath = new DOMXpath($doc);
$nodelist = $xpath->query("//form[@name='FicheEntrepriseForm']/div[@id='conteneur']/input[@type='hidden']");
foreach ($nodelist as $n) {
$key = $n->getAttribute('name');
$value = $n->getAttribute('value');
$fields[$key] = $value;
}
$fields['montantTotalSelection'] = '5.44'; // 3.11 + 2.33
$fields['extraitForm.visualisation'] = 'on'; // => Définit par javascript
$fields['extraitForm.formVisible'] = 'true';
$fields['extraitForm.envoiPeriodiqueForm.periodicite'] = '1';
$fields['extraitForm.envoiPeriodiqueForm.duree'] = '12';
$fields['etatEndettementForm.formVisible'] = 'true';
$fields['etatEndettementForm.etatComplet'] = 'false';
$fields['listeActesForm.formVisible'] = 'true';
$fields['historiqueModificationsForm.formVisible'] = 'true';
$fields['historiqueModificationsForm.tri'] = '0';
$fields['procedureCollectiveForm.formVisible'] = 'true';
$fields['dossierCompletForm.formVisible'] = 'true';
if (DEBUG) print_r($fields);
/**************************************************
Validation de la selection
**************************************************/
$url = "http://www.infogreffe.fr/infogreffe/validerSelectionFicheEntreprise.xml";
$result = getPage($url, $fields);
if (DEBUG) file_put_contents('kbis4.html', print_r($result,1));
/**************************************************
Valider la commande
**************************************************/
$url = "http://www.infogreffe.fr/infogreffe/validerPanierAbonne.do?forcerSaisieCoordonnees=false";
$fields = array();
$result = getPage($url, $fields);
if (DEBUG) file_put_contents('kbis5.html', $result['content']);
/**************************************************
Afficher mes achats et prendre la première commande
**************************************************/
$url = "http://www.infogreffe.fr/infogreffe/afficherMesAchats.do?refCde=N&pageAppel=validerPanier";
$fields = array();
$result = getPage($url, $fields);
if (DEBUG) file_put_contents('kbis6.html', $result['content']);
$info = parseRef($result['content']);
$ref = $info['ref'];
/**************************************************
Visualiser
**************************************************/
$identifiant = infogreffeKbis($ref);
//Suppression fichier
if (file_exists($ckfile)) unlink($ckfile);
if ($identifiant===false || $identifiant!=$opts->siren) {
echo "ERREUR";
exit;
}
echo $identifiant.'-'.$ref.'.html';
}