Récupération du bodacc PDF

Modification de la récupération du bodacc pdf pour gérer les témoins de
publication unitaire
This commit is contained in:
Michael RICOIS 2017-01-17 13:51:10 +01:00
parent e8d13e6044
commit db1fb57858
2 changed files with 122 additions and 174 deletions

View File

@ -162,31 +162,30 @@ class JuridiqueController extends Zend_Controller_Action
$dateTemoinB = DateTime::createFromFormat('Ymd', '20161011');
$dateTemoinC = DateTime::createFromFormat('Ymd', '20160223');
$params = null;
if ($annonce['Code'] == 'BODA' && $datePublication >= $dateTemoinA) {
$lienBodacc = $this->view->url(array('controller'=>'juridique',
'action'=>'temoinpdf', 'type'=>substr($annonce['Code'],3,1),
'annee'=>$annonce['Annee'], 'num'=>$annonce['Num']), 'default', true);
$this->view->assign('bodaccLink', $lienBodacc);
$params = array('unit'=>1);
$this->view->assign('bodaccLinkLabel', 'Télécharger le témoin de publication');
} elseif ($annonce['Code'] == 'BODB' && $datePublication >= $dateTemoinB) {
$lienBodacc = $this->view->url(array('controller'=>'juridique',
'action'=>'temoinpdf', 'type'=>substr($annonce['Code'],3,1),
'annee'=>$annonce['Annee'], 'num'=>$annonce['Num']), 'default', true);
$this->view->assign('bodaccLink', $lienBodacc);
$params = array('unit'=>1);
$this->view->assign('bodaccLinkLabel', 'Télécharger le témoin de publication');
} elseif ($annonce['Code'] == 'BODC' && $datePublication >= $dateTemoinC) {
$lienBodacc = $this->view->url(array('controller'=>'juridique',
'action'=>'temoinpdf', 'type'=>substr($annonce['Code'],3,1),
'annee'=>$annonce['Annee'], 'num'=>$annonce['Num']), 'default', true);
$this->view->assign('bodaccLink', $lienBodacc);
$params = array('unit'=>1);
$this->view->assign('bodaccLinkLabel', 'Télécharger le témoin de publication');
} elseif ($datePublication >= $dateBodacc) {
$lienBodacc = $this->view->url(array('controller'=>'juridique',
'action'=> 'bodaccpdf', 'type'=>substr($annonce['Code'],3,1),
'annee' => $annonce['Annee'], 'num'=>$annonce['Num']), 'default', true);
$this->view->assign('bodaccLink', $lienBodacc);
$params = array();
$this->view->assign('bodaccLinkLabel', 'Télécharger le bulletin officiel');
}
if ($params !== null) {
$params = array_merge($params, array('controller'=>'juridique',
'action'=>'bodaccpdf', 'siren'=>$siren, 'type'=>substr($annonce['Code'],3,1),
'parution'=>$annonce['Annee'].str_pad($ann->BodaccNum, 4, '0', STR_PAD_LEFT),
'annonce'=>$ann->NumAnnonce
));
$lienBodacc = $this->view->url($params, 'default', true);
$this->view->assign('bodaccLink', $lienBodacc);
}
}
$this->renderScript('juridique/annonce.phtml');
@ -541,39 +540,36 @@ class JuridiqueController extends Zend_Controller_Action
$this->_helper->viewRenderer->setNoRender(true);
$request = $this->getRequest();
$siren = $request->getParam('siren');
$type = $request->getParam('type');
$annee = $request->getParam('annee');
$num = $request->getParam('num');
$num = str_pad($num, 4, '0', STR_PAD_LEFT);
$parution = $request->getParam('parution');
$annonce = $request->getParam('annonce');
$unit = $request->getParam('unit');
$annee = substr($parution,0,4);
$c = Zend_Registry::get('config');
$file = $c->profil->path->shared.'/persist/bodacc/'.$type.'/'.$annee.'/'.
'BODACC_'.$type.'_'.$annee.'_'.$num.'.pdf';
$path = $c->profil->path->shared.'/persist/bodacc/'.$type.'/'.$annee;
if ($unit == 1) {
$file = $path."/BODACC_".$type."_".$annee."_".substr($parution,4)."_".$annonce.".pdf";
} else {
$file = $path."/BODACC_".$type."_".$annee."_".substr($parution,4).".pdf";
}
if ( !file_exists($file) ) {
exec('php ' . APPLICATION_PATH . "/../scripts/jobs/getBodaccPdf.php $type $annee $num >> getBodaccPdf.log");
// The PDF is not cached on disk yet: run the CLI job that downloads it.
if (!file_exists($file)) {
    $cli = "/../scripts/jobs/getBodaccPdf.php";
    // These values come straight from the HTTP request, so each one is
    // shell-quoted before being placed on the command line. Every option is
    // separated by a space (the original glued the siren value to "--type").
    $params = "--siren ".escapeshellarg((string) $siren)
        ." --type ".escapeshellarg((string) $type)
        ." --parution ".escapeshellarg((string) $parution)
        ." --annonce ".escapeshellarg((string) $annonce);
    exec('php ' . APPLICATION_PATH . "$cli $params >> getBodaccPdf.log");
}
if (file_exists($file)) {
$href = $this->view->url(array('module'=>'file', 'controller'=>'bodacc', 'action'=>'actual',
'q' => basename($file)), 'default', true);
$href = $this->view->url(array('module'=>'file', 'controller'=>'bodacc',
'action'=>'actual', 'q' => basename($file)), 'default', true);
echo "<a target=\"_blank\" href=\"".$href."\">Cliquer ici pour télécharger le fichier.</a>";
} else {
echo "Erreur lors du chargement du fichier.";
}
}
/**
* Handles the download of the BODACC publication witness ("témoin de publication").
*
* Disables the layout and the automatic view rendering, then fetches the
* request object. Nothing is read from the request and nothing is output.
* NOTE(review): the body looks like an unfinished stub — confirm whether this
* action is still needed.
*/
public function temoinpdfAction()
{
$this->_helper->layout()->disableLayout();
$this->_helper->viewRenderer->setNoRender(true);
$request = $this->getRequest();
}
public function annoncenumAction()
{
$this->_helper->layout()->disableLayout();

View File

@ -1,149 +1,101 @@
<?php
/**
* Download bodacc pdf file
*/
if ($argc != 4){
echo $argv[0].' type annee num';
echo "\n";
exit;
// --- Define path to application directory
defined('APPLICATION_PATH')
|| define('APPLICATION_PATH', realpath(__DIR__ . '/../../application'));
// --- Define application environment
defined('APPLICATION_ENV')
|| define('APPLICATION_ENV', (getenv('APPLICATION_ENV') ? getenv('APPLICATION_ENV') : 'production'));
// --- Composer autoload
require_once realpath(__DIR__ . '/../../vendor/autoload.php');
// --- Create application, bootstrap, and run
$application = new Zend_Application(APPLICATION_ENV, APPLICATION_PATH . '/configs/application.ini');
// --- Options
$displayUsage = false;
try {
$opts = new Zend_Console_Getopt(array(
'help|?' => "Affiche l'aide.",
'verbose|v' => "Mode verbeux",
'siren=s' => "SIREN",
'type=s' => "Edition",
'parution=s' => "Numéro bodacc AAAAnnnn",
'annonce=s' => "Numéro annonce",
));
$opts->parse();
} catch (Zend_Console_Getopt_Exception $e) {
$displayUsage = true;
}
$type = strtoupper($argv[1]);
if (!in_array($type, array('A', 'B', 'C'))) {
exit;
// --- Aide / Options
if (count($opts->getOptions())==0 || isset($opts->help)) {
$displayUsage = true;
}
$annee = $argv[2];
$num = $argv[3];
$num = str_pad($num, 4, '0', STR_PAD_LEFT);
$annonce = 1;
/**
 * Fetch a page with cURL, optionally sending form data.
 *
 * The cookie jar path is read from the global $ckfile so that successive
 * calls share the same session cookies.
 *
 * @param string       $url       URL to request.
 * @param array|string $curl_data Form fields as key => value; when it is a
 *                                non-empty array the fields are sent as POST data.
 * @param bool         $verbose   Passed to CURLOPT_VERBOSE.
 *
 * @return array ['header' => result of curl_getinfo(), 'content' => result of curl_exec()]
 */
function getPage($url, $curl_data = '', $verbose=false)
{
    global $ckfile;

    $user_agent = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)';

    $post = false;
    $fields = '';
    if (is_array($curl_data) && count($curl_data)>0) {
        // Build a properly URL-encoded body. The previous hand-rolled loop
        // neither encoded the values nor removed the trailing '&' (the result
        // of rtrim() was discarded).
        $fields = http_build_query($curl_data, '', '&');
        $post = true;
    }

    $options = array(
        CURLOPT_RETURNTRANSFER => true,        // return web page
        CURLOPT_HEADER         => false,       // don't return headers
        CURLOPT_FOLLOWLOCATION => true,        // follow redirects
        CURLOPT_ENCODING       => "",          // handle all encodings
        CURLOPT_USERAGENT      => $user_agent, // who am i
        CURLOPT_AUTOREFERER    => true,        // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120,         // timeout on connect
        CURLOPT_TIMEOUT        => 120,         // timeout on response
        CURLOPT_MAXREDIRS      => 10,          // stop after 10 redirects
        CURLOPT_POST           => $post,       // i am sending post data
        CURLOPT_POSTFIELDS     => $fields,     // this are my post vars
        // NOTE(review): certificate verification is disabled; re-enable it if
        // this is ever pointed at an HTTPS endpoint.
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_VERBOSE        => $verbose,
        CURLOPT_COOKIEFILE     => $ckfile,
        CURLOPT_COOKIEJAR      => $ckfile,     // session cookie storage
    );

    $ch = curl_init($url);
    curl_setopt_array($ch, $options);
    $content = curl_exec($ch);
    $header = curl_getinfo($ch);
    curl_close($ch);

    return array('header'=>$header, 'content'=>$content);
}
//==> Start
$debug = false;
$config = parse_ini_file(dirname(__FILE__).'/../../application/configs/application.ini', true);
$directory = $config['production']['profil.path.shared'].'/persist/bodacc/'.$type.'/'.$annee;
if ( !file_exists($directory) ) mkdir($directory, 0777, true);
//Define cookie file for storage
$ckfile = dirname(__FILE__).'/'.uniqid('cookies-');
$site = 'http://www.bodacc.fr';
//Recherche avancée
echo $url = $site.'/annonce/rechercheavancee';
echo "\n";
$result = getPage($url, $data);
if ($debug) file_put_contents('bodacc1.html', $result['content']);
$url = $site."/annonce/liste";
echo $url."\n";
$data = array(
'categorieannonce' => 'tout',
'commercant' => '',
'datepublicationmax' => '',
'datepublicationmin' => '',
'motscles' => '',
'numeroannonce' => '1',
'numerodepartement' => 'tout',
'numeroparution' => $annee.$num,
'publication' => $type,
'registre' => '',
'typeannonce' => 'tout',
);
$result = getPage($url, $data);
if ($debug) file_put_contents('bodacc2.html', $result['content']);
preg_match('/\<a href="\/annonce\/detail-annonce\/(.*)"\>/', $result['content'], $matches);
$url = null;
if (count($matches) > 0) {
$url = $site.'/annonce/detail-annonce/'.$matches[1];
echo $url."\n";
}
if (empty($url)) {
echo "Erreur Detail de l'annonce.\n";
// --- Usage
if ($displayUsage) {
echo "Télécharge le pdf (entier|temoin) de publication au BODACC";
echo "\n\n";
echo $opts->getUsageMessage();
echo "\n";
exit;
}
//Affichage de l'annonce
$result = getPage($url);
if ($debug) file_put_contents('bodacc3.html', $result['content']);
/*
<p>
<a href="/annonce/telecharger/EBODACC-A_20130013_0001_p000.pdf" title="Téléchargez le témoin de publication de l'annonce n°1 du Bodacc A n°20130013 du 18/01/2013 - Format pdf ">
<img src="/extension/dilabodacc/design/dilabodacc/images/deco/temoin.png" alt="" class="temoin">
Téléchargez le témoin de publication
</a>
</p>
*/
preg_match('/\<a href="\/annonce\/telecharger\/(.*)"/', $result['content'], $matches);
echo $url = $site.'/annonce/telecharger/'.$matches[1];
echo "\n";
if (empty($url)) {
echo "Erreur téléchargement du PDF.\n";
exit;
$c = new Zend_Config($application->getOptions());
$baseUrl = "http://www.bodacc.fr/";
// Crawler
if ($opts->verbose) {
echo "Démarrage du crawl\n";
}
$client = new \Goutte\Client();
$crawler = $client->request('GET', $baseUrl);
$crawler = $client->click($crawler->selectLink("Recherche avancée")->link());
$form = $crawler->selectButton("Lancer la recherche")->form();
if ($opts->verbose) {
echo "Soumission du formulaire\n";
}
$crawler = $client->submit($form, array(
'registre' => $opts->siren,
'publication' => $opts->type,
'numeroparution' => $opts->parution,
'numeroannonce' => $opts->annonce,
));
$result = $crawler->filterXPath('//tr[@class="pair"]')->first();
$annonceLink = $result->filterXPath('//a')->attr('href');
if ($opts->verbose) {
echo "Lien :".$annonceLink."\n";
}
//echo $annonceLink."\n";
$crawler = $client->request('GET', $baseUrl.$annonceLink);
$result = $crawler->filter('.pdf-unit')->first();
$pdfLink = $result->filterXPath('//a')->attr('href');
// PDF Complet : BODACC-B_20150155_0001_p000.pdf => BODACC_{type}_{annee}_{parution}.pdf
// PDF Unitaire : BODACC_A_PDF_Unitaire_20170011_00001.pdf => BODACC_{type}_{annee}_{parution}_{annonce}.pdf
$pos = strrpos($pdfLink, 'BODACC');
$pdfName = substr($pdfLink, $pos);
// Pdf Unitaire
if (strpos($pdfName, 'Unitaire')) {
$pdfName = "BODACC_".$opts->type."_".substr($opts->parution,0,4)."_".substr($opts->parution,4)."_".$opts->annonce.".pdf";
}
// Pdf Complet
else {
$pdfName = "BODACC_".$opts->type."_".substr($opts->parution,0,4)."_".substr($opts->parution,4).".pdf";
}
//Téléchargement du PDF
$result = getPage($url);
if (substr($result['content'],0,4)!='%PDF'){
if ($debug) file_put_contents('bodacc4.html', $result['content']);
echo "Erreur téléchargement du PDF.\n";
exit;
// Download
$annee = substr($opts->parution, 0, 4);
// The storage directory is ".../persist/bodacc/<type>/<year>", the same location
// JuridiqueController checks with file_exists(). The previous "persit" spelling
// wrote the file where it was never found.
$path = $c->profil->path->shared.'/persist/bodacc/'.$opts->type.'/'.$annee;
// The sink target directory must exist before the download starts.
if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
    echo date('Y-m-d H:i:s')." - Erreur création du répertoire $path.\n";
    exit(1);
}
$dlClient = new GuzzleHttp\Client();
try {
    $dlClient->request('GET', $baseUrl.$pdfLink, ['sink' => $path.'/'.$pdfName]);
} catch (\GuzzleHttp\Exception\GuzzleException $e) {
    // GuzzleException is the common interface of Guzzle's exceptions;
    // "\GuzzleHttp\Exception" is only a namespace and never matched anything.
    echo date('Y-m-d H:i:s')." - Erreur Téléchargement du PDF $pdfName.\n";
    exit(1);
}
file_put_contents($directory."/BODACC_".$type."_".$annee."_".$num.".pdf", $result['content']);
echo "\n";
if (file_exists($ckfile)) unlink($ckfile);
exit(0);