Récupération du bodacc PDF
Modification de la récupération du bodacc pdf pour gérer les témoins de publication unitaire
This commit is contained in:
parent
e8d13e6044
commit
db1fb57858
@ -162,31 +162,30 @@ class JuridiqueController extends Zend_Controller_Action
|
||||
$dateTemoinB = DateTime::createFromFormat('Ymd', '20161011');
|
||||
$dateTemoinC = DateTime::createFromFormat('Ymd', '20160223');
|
||||
|
||||
$params = null;
|
||||
if ($annonce['Code'] == 'BODA' && $datePublication >= $dateTemoinA) {
|
||||
$lienBodacc = $this->view->url(array('controller'=>'juridique',
|
||||
'action'=>'temoinpdf', 'type'=>substr($annonce['Code'],3,1),
|
||||
'annee'=>$annonce['Annee'], 'num'=>$annonce['Num']), 'default', true);
|
||||
$this->view->assign('bodaccLink', $lienBodacc);
|
||||
$params = array('unit'=>1);
|
||||
$this->view->assign('bodaccLinkLabel', 'Télécharger le témoin de publication');
|
||||
} elseif ($annonce['Code'] == 'BODB' && $datePublication >= $dateTemoinB) {
|
||||
$lienBodacc = $this->view->url(array('controller'=>'juridique',
|
||||
'action'=>'temoinpdf', 'type'=>substr($annonce['Code'],3,1),
|
||||
'annee'=>$annonce['Annee'], 'num'=>$annonce['Num']), 'default', true);
|
||||
$this->view->assign('bodaccLink', $lienBodacc);
|
||||
$params = array('unit'=>1);
|
||||
$this->view->assign('bodaccLinkLabel', 'Télécharger le témoin de publication');
|
||||
} elseif ($annonce['Code'] == 'BODC' && $datePublication >= $dateTemoinC) {
|
||||
$lienBodacc = $this->view->url(array('controller'=>'juridique',
|
||||
'action'=>'temoinpdf', 'type'=>substr($annonce['Code'],3,1),
|
||||
'annee'=>$annonce['Annee'], 'num'=>$annonce['Num']), 'default', true);
|
||||
$this->view->assign('bodaccLink', $lienBodacc);
|
||||
$params = array('unit'=>1);
|
||||
$this->view->assign('bodaccLinkLabel', 'Télécharger le témoin de publication');
|
||||
} elseif ($datePublication >= $dateBodacc) {
|
||||
$lienBodacc = $this->view->url(array('controller'=>'juridique',
|
||||
'action'=> 'bodaccpdf', 'type'=>substr($annonce['Code'],3,1),
|
||||
'annee' => $annonce['Annee'], 'num'=>$annonce['Num']), 'default', true);
|
||||
$this->view->assign('bodaccLink', $lienBodacc);
|
||||
$params = array();
|
||||
$this->view->assign('bodaccLinkLabel', 'Télécharger le bulletin officiel');
|
||||
}
|
||||
|
||||
if ($params !== null) {
|
||||
$params = array_merge($params, array('controller'=>'juridique',
|
||||
'action'=>'bodaccpdf', 'siren'=>$siren, 'type'=>substr($annonce['Code'],3,1),
|
||||
'parution'=>$annonce['Annee'].str_pad($ann->BodaccNum, 4, '0', STR_PAD_LEFT),
|
||||
'annonce'=>$ann->NumAnnonce
|
||||
));
|
||||
$lienBodacc = $this->view->url($params, 'default', true);
|
||||
$this->view->assign('bodaccLink', $lienBodacc);
|
||||
}
|
||||
}
|
||||
|
||||
$this->renderScript('juridique/annonce.phtml');
|
||||
@ -541,39 +540,36 @@ class JuridiqueController extends Zend_Controller_Action
|
||||
$this->_helper->viewRenderer->setNoRender(true);
|
||||
$request = $this->getRequest();
|
||||
|
||||
$siren = $request->getParam('siren');
|
||||
$type = $request->getParam('type');
|
||||
$annee = $request->getParam('annee');
|
||||
$num = $request->getParam('num');
|
||||
$num = str_pad($num, 4, '0', STR_PAD_LEFT);
|
||||
$parution = $request->getParam('parution');
|
||||
$annonce = $request->getParam('annonce');
|
||||
$unit = $request->getParam('unit');
|
||||
$annee = substr($parution,0,4);
|
||||
|
||||
$c = Zend_Registry::get('config');
|
||||
$file = $c->profil->path->shared.'/persist/bodacc/'.$type.'/'.$annee.'/'.
|
||||
'BODACC_'.$type.'_'.$annee.'_'.$num.'.pdf';
|
||||
$path = $c->profil->path->shared.'/persist/bodacc/'.$type.'/'.$annee;
|
||||
if ($unit == 1) {
|
||||
$file = $path."/BODACC_".$type."_".$annee."_".substr($parution,4)."_".$annonce.".pdf";
|
||||
} else {
|
||||
$file = $path."/BODACC_".$type."_".$annee."_".substr($parution,4).".pdf";
|
||||
}
|
||||
|
||||
if ( !file_exists($file) ) {
|
||||
exec('php ' . APPLICATION_PATH . "/../scripts/jobs/getBodaccPdf.php $type $annee $num >> getBodaccPdf.log");
|
||||
if (!file_exists($file)) {
    // The PDF is not cached yet: run the CLI job that downloads it.
    // Every value comes from the HTTP request, so each one is quoted with
    // escapeshellarg() before being handed to the shell. Options must be
    // separated by a space, otherwise "--type" is glued to the siren value
    // and the job never receives the edition.
    $cli = "/../scripts/jobs/getBodaccPdf.php";
    $params = "--siren ".escapeshellarg((string) $siren)
        ." --type ".escapeshellarg((string) $type)
        ." --parution ".escapeshellarg((string) $parution)
        ." --annonce ".escapeshellarg((string) $annonce);
    exec('php ' . APPLICATION_PATH . "$cli $params >> getBodaccPdf.log");
}
|
||||
|
||||
if (file_exists($file)) {
|
||||
$href = $this->view->url(array('module'=>'file', 'controller'=>'bodacc', 'action'=>'actual',
|
||||
'q' => basename($file)), 'default', true);
|
||||
$href = $this->view->url(array('module'=>'file', 'controller'=>'bodacc',
|
||||
'action'=>'actual', 'q' => basename($file)), 'default', true);
|
||||
echo "<a target=\"_blank\" href=\"".$href."\">Cliquer ici pour télécharger le fichier.</a>";
|
||||
} else {
|
||||
echo "Erreur lors du chargement du fichier.";
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Handles the download of the BODACC publication certificate
 * ("témoin de publication").
 *
 * Currently a stub: it disables the layout and the automatic view
 * rendering, fetches the request object and produces no output.
 */
public function temoinpdfAction()
{
    $helpers = $this->_helper;

    // No layout and no view script for this action.
    $helpers->layout()->disableLayout();
    $helpers->viewRenderer->setNoRender(true);

    $httpRequest = $this->getRequest();
}
|
||||
|
||||
public function annoncenumAction()
|
||||
{
|
||||
$this->_helper->layout()->disableLayout();
|
||||
|
@ -1,149 +1,101 @@
|
||||
<?php
|
||||
/**
|
||||
* Download bodacc pdf file
|
||||
*/
|
||||
if ($argc != 4){
|
||||
echo $argv[0].' type annee num';
|
||||
echo "\n";
|
||||
exit;
|
||||
// --- Define path to application directory
|
||||
defined('APPLICATION_PATH')
|
||||
|| define('APPLICATION_PATH', realpath(__DIR__ . '/../../application'));
|
||||
|
||||
// --- Define application environment
|
||||
defined('APPLICATION_ENV')
|
||||
|| define('APPLICATION_ENV', (getenv('APPLICATION_ENV') ? getenv('APPLICATION_ENV') : 'production'));
|
||||
|
||||
// --- Composer autoload
|
||||
require_once realpath(__DIR__ . '/../../vendor/autoload.php');
|
||||
|
||||
// --- Create application, bootstrap, and run
|
||||
$application = new Zend_Application(APPLICATION_ENV, APPLICATION_PATH . '/configs/application.ini');
|
||||
|
||||
// --- Options
|
||||
$displayUsage = false;
|
||||
try {
|
||||
$opts = new Zend_Console_Getopt(array(
|
||||
'help|?' => "Affiche l'aide.",
|
||||
'verbose|v' => "Mode verbeux",
|
||||
'siren=s' => "SIREN",
|
||||
'type=s' => "Edition",
|
||||
'parution=s' => "Numéro bodacc AAAAnnnn",
|
||||
'annonce=s' => "Numéro annonce",
|
||||
));
|
||||
$opts->parse();
|
||||
} catch (Zend_Console_Getopt_Exception $e) {
|
||||
$displayUsage = true;
|
||||
}
|
||||
|
||||
$type = strtoupper($argv[1]);
|
||||
if (!in_array($type, array('A', 'B', 'C'))) {
|
||||
exit;
|
||||
// --- Aide / Options
|
||||
if (count($opts->getOptions())==0 || isset($opts->help)) {
|
||||
$displayUsage = true;
|
||||
}
|
||||
|
||||
$annee = $argv[2];
|
||||
|
||||
$num = $argv[3];
|
||||
$num = str_pad($num, 4, '0', STR_PAD_LEFT);
|
||||
|
||||
$annonce = 1;
|
||||
|
||||
/**
 * Fetch a URL with cURL and return the transfer information and the body.
 *
 * When $curl_data is a non-empty array the request is flagged as a POST and
 * its body is built by joining the pairs as "key=value&key=value" (keys and
 * values are used as given, they are not URL-encoded). Otherwise
 * CURLOPT_POST is false and the POST body is an empty string.
 *
 * Cookies are read from and written to the file named by the global
 * $ckfile, so successive calls share the same cookie jar.
 *
 * @param string       $url       URL to request
 * @param array|string $curl_data POST fields as key => value pairs, or '' for none
 * @param bool         $verbose   Value passed to CURLOPT_VERBOSE
 * @return array array('header' => curl_getinfo() result, 'content' => curl_exec() result)
 */
function getPage($url, $curl_data = '', $verbose=false)
{
    global $ckfile;

    $user_agent = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)';

    $post = false;
    $fields = '';
    if (is_array($curl_data) && count($curl_data)>0) {
        // Join with implode() so the body has no trailing "&"
        // (the previous rtrim() call discarded its result).
        $pairs = array();
        foreach ($curl_data as $key=>$value) {
            $pairs[] = $key.'='.$value;
        }
        $fields = implode('&', $pairs);
        $post = true;
    }

    // NOTE(review): TLS host and peer verification are disabled below.
    // Confirm this is still wanted before pointing this at an https URL.
    $options = array(
        CURLOPT_RETURNTRANSFER => true,         // return web page
        CURLOPT_HEADER         => false,        // don't return headers
        CURLOPT_FOLLOWLOCATION => true,         // follow redirects
        CURLOPT_ENCODING       => "",           // handle all encodings
        CURLOPT_USERAGENT      => $user_agent,  // who am i
        CURLOPT_AUTOREFERER    => true,         // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120,          // timeout on connect
        CURLOPT_TIMEOUT        => 120,          // timeout on response
        CURLOPT_MAXREDIRS      => 10,           // stop after 10 redirects
        CURLOPT_POST           => $post,        // i am sending post data
        CURLOPT_POSTFIELDS     => $fields,      // these are my post vars
        CURLOPT_SSL_VERIFYHOST => 0,            // don't verify ssl
        CURLOPT_SSL_VERIFYPEER => false,        //
        CURLOPT_VERBOSE        => $verbose,     //
        CURLOPT_COOKIEFILE     => $ckfile,      // read cookies from this file
        CURLOPT_COOKIEJAR      => $ckfile,      // store the session cookie
    );

    $ch = curl_init($url);
    curl_setopt_array($ch,$options);
    $content = curl_exec($ch);
    $header  = curl_getinfo($ch);
    curl_close($ch);

    return array('header'=>$header, 'content'=>$content);
}
|
||||
|
||||
//==> Start
|
||||
$debug = false;
|
||||
|
||||
$config = parse_ini_file(dirname(__FILE__).'/../../application/configs/application.ini', true);
|
||||
$directory = $config['production']['profil.path.shared'].'/persist/bodacc/'.$type.'/'.$annee;
|
||||
if ( !file_exists($directory) ) mkdir($directory, 0777, true);
|
||||
|
||||
//Define cookie file for storage
|
||||
$ckfile = dirname(__FILE__).'/'.uniqid('cookies-');
|
||||
|
||||
$site = 'http://www.bodacc.fr';
|
||||
|
||||
//Recherche avancée
|
||||
echo $url = $site.'/annonce/rechercheavancee';
|
||||
echo "\n";
|
||||
$result = getPage($url, $data);
|
||||
if ($debug) file_put_contents('bodacc1.html', $result['content']);
|
||||
|
||||
$url = $site."/annonce/liste";
|
||||
echo $url."\n";
|
||||
$data = array(
|
||||
'categorieannonce' => 'tout',
|
||||
'commercant' => '',
|
||||
'datepublicationmax' => '',
|
||||
'datepublicationmin' => '',
|
||||
'motscles' => '',
|
||||
'numeroannonce' => '1',
|
||||
'numerodepartement' => 'tout',
|
||||
'numeroparution' => $annee.$num,
|
||||
'publication' => $type,
|
||||
'registre' => '',
|
||||
'typeannonce' => 'tout',
|
||||
);
|
||||
$result = getPage($url, $data);
|
||||
if ($debug) file_put_contents('bodacc2.html', $result['content']);
|
||||
preg_match('/\<a href="\/annonce\/detail-annonce\/(.*)"\>/', $result['content'], $matches);
|
||||
$url = null;
|
||||
if (count($matches) > 0) {
|
||||
$url = $site.'/annonce/detail-annonce/'.$matches[1];
|
||||
echo $url."\n";
|
||||
}
|
||||
if (empty($url)) {
|
||||
echo "Erreur Detail de l'annonce.\n";
|
||||
// --- Usage
|
||||
if ($displayUsage) {
|
||||
echo "Télécharge le pdf (entier|temoin) de publication au BODACC";
|
||||
echo "\n\n";
|
||||
echo $opts->getUsageMessage();
|
||||
echo "\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
//Affichage de l'annonce
|
||||
$result = getPage($url);
|
||||
if ($debug) file_put_contents('bodacc3.html', $result['content']);
|
||||
/*
|
||||
<p>
|
||||
<a href="/annonce/telecharger/EBODACC-A_20130013_0001_p000.pdf" title="Téléchargez le témoin de publication de l'annonce n°1 du Bodacc A n°20130013 du 18/01/2013 - Format pdf ">
|
||||
<img src="/extension/dilabodacc/design/dilabodacc/images/deco/temoin.png" alt="" class="temoin">
|
||||
Téléchargez le témoin de publication
|
||||
</a>
|
||||
</p>
|
||||
*/
|
||||
preg_match('/\<a href="\/annonce\/telecharger\/(.*)"/', $result['content'], $matches);
|
||||
echo $url = $site.'/annonce/telecharger/'.$matches[1];
|
||||
echo "\n";
|
||||
if (empty($url)) {
|
||||
echo "Erreur téléchargement du PDF.\n";
|
||||
exit;
|
||||
$c = new Zend_Config($application->getOptions());
|
||||
|
||||
$baseUrl = "http://www.bodacc.fr/";
|
||||
|
||||
// Crawler
|
||||
if ($opts->verbose) {
|
||||
echo "Démarrage du crawl\n";
|
||||
}
|
||||
$client = new \Goutte\Client();
|
||||
$crawler = $client->request('GET', $baseUrl);
|
||||
$crawler = $client->click($crawler->selectLink("Recherche avancée")->link());
|
||||
$form = $crawler->selectButton("Lancer la recherche")->form();
|
||||
if ($opts->verbose) {
|
||||
echo "Soumission du formulaire\n";
|
||||
}
|
||||
$crawler = $client->submit($form, array(
|
||||
'registre' => $opts->siren,
|
||||
'publication' => $opts->type,
|
||||
'numeroparution' => $opts->parution,
|
||||
'numeroannonce' => $opts->annonce,
|
||||
));
|
||||
$result = $crawler->filterXPath('//tr[@class="pair"]')->first();
|
||||
$annonceLink = $result->filterXPath('//a')->attr('href');
|
||||
if ($opts->verbose) {
|
||||
echo "Lien :".$annonceLink."\n";
|
||||
}
|
||||
//echo $annonceLink."\n";
|
||||
$crawler = $client->request('GET', $baseUrl.$annonceLink);
|
||||
$result = $crawler->filter('.pdf-unit')->first();
|
||||
$pdfLink = $result->filterXPath('//a')->attr('href');
|
||||
|
||||
// PDF Complet : BODACC-B_20150155_0001_p000.pdf => BODACC_{type}_{annee}_{parution}.pdf
|
||||
// PDF Unitaire : BODACC_A_PDF_Unitaire_20170011_00001.pdf => BODACC_{type}_{annee}_{parution}_{annonce}.pdf
|
||||
$pos = strrpos($pdfLink, 'BODACC');
|
||||
$pdfName = substr($pdfLink, $pos);
|
||||
// Pdf Unitaire
|
||||
if (strpos($pdfName, 'Unitaire')) {
|
||||
$pdfName = "BODACC_".$opts->type."_".substr($opts->parution,0,4)."_".substr($opts->parution,4)."_".$opts->annonce.".pdf";
|
||||
}
|
||||
// Pdf Complet
|
||||
else {
|
||||
$pdfName = "BODACC_".$opts->type."_".substr($opts->parution,0,4)."_".substr($opts->parution,4).".pdf";
|
||||
}
|
||||
|
||||
//Téléchargement du PDF
|
||||
$result = getPage($url);
|
||||
if (substr($result['content'],0,4)!='%PDF'){
|
||||
if ($debug) file_put_contents('bodacc4.html', $result['content']);
|
||||
echo "Erreur téléchargement du PDF.\n";
|
||||
exit;
|
||||
// Download
// Files are stored under {shared}/persist/bodacc/{type}/{year}/, the same
// directory the web controller checks before offering the download link.
$annee = substr($opts->parution, 0, 4);
$path = $c->profil->path->shared.'/persist/bodacc/'.$opts->type.'/'.$annee;

// The "sink" option writes to an existing directory only, so create the
// year directory on first use.
if (!is_dir($path)) {
    mkdir($path, 0777, true);
}

$dlClient = new GuzzleHttp\Client();
try {
    $dlClient->request('GET', $baseUrl.$pdfLink, ['sink' => $path.'/'.$pdfName]);
} catch (\GuzzleHttp\Exception\TransferException $e) {
    // TransferException is the base class of the exceptions Guzzle throws
    // while transferring a request.
    echo date('Y-m-d H:i:s')." - Erreur Téléchargement du PDF $pdfName.\n";
    exit(1);
}
|
||||
file_put_contents($directory."/BODACC_".$type."_".$annee."_".$num.".pdf", $result['content']);
|
||||
echo "\n";
|
||||
if (file_exists($ckfile)) unlink($ckfile);
|
||||
|
||||
exit(0);
|
||||
|
Loading…
Reference in New Issue
Block a user