extranet/scripts/jobs/getBodaccPdf.php
Michael RICOIS db1fb57858 Récupération du bodacc PDF
Modification de la récupération du bodacc pdf pour gérer les témoins de
publication unitaire
2017-01-17 13:51:10 +01:00

102 lines
3.2 KiB
PHP

<?php
// --- Application directory: resolved relative to this script, unless the constant already exists
if (!defined('APPLICATION_PATH')) {
    define('APPLICATION_PATH', realpath(__DIR__ . '/../../application'));
}

// --- Application environment: APPLICATION_ENV environment variable, falling back to "production"
if (!defined('APPLICATION_ENV')) {
    define('APPLICATION_ENV', getenv('APPLICATION_ENV') ?: 'production');
}

// --- Composer autoloader
require_once realpath(__DIR__ . '/../../vendor/autoload.php');

// --- Instantiate the Zend application for the current environment from configs/application.ini
$application = new Zend_Application(
    APPLICATION_ENV,
    APPLICATION_PATH . '/configs/application.ini'
);
// --- Command-line options
// $displayUsage is raised when the arguments are invalid, when no option is given or when help is requested.
$displayUsage = false;
// Holds the parsing exception, if any, so the usage screen can report it.
$parseError = null;
try {
    $opts = new Zend_Console_Getopt(array(
        'help|?' => "Affiche l'aide.",
        'verbose|v' => "Mode verbeux",
        'siren=s' => "SIREN",
        'type=s' => "Edition",
        'parution=s' => "Numéro bodacc AAAAnnnn",
        'annonce=s' => "Numéro annonce",
    ));
    $opts->parse();
} catch (Zend_Console_Getopt_Exception $e) {
    $displayUsage = true;
    $parseError = $e;
}

// --- Help requested / no option given
// Skipped after a parsing failure: getOptions() and isset() trigger the parsing again in
// Zend_Console_Getopt, so the same exception would escape uncaught ($opts may not even exist).
if (!$displayUsage && (count($opts->getOptions()) == 0 || isset($opts->help))) {
    $displayUsage = true;
}

// --- Usage
if ($displayUsage) {
    echo "Télécharge le pdf (entier|temoin) de publication au BODACC";
    echo "\n\n";
    if ($parseError !== null) {
        // Invalid arguments: show the reason, the usage carried by the exception, and fail.
        echo $parseError->getMessage();
        echo "\n\n";
        echo $parseError->getUsageMessage();
        echo "\n";
        exit(1);
    }
    echo $opts->getUsageMessage();
    echo "\n";
    exit;
}
// Application options wrapped in a Zend_Config object (read later to locate the storage path)
$c = new Zend_Config($application->getOptions());
$baseUrl = "http://www.bodacc.fr/";

// Crawler: home page -> advanced search form -> first result row -> announcement page -> PDF link
if ($opts->verbose) {
    echo "Démarrage du crawl\n";
}
try {
    $client = new \Goutte\Client();
    $crawler = $client->request('GET', $baseUrl);
    $crawler = $client->click($crawler->selectLink("Recherche avancée")->link());
    $form = $crawler->selectButton("Lancer la recherche")->form();

    if ($opts->verbose) {
        echo "Soumission du formulaire\n";
    }
    // The search form is filled with the values received on the command line.
    $crawler = $client->submit($form, array(
        'registre' => $opts->siren,
        'publication' => $opts->type,
        'numeroparution' => $opts->parution,
        'numeroannonce' => $opts->annonce,
    ));

    // Only the first row carrying the "pair" class is considered; its link leads to the announcement page.
    $result = $crawler->filterXPath('//tr[@class="pair"]')->first();
    $annonceLink = $result->filterXPath('//a')->attr('href');
    if ($opts->verbose) {
        echo "Lien :".$annonceLink."\n";
    }

    // On the announcement page, the PDF link is the first anchor of the ".pdf-unit" element.
    $crawler = $client->request('GET', $baseUrl.$annonceLink);
    $result = $crawler->filter('.pdf-unit')->first();
    $pdfLink = $result->filterXPath('//a')->attr('href');
} catch (\InvalidArgumentException $e) {
    // DomCrawler raises this exception when an expected link, form or node is missing
    // (no search result, page layout changed): report it instead of ending on a fatal error.
    echo date('Y-m-d H:i:s')." - Erreur Annonce ou lien PDF introuvable (".$e->getMessage().").\n";
    exit(1);
}
// Naming of the stored file, depending on the kind of PDF offered by the site:
// Full PDF   : BODACC-B_20150155_0001_p000.pdf => BODACC_{type}_{annee}_{parution}.pdf
// Single PDF : BODACC_A_PDF_Unitaire_20170011_00001.pdf => BODACC_{type}_{annee}_{parution}_{annonce}.pdf

// Remote file name: the part of the link starting at the last "BODACC" occurrence,
// or the whole link when that marker is absent.
$pos = strrpos($pdfLink, 'BODACC');
$pdfName = ($pos === false) ? $pdfLink : substr($pdfLink, $pos);

// Common prefix: --parution is AAAAnnnn, i.e. 4 characters of year followed by the issue number.
$pdfPrefix = "BODACC_".$opts->type."_".substr($opts->parution, 0, 4)."_".substr($opts->parution, 4);

// Strict comparison: a match at offset 0 must count as found.
if (strpos($pdfName, 'Unitaire') !== false) {
    // Single-announcement PDF: the announcement number is appended.
    $pdfName = $pdfPrefix."_".$opts->annonce.".pdf";
} else {
    // Full-issue PDF
    $pdfName = $pdfPrefix.".pdf";
}
// Download
// Target directory: {shared path}/persit/bodacc/{type}/{annee}, the year being the
// first 4 characters of --parution.
$annee = substr($opts->parution, 0, 4);
$path = $c->profil->path->shared.'/persit/bodacc/'.$opts->type.'/'.$annee;

// Create the target directory when it is missing so the file can be written there.
// The trailing is_dir() tolerates another process creating it in the meantime.
if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
    echo date('Y-m-d H:i:s')." - Erreur Création du répertoire $path.\n";
    exit(1);
}

$dlClient = new GuzzleHttp\Client();
$downloadError = null;
try {
    // The response body is written straight to the target file through the "sink" option.
    $dlClient->request('GET', $baseUrl.$pdfLink, ['sink' => $path.'/'.$pdfName]);
} catch (\GuzzleHttp\Exception\GuzzleException $e) {
    // Guzzle exceptions implement this interface; "\GuzzleHttp\Exception" is only a
    // namespace, so catching it never matched anything.
    $downloadError = $e;
} catch (\RuntimeException $e) {
    // Other runtime failures (presumably including an unwritable target file) are reported the same way.
    $downloadError = $e;
}

if ($downloadError !== null) {
    echo date('Y-m-d H:i:s')." - Erreur Téléchargement du PDF $pdfName.\n";
    if ($opts->verbose) {
        echo $downloadError->getMessage()."\n";
    }
    exit(1);
}
exit(0);