db1fb57858
Modification de la récupération du bodacc pdf pour gérer les témoins de publication unitaire
102 lines
3.2 KiB
PHP
102 lines
3.2 KiB
PHP
<?php
// --- Application directory: resolved relative to this script unless already defined
if (!defined('APPLICATION_PATH')) {
    define('APPLICATION_PATH', realpath(__DIR__ . '/../../application'));
}

// --- Application environment: APPLICATION_ENV environment variable, 'production' when unset or empty
if (!defined('APPLICATION_ENV')) {
    define('APPLICATION_ENV', getenv('APPLICATION_ENV') ?: 'production');
}

// --- Composer autoload
require_once realpath(__DIR__ . '/../../vendor/autoload.php');

// --- Create the application object from its ini configuration
// (only the constructor is called here; nothing is bootstrapped or run)
$application = new Zend_Application(
    APPLICATION_ENV,
    APPLICATION_PATH . '/configs/application.ini'
);

// --- Command line options
// $displayUsage : true when the usage text must be printed instead of running the download
// $usageMessage : usage text, taken from the parser or, on a parsing error, from the exception
$displayUsage = false;
$usageMessage = '';
$opts = null;
try {
    $opts = new Zend_Console_Getopt(array(
        'help|?' => "Affiche l'aide.",
        'verbose|v' => "Mode verbeux",
        'siren=s' => "SIREN",
        'type=s' => "Edition",
        'parution=s' => "Numéro bodacc AAAAnnnn",
        'annonce=s' => "Numéro annonce",
    ));
    $opts->parse();
    $usageMessage = $opts->getUsageMessage();

    // --- Help requested, or no option given at all
    // Kept inside the try block: getOptions() and isset() go through the parser again,
    // so after a failed parse they would raise the same exception a second time.
    if (count($opts->getOptions()) == 0 || isset($opts->help)) {
        $displayUsage = true;
    }
} catch (Zend_Console_Getopt_Exception $e) {
    // Invalid command line: fall back to the usage text carried by the exception
    $displayUsage = true;
    $usageMessage = $e->getUsageMessage();
}

// --- Usage
if ($displayUsage) {
    echo "Télécharge le pdf (entier|temoin) de publication au BODACC";
    echo "\n\n";
    echo $usageMessage;
    echo "\n";
    exit;
}

// Application options wrapped in a Zend_Config object
$c = new Zend_Config($application->getOptions());

$baseUrl = "http://www.bodacc.fr/";

// --- Crawler: walk the BODACC site to find the PDF link of the requested announcement
if ($opts->verbose) {
    echo "Démarrage du crawl\n";
}
$client = new \Goutte\Client();
try {
    // Home page -> "Recherche avancée" link -> search form
    $crawler = $client->request('GET', $baseUrl);
    $crawler = $client->click($crawler->selectLink("Recherche avancée")->link());
    $form = $crawler->selectButton("Lancer la recherche")->form();
    if ($opts->verbose) {
        echo "Soumission du formulaire\n";
    }
    $crawler = $client->submit($form, array(
        'registre' => $opts->siren,
        'publication' => $opts->type,
        'numeroparution' => $opts->parution,
        'numeroannonce' => $opts->annonce,
    ));

    // Only the first result row carrying the class "pair" is used
    $result = $crawler->filterXPath('//tr[@class="pair"]')->first();
    $annonceLink = $result->filterXPath('//a')->attr('href');
    if ($opts->verbose) {
        echo "Lien :".$annonceLink."\n";
    }

    // Announcement page: the PDF link is the first anchor inside the ".pdf-unit" element
    $crawler = $client->request('GET', $baseUrl.$annonceLink);
    $result = $crawler->filter('.pdf-unit')->first();
    $pdfLink = $result->filterXPath('//a')->attr('href');
} catch (\InvalidArgumentException $e) {
    // The crawler raises this when an expected link, form or node is missing
    // (e.g. the search returned no result); report it like a download failure.
    echo date('Y-m-d H:i:s')." - Erreur : annonce ou lien PDF introuvable (".$e->getMessage().").\n";
    exit(1);
}

// --- Local file name, derived from the kind of PDF the link points to
// Full PDF   : BODACC-B_20150155_0001_p000.pdf => BODACC_{type}_{annee}_{parution}.pdf
// Single PDF : BODACC_A_PDF_Unitaire_20170011_00001.pdf => BODACC_{type}_{annee}_{parution}_{annonce}.pdf

// Remote file name = link content from the last occurrence of "BODACC";
// the whole link is used when "BODACC" does not appear in it.
$pos = strrpos($pdfLink, 'BODACC');
$remoteName = ($pos === false) ? $pdfLink : substr($pdfLink, $pos);

// Common part: --parution is split after its fourth character (AAAA | nnnn)
$pdfName = "BODACC_".$opts->type."_".substr($opts->parution, 0, 4)."_".substr($opts->parution, 4);

// Single-announcement PDF: the announcement number is appended.
// Strict comparison so that a match at position 0 is not read as "not found".
if (strpos($remoteName, 'Unitaire') !== false) {
    $pdfName .= "_".$opts->annonce;
}
$pdfName .= ".pdf";

// --- Download
// Target directory: {shared path}/persit/bodacc/{type}/{year},
// where year is the first four characters of --parution.
// NOTE(review): the directory name "persit" is kept exactly as in the original path;
// the directory is assumed to exist already — confirm.
$annee = substr($opts->parution, 0, 4);
$path = $c->profil->path->shared.'/persit/bodacc/'.$opts->type.'/'.$annee;
$dlClient = new GuzzleHttp\Client();
try {
    // The 'sink' option makes Guzzle write the response body to the given file
    $dlClient->request('GET', $baseUrl.$pdfLink, ['sink' => $path.'/'.$pdfName]);
} catch (\GuzzleHttp\Exception\TransferException $e) {
    // TransferException is the common base class of Guzzle's request failures;
    // "\GuzzleHttp\Exception" is only a namespace and could never be caught.
    echo date('Y-m-d H:i:s')." - Erreur Téléchargement du PDF $pdfName.\n";
    exit(1);
}

exit(0);