149 lines
4.5 KiB
PHP
149 lines
4.5 KiB
PHP
<?php
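
/**
 * Download a BODACC PDF file from www.bodacc.fr.
 *
 * Command-line usage: <script> type annee num
 *   type  - BODACC publication letter: A, B or C (case-insensitive)
 *   annee - publication year
 *   num   - issue number, left-padded with zeros to 4 digits
 */
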
// --- Command-line arguments: type annee num ---------------------------------
// Any argument error prints a message and stops with a non-zero exit status so
// that a calling shell script or cron job can detect the failure.
if ($argc !== 4) {
    echo $argv[0].' type annee num';
    echo "\n";
    exit(1);
}

// Publication letter; lower-case input is accepted and normalised.
$type = strtoupper($argv[1]);
if (!in_array($type, array('A', 'B', 'C'), true)) {
    echo "Erreur : type invalide (A, B ou C attendu).\n";
    exit(1);
}

// The year and the issue number are concatenated into the search form value
// and into the output directory / file name, so only digits are accepted.
$annee = $argv[2];
if (!preg_match('/^[0-9]{4}$/', $annee)) {
    echo "Erreur : annee invalide (4 chiffres attendus).\n";
    exit(1);
}

$num = $argv[3];
if (!preg_match('/^[0-9]+$/', $num)) {
    echo "Erreur : num invalide (chiffres attendus).\n";
    exit(1);
}
// Issue numbers are used as 4 characters, zero-padded on the left.
$num = str_pad($num, 4, '0', STR_PAD_LEFT);

// Announcement number. NOTE(review): not read anywhere else in this file; the
// search form sends a hard-coded '1' for 'numeroannonce'.
$annonce = 1;

/**
 * Fetch a URL with cURL, re-using the script's cookie file between calls.
 *
 * The request is a POST when $curl_data is a non-empty array; otherwise it is
 * a plain GET. Redirects are followed (at most 10).
 *
 * @param string       $url       URL to request.
 * @param array|string $curl_data Form fields (name => value) to send as an
 *                                application/x-www-form-urlencoded POST body.
 *                                Anything other than a non-empty array means
 *                                "no request body".
 * @param bool         $verbose   Passed to CURLOPT_VERBOSE.
 *
 * @return array 'header'  => array returned by curl_getinfo(),
 *               'content' => response body, or false when the transfer failed,
 *               'errno'   => curl_errno() value (0 when no error occurred),
 *               'errmsg'  => curl_error() message ('' when no error occurred).
 */
function getPage($url, $curl_data = '', $verbose=false)
{
    // Path of the cookie file; the calling script sets it as a global.
    global $ckfile;

    $user_agent = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)';

    $options = array(
        CURLOPT_RETURNTRANSFER => true,        // return the body instead of printing it
        CURLOPT_HEADER         => false,       // do not include response headers in the body
        CURLOPT_FOLLOWLOCATION => true,        // follow redirects
        CURLOPT_ENCODING       => "",          // accept every encoding cURL supports
        CURLOPT_USERAGENT      => $user_agent,
        CURLOPT_AUTOREFERER    => true,        // set the Referer header on redirect
        CURLOPT_CONNECTTIMEOUT => 120,         // timeout on connect
        CURLOPT_TIMEOUT        => 120,         // timeout on the whole transfer
        CURLOPT_MAXREDIRS      => 10,          // stop after 10 redirects
        // NOTE(review): certificate and host verification are disabled. The
        // script only targets an http:// site, so this is kept as it was.
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_VERBOSE        => $verbose,
        CURLOPT_COOKIEFILE     => $ckfile,     // read cookies from this file
        CURLOPT_COOKIEJAR      => $ckfile,     // store the session cookie in this file
    );

    // Only add the POST options when there is something to send: libcurl
    // documents that setting CURLOPT_POSTFIELDS, even to an empty string,
    // turns the request into a POST.
    if (is_array($curl_data) && count($curl_data) > 0) {
        $options[CURLOPT_POST] = true;
        // http_build_query() URL-encodes names and values and joins them with
        // '&' without leaving a trailing separator.
        $options[CURLOPT_POSTFIELDS] = http_build_query($curl_data, '', '&');
    }

    $ch = curl_init($url);
    curl_setopt_array($ch, $options);
    $content = curl_exec($ch);
    $err     = curl_errno($ch);
    $errmsg  = curl_error($ch);
    $header  = curl_getinfo($ch);
    curl_close($ch);

    return array(
        'header'  => $header,
        'content' => $content,
        'errno'   => $err,
        'errmsg'  => $errmsg,
    );
}

//==> Start

/**
 * Print an error message, delete the temporary cookie file and stop the
 * script with a non-zero exit status.
 *
 * @param string      $message    Text to print (a newline is appended).
 * @param string|null $cookieFile Cookie file to delete when it exists; pass
 *                                null when no cookie file has been chosen yet.
 */
function bodaccAbort($message, $cookieFile)
{
    echo $message."\n";
    if (!empty($cookieFile) && file_exists($cookieFile)) {
        unlink($cookieFile);
    }
    exit(1);
}

// When true, each fetched page is dumped to bodaccN.html in the working directory.
$debug = false;

// Destination directory: <shared path>/persist/bodacc/<type>/<annee>
$config = parse_ini_file(dirname(__FILE__).'/../../application/configs/application.ini', true);
if (!is_array($config) || !isset($config['production']['profil.path.shared'])) {
    bodaccAbort("Erreur lecture de la configuration.", null);
}
$directory = $config['production']['profil.path.shared'].'/persist/bodacc/'.$type.'/'.$annee;
if (!is_dir($directory) && !mkdir($directory, 0777, true)) {
    bodaccAbort("Erreur création du répertoire ".$directory, null);
}

// Cookie file shared by every getPage() call of this run
$ckfile = dirname(__FILE__).'/'.uniqid('cookies-');

$site = 'http://www.bodacc.fr';

// Step 1: open the advanced search page. The response is only used for the
// debug dump; no form data is sent with this request.
$url = $site.'/annonce/rechercheavancee';
echo $url."\n";
$result = getPage($url);
if ($debug) file_put_contents('bodacc1.html', $result['content']);

// Step 2: submit the search form for the requested publication and issue
$url = $site."/annonce/liste";
echo $url."\n";
$data = array(
    'categorieannonce'   => 'tout',
    'commercant'         => '',
    'datepublicationmax' => '',
    'datepublicationmin' => '',
    'motscles'           => '',
    'numeroannonce'      => '1',
    'numerodepartement'  => 'tout',
    'numeroparution'     => $annee.$num,
    'publication'        => $type,
    'registre'           => '',
    'typeannonce'        => 'tout',
);
$result = getPage($url, $data);
if ($debug) file_put_contents('bodacc2.html', $result['content']);

// Take the first link to an announcement detail page. [^"]+ stops at the
// closing quote of the href, whereas a greedy .* would run on to the last
// quote of the line.
$url = null;
if (is_string($result['content'])
    && preg_match('#<a href="/annonce/detail-annonce/([^"]+)"#', $result['content'], $matches) === 1
) {
    $url = $site.'/annonce/detail-annonce/'.$matches[1];
    echo $url."\n";
}
if (empty($url)) {
    bodaccAbort("Erreur Detail de l'annonce.", $ckfile);
}

// Step 3: display the announcement to find the PDF download link
$result = getPage($url);
if ($debug) file_put_contents('bodacc3.html', $result['content']);
/*
Markup being searched for (the href is followed by a title attribute on the
same line, which is why the capture must stop at the first quote):
<p>
<a href="/annonce/telecharger/EBODACC-A_20130013_0001_p000.pdf" title="Téléchargez le témoin de publication de l'annonce n°1 du Bodacc A n°20130013 du 18/01/2013 - Format pdf ">
<img src="/extension/dilabodacc/design/dilabodacc/images/deco/temoin.png" alt="" class="temoin">
Téléchargez le témoin de publication
</a>
</p>
*/
$url = null;
if (is_string($result['content'])
    && preg_match('#<a href="/annonce/telecharger/([^"]+)"#', $result['content'], $matches) === 1
) {
    $url = $site.'/annonce/telecharger/'.$matches[1];
    echo $url."\n";
}
if (empty($url)) {
    bodaccAbort("Erreur téléchargement du PDF.", $ckfile);
}

// Step 4: download the PDF and check its signature before saving it
$result = getPage($url);
if (!is_string($result['content']) || substr($result['content'], 0, 4) != '%PDF') {
    if ($debug) file_put_contents('bodacc4.html', $result['content']);
    bodaccAbort("Erreur téléchargement du PDF.", $ckfile);
}
$pdfFile = $directory."/BODACC_".$type."_".$annee."_".$num.".pdf";
if (file_put_contents($pdfFile, $result['content']) === false) {
    bodaccAbort("Erreur écriture du PDF.", $ckfile);
}
echo "\n";
if (file_exists($ckfile)) unlink($ckfile);