<?php
/**
 * Download a BODACC PDF file.
 *
 * Usage: php <script> type annee num
 */
// The script expects exactly three arguments after its own name:
// type, annee (year) and num (issue number).
if ($argc != 4) {
    echo $argv[0] . ' type annee num';
    echo " \n ";
    exit;
}

// Bulletin type, upper-cased; used below for the output directory and file name.
$type = strtoupper($argv[1]);
$annee = $argv[2];

// Left-pad the issue number with zeros to at least four characters
// (longer values are left untouched, exactly like the former manual loop).
$num = str_pad($argv[3], 4, '0', STR_PAD_LEFT);

// NOTE(review): $annonce is not read anywhere in the visible script; kept in
// case code outside this view relies on it.
$annonce = 1;
/**
 * Fetch a URL with cURL and return the transfer information and the body.
 *
 * When $curl_data is a non-empty array it is URL-encoded and sent as the POST
 * body; for any other value no POST option is set and a plain GET is issued.
 * When the global $ckfile holds a non-empty path, it is used both as the
 * cookie file and as the cookie jar so the session survives between calls.
 *
 * @param string       $url       URL to request
 * @param array|string $curl_data POST fields as key => value pairs
 * @param bool         $verbose   Forwarded to CURLOPT_VERBOSE
 *
 * @return array 'header'  => result of curl_getinfo() (empty array if the handle could not be created),
 *               'content' => response body, or false when the transfer failed,
 *               'errno'   => cURL error number (0 on success),
 *               'errmsg'  => cURL error message ('' on success)
 */
function getPage($url, $curl_data = '', $verbose = false)
{
    global $ckfile;

    $user_agent = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)';

    $options = array(
        CURLOPT_RETURNTRANSFER => true,        // return the body instead of printing it
        CURLOPT_HEADER         => false,       // do not include response headers in the body
        CURLOPT_FOLLOWLOCATION => true,        // follow redirects
        CURLOPT_ENCODING       => '',          // empty string: accept every encoding cURL supports
        CURLOPT_USERAGENT      => $user_agent,
        CURLOPT_AUTOREFERER    => true,        // set the referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120,         // timeout on connect (seconds)
        CURLOPT_TIMEOUT        => 120,         // timeout on the whole transfer (seconds)
        CURLOPT_MAXREDIRS      => 10,          // stop after 10 redirects
        // NOTE(review): certificate and host verification are switched off;
        // re-enable them if this function is ever pointed at an https endpoint.
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_VERBOSE        => $verbose,
    );

    // Only touch the POST options when there is data to send: setting
    // CURLOPT_POSTFIELDS, even to an empty string, turns the request into a POST.
    if (is_array($curl_data) && count($curl_data) > 0) {
        $options[CURLOPT_POST] = true;
        // http_build_query() URL-encodes keys and values and joins them with
        // '&' without leaving a trailing separator.
        $options[CURLOPT_POSTFIELDS] = http_build_query($curl_data, '', '&');
    }

    // Session cookie storage, shared between successive calls.
    if (is_string($ckfile) && $ckfile !== '') {
        $options[CURLOPT_COOKIEFILE] = $ckfile;
        $options[CURLOPT_COOKIEJAR] = $ckfile;
    }

    $ch = curl_init($url);
    if ($ch === false) {
        return array(
            'header'  => array(),
            'content' => false,
            'errno'   => -1,
            'errmsg'  => 'curl_init() failed',
        );
    }

    curl_setopt_array($ch, $options);
    $content = curl_exec($ch);
    $err = curl_errno($ch);
    $errmsg = curl_error($ch);
    $header = curl_getinfo($ch);
    curl_close($ch);

    return array(
        'header'  => $header,
        'content' => $content,
        'errno'   => $err,
        'errmsg'  => $errmsg,
    );
}
//==> Start
// Main flow: open the advanced search page, search for the requested issue,
// open the first announcement found, locate its PDF link and save the PDF
// under <data path>/bodacc/<type>/<annee>/.
$debug = false;

/**
 * Remove the session cookie file (if present), print the download error
 * message and stop the script.
 *
 * @param string $cookieFile Path of the cookie file to delete
 *
 * @return void
 */
function abortDownload($cookieFile)
{
    if (is_string($cookieFile) && file_exists($cookieFile)) {
        unlink($cookieFile);
    }
    echo " Erreur téléchargement du PDF. ";
    echo " \n ";
    // The exit status is left unchanged (0) so existing callers are not affected.
    exit;
}

$config = parse_ini_file(dirname(__FILE__) . '/../../application/configs/application.ini', true);
if ($config === false || !isset($config['production']['profil.path.data'])) {
    // Without the configured data path the PDF would be written relative to '/'.
    echo 'Erreur lecture du fichier de configuration.';
    echo " \n ";
    exit;
}

$directory = $config['production']['profil.path.data'] . '/bodacc/' . $type . '/' . $annee;
if (!is_dir($directory)) {
    mkdir($directory, 0777, true);
}

// Define cookie file for storage
$ckfile = dirname(__FILE__) . '/' . uniqid('cookies-');

$site = 'http://www.bodacc.fr';

// Advanced search page. Fetched without POST data: the search form values are
// only defined for the next request.
echo $url = $site . '/annonce/rechercheavancee';
echo " \n ";
$result = getPage($url);
if ($debug) {
    file_put_contents('bodacc1.html', $result['content']);
}

// Submit the search form for the requested issue.
echo $url = $site . '/annonce/liste';
echo " \n ";
$data = array(
    'categorieannonce'   => 'tout',
    'commercant'         => '',
    'datepublicationmax' => '',
    'datepublicationmin' => '',
    'motscles'           => '',
    // NOTE(review): $annonce is defined earlier in the script but never used;
    // confirm whether it was meant to be sent here instead of $num.
    'numeroannonce'      => $num,
    'numerodepartement'  => 'tout',
    'numeroparution'     => $annee . $num,
    // NOTE(review): hard-coded although $type comes from the command line - confirm.
    'publication'        => 'A',
    'registre'           => '',
    'typeannonce'        => 'tout',
);
$result = getPage($url, $data);
if ($debug) {
    file_put_contents('bodacc2.html', $result['content']);
}

// First announcement link of the result list. The capture stops at the closing
// quote of the href so it cannot run on to a later quote on the same line.
if (!preg_match('/\<a href="\/annonce\/detail\/([^"]+)"\>/', (string) $result['content'], $matches)) {
    abortDownload($ckfile);
}
echo $url = $site . '/annonce/detail/' . $matches[1];
echo " \n ";

// Display the announcement
$result = getPage($url);
if ($debug) {
    file_put_contents('bodacc3.html', $result['content']);
}

/*
Markup the pattern below is looking for:
<p>
<a href="/annonce/telecharger/EBODACC-A_20130013_0001_p000.pdf" title="Téléchargez le témoin de publication de l'annonce n°1 du Bodacc A n°20130013 du 18/01/2013 - Format pdf">
<img src="/extension/dilabodacc/design/dilabodacc/images/deco/temoin.png" alt="" class="temoin">
Téléchargez le témoin de publication
</a>
</p>
*/
// Capture only the href value; a greedy ".*" would also swallow the title attribute.
if (!preg_match('/\<a href="\/annonce\/telecharger\/([^"]+)"/', (string) $result['content'], $matches)) {
    abortDownload($ckfile);
}
echo $url = $site . '/annonce/telecharger/' . $matches[1];
echo " \n ";

// Download the PDF
$result = getPage($url);
if (!is_string($result['content']) || strncmp($result['content'], '%PDF', 4) !== 0) {
    // Not a PDF: optionally keep what was received for inspection.
    if ($debug) {
        file_put_contents('bodacc4.html', $result['content']);
    }
    abortDownload($ckfile);
}

$pdfFile = $directory . '/BODACC_' . $type . '_' . $annee . '_' . $num . '.pdf';
if (file_put_contents($pdfFile, $result['content']) === false) {
    abortDownload($ckfile);
}
echo " \n ";

// The session cookie is no longer needed.
if (file_exists($ckfile)) {
    unlink($ckfile);
}