2012-10-16 07:44:31 +00:00
#!/usr/bin/php -c/var/www/batch/config/php_batch_sd.ini
< ? php
/**
** Attention : Si ajout d ' une nouvelle source modifier la table articles !
** ALTER TABLE `articles` CHANGE `source` `source` ENUM ( 'A' , 'B' , 'D' , 'E' , 'H' ) NOT NULL DEFAULT 'A' ;
**/
echo date ( 'Y/m/d - H:i:s' ) . " - D<> but de lecture des flux RSS... " . EOL ;
include_once ( FWK_PATH . 'common/chiffres.php' );
include_once ( FWK_PATH . 'common/dates.php' );
include_once ( FWK_PATH . 'common/rss.php' );
include_once ( FWK_PATH . 'common/curl.php' );
$iDb = new WDB ( 'presse' );
$iDb2 = new WDB ( 'sdv1' );
$iRss = new atomRss ();
2013-06-19 08:24:49 +00:00
$cours = $doInvestir = $doBoursier = $doPQR = $modeVerbose = false ;
2012-10-16 07:44:31 +00:00
$heureExec = date ( 'Hi' ) * 1 ;
2013-06-19 08:24:49 +00:00
$strInfoScript = 'Usage : ' . basename ( $argv [ 0 ]) . " [OPTION]
Collecte des flux RSS .
Options :
- i Traiter les flux RSSS Isin de Invstir
- b Traiter les flux RSSS Isin de Boursier
- p Traiter la PQR
- v Mode verbeux
" ;
$argv = $_SERVER [ 'argv' ];
if ( $_SERVER [ 'argc' ] > 1 )
{
for ( $i = 1 ; isset ( $argv [ $i ]); $i ++ ) {
if ( substr ( $argv [ $i ], 0 , 1 ) == '-' ) {
switch ( substr ( $argv [ $i ], 1 , 1 )) {
case 'i' : $doInvestir = true ; break ;
case 'b' : $doBoursier = true ; break ;
case 'p' : $doPQR = true ; break ;
case 'v' : $modeVerbose = true ; break ;
case '-' :
case '?' : die ( $strInfoScript ); break ;
default : die ( 'Option ' . $argv [ $i ] . " inconnue ! \n " ); break ;
}
}
}
}
if ( ! $doInvestir && ! $doBoursier ) $doPQR = true ;
2012-10-16 07:44:31 +00:00
$lstRegex = $iDb -> select ( 'pqr_filtres' , " id, nomFiltre, motsClefs " , '1' );
foreach ( $lstRegex as $iReg => $tReg )
$tabRegEx [] = array ( 'id' => $tReg [ 'id' ],
'nom' => $tReg [ 'nomFiltre' ],
'mots' => $tReg [ 'motsClefs' ],
);
$nbFiltres = count ( $tabRegEx );
2013-06-19 08:24:49 +00:00
echo date ( 'Y/m/d - H:i:s' ) . " - Nombre de r<> gles de filtrages : " . $nbFiltres . EOL ;
//print_r($tabRegEx);
//die();
if ( $doInvestir ) { //$heureExec>=2000 && $heureExec<2030) {
2012-10-16 07:44:31 +00:00
$lstRss = $iDb2 -> select ( 'bourse_isin' , " 11 as id, CONCAT('http://www.investir.fr/RSS/RSS.php?codeISIN=',code_isin,'&type=infos-conseils') AS url, code_isin, 'horaire' AS frequence " , " code_isin<>'' GROUP BY code_isin " );
$cours = true ;
2013-06-19 08:24:49 +00:00
$typeFlux = 'ISIN Investir' ;
} elseif ( $doBoursier ) { //heureExec>=2100 && $heureExec<2130) {
2012-10-16 07:44:31 +00:00
$lstRss = $iDb2 -> select ( 'bourse_isin' , " 18 as id, CONCAT('http://www.boursier.com/syndication/rss/news/',code_isin,'/FR') AS url, code_isin, 'horaire' AS frequence " , " code_isin<>'' GROUP BY code_isin " );
$cours = true ;
2013-06-19 08:24:49 +00:00
$typeFlux = 'ISIN Boursier' ;
} else {
2012-10-16 07:44:31 +00:00
$lstRss = $iDb -> select ( 'pqr_rss' , " id, url, titreRss, siteWeb, description, dateFluxRss, dateDwl, '' as code_isin, frequence " , 'actif=1' );
2013-06-19 08:24:49 +00:00
$typeFlux = 'PQR' ;
}
shuffle ( $lstRss );
$nbFlux = count ( $lstRss );
echo date ( 'Y/m/d - H:i:s' ) . " - Nombre de flux ' $typeFlux ' <20> traiter : " . $nbFlux . EOL ;
2012-10-16 07:44:31 +00:00
foreach ( $lstRss as $iFlux => $tabFlux ) {
$urlRss = $tabFlux [ 'url' ];
$idRss = $tabFlux [ 'id' ];
$isin = $tabFlux [ 'code_isin' ];
$horaire = $tabFlux [ 'frequence' ];
//echo "Je charge le flux $urlRss\n";
if ( $idRss <> 11 && $idRss <> 18 && $horaire == 'quotidien' && $heureExec > 800 ) {
2013-06-19 08:24:49 +00:00
echo date ( 'Y/m/d - H:i:s' ) . " - Feed $horaire # $iFlux / $nbFlux : $urlRss - Flux QUOTIDIEN ignor<6F> ! " . EOL ;
2012-10-16 07:44:31 +00:00
continue ;
} /* else
echo date ( 'Y/m/d - H:i:s' ) . " - Feed $horaire # $iFlux : $urlRss - $idRss , $horaire , $heureExec " . EOL ;
*/
if ( $idRss == 0 || $idRss == 18 ) randsleep ( 1 , 2 );
$tabFeed =@ $iRss -> loadRss ( $urlRss );
2013-06-19 08:24:49 +00:00
echo date ( 'Y/m/d - H:i:s' ) . " - Feed $horaire # $iFlux / $nbFlux : $urlRss - Code Retour HTTP : " . $iRss -> codeErreurHttp . EOL ;
2012-10-16 07:44:31 +00:00
if ( count ( $tabFeed ) > 0 ) {
//print_r($tabFeed);
foreach ( $tabFeed as $iFeed => $tabArticle ) {
$dateRss = date ( " YmdHis " , strtotime ( $tabArticle [ 'updated' ]));
if ( $tabArticle [ 'type' ] == 0 && ! $cours ) {
$tabUpdate = array ( 'titreRss' => trim ( utf8_decode ( $tabArticle [ 'title' ])),
'siteWeb' => trim ( $tabArticle [ 'link' ]),
'description' => trim ( utf8_decode ( $tabArticle [ 'description' ])),
'dateFluxRss' => $dateRss ,
'dateDwl' => date ( 'YmdHis' ));
if ( $iDb -> update ( 'pqr_rss' , $tabUpdate , " id= $idRss " ))
echo date ( 'Y/m/d - H:i:s' ) . " - Feed $horaire # $iFlux : Mise <20> jour des informations du flux $urlRss " . EOL ;
else
echo date ( 'Y/m/d - H:i:s' ) . " - Feed $horaire # $iFlux : ERREUR lors de la mise <20> jour des informations du flux $urlRss " . EOL ;
}
elseif ( $tabArticle [ 'type' ] == 1 ) {
$urlArticle = trim ( $tabArticle [ 'link' ]);
$idArticle = md5 ( $dateRss . $urlArticle );
foreach ( $tabRegEx as $iReg => $tReg ) {
$regEx = $tReg [ 'mots' ];
$regId = $tReg [ 'id' ];
$regNom = $tReg [ 'nom' ];
$titre = trim ( utf8_decode ( $tabArticle [ 'title' ]));
$description = trim ( utf8_decode ( $tabArticle [ 'description' ]));
sleep ( 1 );
$page = getUrl ( $urlArticle );
$article = $page [ 'body' ];
if ( preg_match ( " / $regEx /Uis " , $titre . ' ' . $description . ' ' . $article )) {
$tabInsert = array ( 'titre' => $titre ,
'lien' => trim ( utf8_decode ( $urlArticle )),
'description' => $description ,
'article' => $article ,
'dateRss' => $dateRss ,
'idArticle' => $idArticle ,
'isin' => $isin ,
'idRss' => $idRss );
$id = $iDb -> insert ( 'pqr_articles' , $tabInsert );
if ( $id ) {
//if($iDb->update('pqr_articles',$tabUpdate,'id='.$id))
2013-06-19 08:24:49 +00:00
echo date ( 'Y/m/d - H:i:s' ) . " - Feed $horaire # $iFlux / $nbFlux : Insertion de l'article n<> $id " . EOL ;
2012-10-16 07:44:31 +00:00
} elseif ( mysql_errno () <> 1062 )
die ( mysql_errno () . ': ' . mysql_error ());
}
}
}
}
} else {
//
2013-06-19 08:24:49 +00:00
echo date ( 'Y/m/d - H:i:s' ) . " - Feed # $iFlux / $nbFlux : $urlRss - Code Retour HTTP : " . $iRss -> codeErreurHttp . EOL ;
2012-10-16 07:44:31 +00:00
$tabUpdate = array ( 'dernierCodeRetour' => $iRss -> codeErreurHttp );
if ( $iDb -> update ( 'pqr_rss' , $tabUpdate , " id= $idRss " ))
2013-06-19 08:24:49 +00:00
echo date ( 'Y/m/d - H:i:s' ) . " - Feed $horaire # $iFlux / $nbFlux : Mise <20> jour des informations du flux $urlRss " . EOL ;
2012-10-16 07:44:31 +00:00
}
//<channel><title>Economie - Le Monde.fr</title><link>http://www.lemonde.fr</link><description>Toute l'actualit<69> au moment de la connexion</description><language>en</language><copyright>Copyright Le Monde.fr</copyright><pubDate>Thu, 05 Feb 2009 11:51:14 GMT</pubDate><lastBuildDate>Thu, 05 Feb 2009 11:51:14 GMT</lastBuildDate><ttl>30</ttl><image><title>Economie - Le Monde.fr</title><url>http://medias.lemonde.fr/mmpub/img/lgo/lemondefr_rss.gif</url><link>http://www.lemonde.fr</link></image>
}
echo date ( 'Y/m/d - H:i:s' ) . " - FIN du script. " . EOL ;
die ();
/*
INSERT INTO `presse` . `` (
`id` ,
`url` ,
`dateInsert`
)
VALUES (
NULL , 'http://www.lemondeinformatique.fr/flux-rss/ssii/page-1.html' , ''
);
*/ die ();
echo date ( 'Y/m/d - H:i:s' ) . " - DEBUT de la r<> cup<75> ration des flux Hugin... " . EOL ;
$ret = ftp_mget ( HUGIN_FTP_URL , HUGIN_FTP_USER , HUGIN_FTP_PASS , '*.xml' , HUGIN_LOCAL_DIR , true );
if ( $ret === false )
die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : R<> cup<75> ration des flux Hugin en FTP incorrecte ! " . EOL );
else
echo date ( 'Y/m/d - H:i:s' ) . " - FIN de la r<> cup<75> ration des flux Hugin en FTP ( $ret fichiers r<> cup<75> r<EFBFBD> s). " . EOL ;
echo date ( 'Y/m/d - H:i:s' ) . " - DEBUT de la r<> cup<75> ration des flux ActusNews... " . EOL ;
$ret = ftp_mget ( ACTUSNEWS_FTP_URL , ACTUSNEWS_FTP_USER , ACTUSNEWS_FTP_PASS , '*.xml' , ACTUSNEWS_LOCAL_DIR , true );
if ( $ret === false )
die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : R<> cup<75> ration des flux ActusNews en FTP incorrecte ! " . EOL );
else
echo date ( 'Y/m/d - H:i:s' ) . " - FIN de la r<> cup<75> ration des flux ActusNews en FTP ( $ret fichiers r<> cup<75> r<EFBFBD> s). " . EOL ;
echo date ( 'Y/m/d - H:i:s' ) . " - DEBUT de la r<> cup<75> ration des flux Di Release... " . EOL ;
$ret = ftp_mget ( DIRELEASE_FTP_URL , DIRELEASE_FTP_USER , DIRELEASE_FTP_PASS , '*.xml' , DIRELEASE_LOCAL_DIR , true );
if ( $ret === false )
die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : R<> cup<75> ration des flux Di Release en FTP incorrecte ! " . EOL );
else
echo date ( 'Y/m/d - H:i:s' ) . " - FIN de la r<> cup<75> ration des flux Di Release en FTP ( $ret fichiers r<> cup<75> r<EFBFBD> s). " . EOL ;
echo date ( 'Y/m/d - H:i:s' ) . " - DEBUT de la r<> cup<75> ration des flux BusinessWire... " . EOL ;
$dh = opendir ( BUSINESSWIRE_LOCAL_DIR_INCOMING );
$ret = 0 ;
while ( false !== ( $filename = readdir ( $dh ))) {
if ( $filename <> '.' && $filename <> '..' && substr ( $filename , - 4 ) == '.xml' )
if ( ! file_exists ( BUSINESSWIRE_LOCAL_DIR . $filename ) && filesize ( BUSINESSWIRE_LOCAL_DIR_INCOMING . $filename ) > 0 ) {
copy ( BUSINESSWIRE_LOCAL_DIR_INCOMING . $filename , BUSINESSWIRE_LOCAL_DIR . $filename );
$ret ++ ;
}
}
echo date ( 'Y/m/d - H:i:s' ) . " - FIN de la r<> cup<75> ration des flux BusinessWire ( $ret fichiers copi<70> s). " . EOL ;
echo date ( 'Y/m/d - H:i:s' ) . " - DEBUT de la r<> cup<75> ration des flux Les Echos... " . EOL ;
$ret = ftp_mget ( LESECHOS_FTP_URL , LESECHOS_FTP_USER , LESECHOS_FTP_PASS , 'syndication/*.xml' , LESECHOS_LOCAL_DIR , true );
if ( $ret === false )
die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : R<> cup<75> ration des flux Les Echos en FTP incorrecte ! " . EOL );
else
echo date ( 'Y/m/d - H:i:s' ) . " - FIN de la r<> cup<75> ration des flux Les Echos en FTP ( $ret fichiers r<> cup<75> r<EFBFBD> s). " . EOL ;
/**
** INTEGRATION DES COMMUNIQUES " HUGIN "
**/
echo date ( 'Y/m/d - H:i:s' ) . " - DEBUT de l'int<6E> gration des flux Hugin... " . EOL ;
$tabFichier = array ();
$dh = opendir ( HUGIN_LOCAL_DIR );
while ( false !== ( $filename = readdir ( $dh ))) {
if ( $filename <> '.' && $filename <> '..' && substr ( $filename , - 4 ) == '.xml' )
$tabFichier [] = $filename ;
}
foreach ( $tabFichier as $k => $nomFichier ) {
$tabTmp = file ( HUGIN_LOCAL_DIR . $nomFichier );
if ( ! $tabTmp ) die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : Fichier local $nomFichier inexistant ! " . EOL );
if ( stripos ( $tabTmp [ 0 ], 'ISO-8859-1' ) > 0 ) $encoding = 'ISO-8859-1' ;
elseif ( stripos ( $tabTmp [ 0 ], 'UTF-8' ) > 0 ) $encoding = 'UTF-8' ;
else die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : Encoding " . $tabTmp [ 0 ] . " non g<> r<EFBFBD> ! " . EOL );
$dom = @ new DomDocument2 ();
$dom -> load ( HUGIN_LOCAL_DIR . $nomFichier );
$pressReleaseId = $dom -> getValueFromTag ( 'PRESS_RELEASE_ID' );
$tabInsert = array ( 'companyId' => $dom -> getValueFromTag ( 'COMPANY_ID' ),
'companyName' => $dom -> getValueFromTag ( 'COMPANY_NAME' ),
'companyIsin' => $dom -> getValueFromTag ( 'COMPANY_ISIN' ),
'companySiren' => $dom -> getValueFromTag ( 'COMPANY_SIREN' ),
'companyRic' => $dom -> getValueFromTag ( 'COMPANY_RIC' ),
'companyLogoUrl' => $dom -> getValueFromTag ( 'URL1' ),
'companyWebSite' => $dom -> getValueFromTag ( 'URL2' ),
'companyProfile' => $dom -> getValueFromTag ( 'URL3' ),
'companyAnnualReport' => $dom -> getValueFromTag ( 'URL4' ),
'companyInfoActionnaires' => $dom -> getValueFromTag ( 'URL5' ),
'pressReleaseId' => $pressReleaseId ,
'pressReleaseDate' => $dom -> getValueFromTag ( 'PRESS_RELEASE_DATE_TIME' ),
'pressReleaseTitle' => $dom -> getValueFromTag ( 'PRESS_RELEASE_TITLE' ),
'pressReleaseText' => $dom -> getValueFromTag ( 'TEXT_FORMAT' ),
'pressReleaseHtml' => $dom -> getValueFromTag ( 'HTML_FORMAT' ),
'pressReleaseAttachments' => $dom -> getValueFromTag ( 'ATTACHMENTS_URL1' ),
'pressReleaseUrl' => $dom -> getValueFromTag ( 'PRESS_RELEASE_URL_CNG' ),
'indexAll' => $dom -> getValueFromTag ( 'INDEX_ALL' ),
'indexTheme' => $dom -> getValueFromTag ( 'INDEX_THEME' ),
'indexSector' => $dom -> getValueFromTag ( 'INDEX_SECTOR' ),
'indexCountry' => $dom -> getValueFromTag ( 'INDEX_COUNTRY' ),
'indexLanguage' => $dom -> getValueFromTag ( 'INDEX_LANGUAGE' ),
'indexMarketPlace' => $dom -> getValueFromTag ( 'INDEX_MARKET_PLACE' ),
'indexQuoteInd' => $dom -> getValueFromTag ( 'INDEX_QUOTE_INDICATOR' ),
'source' => 'H' ,
);
$ret = $iDb -> insert ( 'articles' , $tabInsert );
if ( ! $ret && $iDb -> getLastErrorNum () <> 1062 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - ERREUR " . $iDb -> getLastError () . EOL ;
print_r ( $tabInsert );
die ();
} elseif ( $iDb -> getLastErrorNum () <> 1062 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - Communiqu<71> n<> $pressReleaseId enregistr<74> avec succ<63> s. " . EOL ;
}
unset ( $dom );
}
echo date ( 'Y/m/d - H:i:s' ) . " - FIN de l'int<6E> gration des flux Hugin. " . EOL ;
/**
** INTEGRATION DES COMMUNIQUES " ACTUSNEWS "
**/
echo date ( 'Y/m/d - H:i:s' ) . " - DEBUT de l'int<6E> gration des flux ActusNews... " . EOL ;
$tabFichier = array ();
$dh = opendir ( ACTUSNEWS_LOCAL_DIR );
while ( false !== ( $filename = readdir ( $dh ))) {
if ( $filename <> '.' && $filename <> '..' && substr ( $filename , - 4 ) == '.xml' )
$tabFichier [] = $filename ;
}
foreach ( $tabFichier as $k => $nomFichier ) {
$tabTmp = file ( ACTUSNEWS_LOCAL_DIR . $nomFichier );
if ( ! $tabTmp ) die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : Fichier local $nomFichier inexistant ! " . EOL );
if ( stripos ( $tabTmp [ 0 ], 'ISO-8859-1' ) > 0 ) $encoding = 'ISO-8859-1' ;
elseif ( stripos ( $tabTmp [ 0 ], 'UTF-8' ) > 0 ) $encoding = 'UTF-8' ;
else die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : Encoding " . $tabTmp [ 0 ] . " non g<> r<EFBFBD> ! " . EOL );
foreach ( $tabTmp as $i_ligne => $ligne ) {
if ( stripos ( $ligne , '<communique ' ) !== false ) { //<communique
//echo date ('Y/m/d - H:i:s')." - Premi<6D> re ligne du communiqu<71> ...".EOL;
if ( stripos ( $ligne , 'langue="FR"' ) > 0 ) {
//echo date ('Y/m/d - H:i:s')." - Communiqu<71> en fran<61> ais ($ligne).".EOL;
$french = true ;
} else {
//echo date ('Y/m/d - H:i:s')." - Langue non int<6E> gr<67> e en base ($ligne) !".EOL;
$french = false ;
break ;
}
break ;
}
}
if ( $french ) {
//echo date ('Y/m/d - H:i:s')." - Chargement du communiqu<71> ...".EOL;
$dom = @ new DomDocument2 ();
$dom -> load ( ACTUSNEWS_LOCAL_DIR . $nomFichier );
$pressReleaseId = $dom -> getValueFromTag ( 'id' );
$tabInsert = array ( //'companyId'=>$dom->getValueFromTag('COMPANY_ID'),
'companyName' => trim ( $dom -> getValueFromTag ( 'raisonsociale' )),
'companyIsin' => trim ( $dom -> getValueFromTag ( 'codeisin' )),
//'companySiren'=>$dom->getValueFromTag('COMPANY_SIREN'),
'companyRic' => trim ( $dom -> getValueFromTag ( 'code_reuters' )),
'companyLogoUrl' => trim ( $dom -> getValueFromTag ( 'logo' )),
'companyWebSite' => trim ( $dom -> getValueFromTag ( 'site' )),
//'companyProfile'=>$dom->getValueFromTag('URL3'),
//'companyAnnualReport'=>$dom->getValueFromTag('URL4'),
'companyInfoActionnaires' => trim ( $dom -> getValueFromTag ( 'site_investisseur' )),
'pressReleaseId' => $pressReleaseId ,
'pressReleaseDate' => trim ( $dom -> getValueFromTag ( 'miseajour' )),
'pressReleaseTitle' => trim ( $dom -> getValueFromTag ( 'titre' )),
'pressReleaseText' => trim ( strtr ( html_entity_decode ( strip_tags ( $dom -> getValueFromTag ( 'contenu' ))), array ( '’' => '<27> ' ))),
'pressReleaseHtml' => trim ( $dom -> getValueFromTag ( 'contenu' )),
'pressReleaseAttachments' => trim ( $dom -> getValueFromTag ( 'pdf' )),
//'pressReleaseUrl'=>$dom->getValueFromTag('PRESS_RELEASE_URL_CNG'),
/* 'indexAll' => $dom -> getValueFromTag ( 'INDEX_ALL' ),
'indexTheme' => $dom -> getValueFromTag ( 'INDEX_THEME' ),
'indexCountry' => $dom -> getValueFromTag ( 'INDEX_COUNTRY' ),
'indexLanguage' => $dom -> getValueFromTag ( 'INDEX_LANGUAGE' ),
*/
'indexQuoteInd' => trim ( $dom -> getValueFromTag ( 'indice' )),
'indexSector' => trim ( $dom -> getValueFromTag ( 'secteur' )),
'indexMarketPlace' => trim ( $dom -> getValueFromTag ( 'marche_cotation' )),
'source' => 'A' ,
'companyBloomberg' => trim ( $dom -> getValueFromTag ( 'code_bloomberg' )),
'companyMnemo' => trim ( $dom -> getValueFromTag ( 'mnemo' )),
'companyNbTitles' => trim ( $dom -> getValueFromTag ( 'nb_de_titres' )),
'companyFootsie' => trim ( $dom -> getValueFromTag ( 'footsie' )),
);
$ret = $iDb -> insert ( 'articles' , $tabInsert );
if ( ! $ret && $iDb -> getLastErrorNum () <> 1062 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - ERREUR " . $iDb -> getLastError () . EOL ;
print_r ( $tabInsert );
die ();
} elseif ( $iDb -> getLastErrorNum () <> 1062 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - Communiqu<71> n<> $pressReleaseId enregistr<74> avec succ<63> s. " . EOL ;
}
unset ( $dom );
}
}
echo date ( 'Y/m/d - H:i:s' ) . " - FIN de l'int<6E> gration des flux ActusNews... " . EOL ;
/**
** INTEGRATION DES COMMUNIQUES " DIRELEASE "
**/
echo date ( 'Y/m/d - H:i:s' ) . " - DEBUT de l'int<6E> gration des flux DiRelease... " . EOL ;
$tabFichier = array ();
$dh = opendir ( DIRELEASE_LOCAL_DIR );
while ( false !== ( $filename = readdir ( $dh ))) {
if ( $filename <> '.' && $filename <> '..' && substr ( $filename , - 4 ) == '.xml' )
$tabFichier [] = $filename ;
}
/** @ todo Pour DiRelease , Ne pas utiliser le parser XML mais uniquement des expr . r<EFBFBD> guli<EFBFBD> re car XML de merde .
** Prendre tous les ISIN et rechercher le SIREN .
**/
foreach ( $tabFichier as $k => $nomFichier ) {
$strTmp = file_get_contents ( DIRELEASE_LOCAL_DIR . $nomFichier );
/* $strTmp = preg_replace ( '/\s+/is' , ' ' , $strTmp );
$fp = fopen ( DIRELEASE_LOCAL_DIR . 'new_' . $nomFichier , 'w' );
fwrite ( $fp , $strTmp );
fclose ( $fp );
$tabTmp = file ( DIRELEASE_LOCAL_DIR . 'new_' . $nomFichier );
if ( ! $tabTmp ) die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : Fichier local $nomFichier inexistant ! " . EOL );
*/
if ( stripos ( $strTmp , 'ISO-8859-1' ) > 0 ) $encoding = 'ISO-8859-1' ;
elseif ( stripos ( $strTmp , 'UTF-8' ) > 0 ) $encoding = 'UTF-8' ;
else die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : Encoding non g<> r<EFBFBD> ! " . EOL );
if ( stripos ( $strTmp , 'langue="FR"' ) > 0 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - Chargement du communiqu<71> en fran<61> ais ( $ligne ). " . EOL ;
preg_match ( '/<id>(.*)<\/id>/isU' , $strTmp , $matches );
$pressReleaseId = trim ( @ $matches [ 1 ]);
preg_match ( '/<raisonsociale>(.*)<\/raisonsociale>/isU' , $strTmp , $matches );
$raisonsociale = trim ( @ $matches [ 1 ]);
preg_match ( '/<codeisin>(.*)<\/codeisin>/isU' , $strTmp , $matches );
$codeisin = trim ( @ $matches [ 1 ]);
preg_match ( '/<code_reuters>(.*)<\/code_reuters>/isU' , $strTmp , $matches );
$code_reuters = trim ( @ $matches [ 1 ]);
preg_match ( '/<logo>(.*)<\/logo>/isU' , $strTmp , $matches );
$logo = trim ( @ $matches [ 1 ]);
preg_match ( '/<site>(.*)<\/site>/isU' , $strTmp , $matches );
$site = trim ( @ $matches [ 1 ]);
preg_match ( '/<site_investisseur>(.*)<\/site_investisseur>/isU' , $strTmp , $matches );
$site_investisseur = trim ( @ $matches [ 1 ]);
preg_match ( '/<logo>(.*)<\/logo>/isU' , $strTmp , $matches );
$logo = trim ( preg_replace ( '/\s/' , '' , @ $matches [ 1 ]));
preg_match ( '/<miseajour>(.*)<\/miseajour>/isU' , $strTmp , $matches );
$miseajour = trim ( @ $matches [ 1 ]);
preg_match ( '/<titre>(.*)<\/titre>/isU' , $strTmp , $matches );
$titre = trim ( @ $matches [ 1 ]);
preg_match ( '/<contenu>(.*)<\/contenu>/isU' , $strTmp , $matches );
$contenu = trim ( strtr ( @ $matches [ 1 ], array ( '<![CDATA[' => '' , ']]>' => '' )));
preg_match ( '/<pdf>(.*)<\/pdf>/isU' , $strTmp , $matches );
$pdf = preg_replace ( '/\s/' , '' , @ $matches [ 1 ]);
preg_match ( '/<indice>(.*)<\/indice>/isU' , $strTmp , $matches );
$indice = trim ( @ $matches [ 1 ]);
preg_match ( '/<secteur>(.*)<\/secteur>/isU' , $strTmp , $matches );
$secteur = trim ( @ $matches [ 1 ]);
preg_match ( '/<marche_cotation>(.*)<\/marche_cotation>/isU' , $strTmp , $matches );
$marche_cotation = trim ( @ $matches [ 1 ]);
preg_match ( '/<code_bloomberg>(.*)<\/code_bloomberg>/isU' , $strTmp , $matches );
$code_bloomberg = trim ( @ $matches [ 1 ]);
preg_match ( '/<mnemo>(.*)<\/mnemo>/isU' , $strTmp , $matches );
$mnemo = trim ( @ $matches [ 1 ]);
preg_match ( '/<nb_de_titres>(.*)<\/nb_de_titres>/isU' , $strTmp , $matches );
$nb_de_titres = trim ( @ $matches [ 1 ]);
preg_match ( '/<footsie>(.*)<\/footsie>/isU' , $strTmp , $matches );
$footsie = trim ( @ $matches [ 1 ]);
$tabInsert = array ( //'companyId'=>$dom->getValueFromTag('COMPANY_ID'),
'companyName' => $raisonsociale ,
'companyIsin' => $codeisin ,
//'companySiren'=>$dom->getValueFromTag('COMPANY_SIREN'),
'companyRic' => $code_reuters ,
'companyLogoUrl' => $logo ,
'companyWebSite' => $site ,
//'companyProfile'=>$dom->getValueFromTag('URL3'),
//'companyAnnualReport'=>$dom->getValueFromTag('URL4'),
'companyInfoActionnaires' => $site_investisseur ,
'pressReleaseId' => $pressReleaseId ,
'pressReleaseDate' => $miseajour ,
'pressReleaseTitle' => $titre ,
'pressReleaseText' => trim ( strtr ( html_entity_decode ( strip_tags ( $contenu )), array ( '’' => '<27> ' ))),
'pressReleaseHtml' => $contenu ,
'pressReleaseAttachments' => $pdf ,
/*
'pressReleaseUrl' => $dom -> getValueFromTag ( 'PRESS_RELEASE_URL_CNG' ),
'indexAll' => $dom -> getValueFromTag ( 'INDEX_ALL' ),
'indexTheme' => $dom -> getValueFromTag ( 'INDEX_THEME' ),
'indexCountry' => $dom -> getValueFromTag ( 'INDEX_COUNTRY' ),
'indexLanguage' => $dom -> getValueFromTag ( 'INDEX_LANGUAGE' ),
*/
'indexQuoteInd' => $indice ,
'indexSector' => $secteur ,
'indexMarketPlace' => $marche_cotation ,
'source' => 'D' ,
'companyBloomberg' => $code_bloomberg ,
'companyMnemo' => $mnemo ,
'companyNbTitles' => $nb_de_titres ,
'companyFootsie' => $footsie ,
);
$ret = $iDb -> insert ( 'articles' , $tabInsert );
if ( ! $ret && $iDb -> getLastErrorNum () <> 1062 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - ERREUR " . $iDb -> getLastError () . EOL ;
print_r ( $tabInsert );
die ();
} elseif ( $iDb -> getLastErrorNum () <> 1062 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - Communiqu<71> n<> $pressReleaseId enregistr<74> avec succ<63> s. " . EOL ;
}
unset ( $dom );
}
}
echo date ( 'Y/m/d - H:i:s' ) . " - FIN de l'int<6E> gration des flux DiRelease... " . EOL ;
/**
** INTEGRATION DES COMMUNIQUES " BUSINESSWIRE "
**/
echo date ( 'Y/m/d - H:i:s' ) . " - DEBUT de l'int<6E> gration des flux BusinessWire... " . EOL ;
$tabFichier = array ();
$dh = opendir ( BUSINESSWIRE_LOCAL_DIR );
while ( false !== ( $filename = readdir ( $dh ))) {
if ( $filename <> '.' && $filename <> '..' && substr ( $filename , - 4 ) == '.xml' )
$tabFichier [] = $filename ;
}
foreach ( $tabFichier as $k => $nomFichier ) {
$tabTmp = file ( BUSINESSWIRE_LOCAL_DIR . $nomFichier );
$strTmp = implode ( " \n " , $tabTmp );
if ( ! $tabTmp ) die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : Fichier local " . BUSINESSWIRE_LOCAL_DIR . " $nomFichier inexistant ! " . EOL );
if ( stripos ( $tabTmp [ 0 ], 'ISO-8859-1' ) > 0 ) $encoding = 'ISO-8859-1' ;
elseif ( stripos ( $tabTmp [ 0 ], 'UTF-8' ) > 0 ) $encoding = 'UTF-8' ;
else die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : Encoding " . $tabTmp [ 0 ] . " non g<> r<EFBFBD> ! " . EOL );
/*< DateAndTime > 20070906 T190800 + 0000 </ DateAndTime >
< NewsService FormalName = " Business Wire " />
< NewsProduct FormalName = " BUSINESS WIRE " />
</ NewsEnvelope >
< NewsItem >
< Identification >
< NewsIdentifier >
< ProviderId > businesswire . com </ ProviderId >
< DateId > 20010714 </ DateId >
<> 20070906006073 </ NewsItemId >
< RevisionId PreviousRevision = " 0 " Update = " N " > 1 </ RevisionId >
< PublicIdentifier > urn : newsml : businesswire . com : 20010714 : 20070906006073 : 1 </ PublicIdentifier >
</ NewsIdentifier >
</ Identification >
< NewsManagement >
< NewsItemType FormalName = " News " />
< FirstCreated > 20070906 T190800 + 0000 </ FirstCreated >
< ThisRevisionCreated > 20070906 T190800 + 0000 </ ThisRevisionCreated >
< Status FormalName = " Usable " />
< AssociatedWith NewsItem = " businesswire.com:20010714:242009 " />
</ NewsManagement >
< NewsComponent >
< BasisForChoice Rank = " 1 " >./ NewsComponent / DescriptiveMetadata / Language </ BasisForChoice >
< NewsLines >
< CopyrightLine > Copyright Business Wire 2007 </ CopyrightLine >
</ NewsLines >
< AdministrativeMetadata >
< Source >
< Party FormalName = " CIT Group Inc. " />
</ Source >
< Contributor >
< Comment FormalName = " BWoffices " > NY </ Comment >
< Party FormalName = " DB " Scheme = " BWEditor " />
</ Contributor >
</ AdministrativeMetadata >
< DescriptiveMetadata >
< Language FormalName = " fr " />
< Genre FormalName = " Release " />
</ DescriptiveMetadata >
< Metadata >
< MetadataType FormalName = " BWKeywords " />
< Property FormalName = " BWCountryKeywords " Value = " United States " />
< Property FormalName = " BWRegionKeywords " Value = " Europe " />
< Property FormalName = " BWRegionKeywords " Value = " North America " />
< Property FormalName = " BWIndustryKeywords " Value = " Manufacturing " />
< Property FormalName = " BWIndustryKeywords " Value = " Aerospace " />
< Property FormalName = " BWCategoryKeywords " Value = " Product/Service " />
< Property FormalName = " BWIndustryKeywords " Value = " Professional Services " />
< Property FormalName = " BWIndustryKeywords " Value = " Finance " />
< Property FormalName = " BWStateKeywords " Value = " New York " />
< Property FormalName = " BWCountryKeywords " Value = " Ireland " />
</ Metadata >
< Metadata >
< MetadataType FormalName = " Securities Identifier " />
< Property FormalName = " " Value = " CIT " />
< Property FormalName = " Exchange " Value = " NYSE " />
< Property FormalName = " ISIN " Value = " US1255811085 " />
< Property FormalName = " SlugLine Display Order " Value = " 1 " />
</ Metadata >
< NewsComponent >
< BasisForChoice Rank = " 1 " >./ NewsComponent / Role </ BasisForChoice >
< NewsLines >
<> CIT consolide son engagement envers l ' <EFBFBD> conomie irlandaise et agrandit son centre op<EFBFBD> rationnel de Dublin </ HeadLine >*/
$dom = @ new DomDocument2 ();
$dom -> load ( BUSINESSWIRE_LOCAL_DIR . $nomFichier );
$pressReleaseId = $dom -> getValueFromTag ( 'NewsItemId' );
preg_match ( '/<Property FormalName="ISIN" Value="(.*)"\/>/i' , $strTmp , $matches );
$isin =@ $matches [ 1 ];
preg_match ( '/<Property FormalName="Exchange" Value="(.*)"\/>/i' , $strTmp , $matches );
$exchange =@ $matches [ 1 ];
preg_match ( '/<Property FormalName="Ticker Symbol" Value="(.*)"\/>/i' , $strTmp , $matches );
$mnemo =@ $matches [ 1 ];
preg_match ( '/<NewsLineType FormalName="ClickThru URL"(?:.*)<NewsLineText>(.*)<\/NewsLineText>/isU' , $strTmp , $matches );
$siteWeb =@ $matches [ 1 ];
preg_match ( '/<NewsComponent>(?:.*)<Role FormalName="Logo"\/>(?:.*)<ContentItem Duid="(?:.*)" Href="(.*)">(?:.*)<\/NewsComponent>/isU' , $strTmp , $matches );
$logo = urldecode ( @ $matches [ 1 ]);
preg_match ( '/<NewsComponent>(?:.*)<Role FormalName="Body"\/>(?:.*)<ContentItem (?:.*)<Format FormalName="XHTML"\/>(?:.*)<DataContent>(.*)<\/DataContent>/isU' , $strTmp , $matches );
$bodyHtml = utf8_decode ( @ $matches [ 1 ]);
preg_match ( '/<NewsComponent>(?:.*)<Role FormalName="Body"\/>(?:.*)<ContentItem (?:.*)<Format FormalName="BW-Text"\/>(?:.*)<DataContent>(.*)<\/DataContent>/isU' , $strTmp , $matches );
$bodyTxt = utf8_decode ( @ $matches [ 1 ]);
preg_match_all ( '/<Property FormalName="BWIndustryKeywords" Value="(.*)"\/>/iU' , $strTmp , $matches );
$industry =@ implode ( ';' , @ $matches [ 1 ]);
preg_match_all ( '/<Property FormalName="BWCategoryKeywords" Value="(.*)"\/>/iU' , $strTmp , $matches );
$category =@ implode ( ';' , @ $matches [ 1 ]);
preg_match_all ( '/<Property FormalName="BWCountryKeywords" Value="(.*)"\/>/iU' , $strTmp , $matches );
$country =@ implode ( ';' , @ $matches [ 1 ]);
preg_match ( '/<Language FormalName="(.*)"\/>/i' , $strTmp , $matches );
$language =@ $matches [ 1 ];
$tabInsert = array ( //'companyId'=>$dom->getValueFromTag('COMPANY_ID'),
'companyName' => $dom -> getValueFromTag ( 'SlugLine' ),
'companyIsin' => $isin ,
'companyMnemo' => $mnemo ,
'companyWebSite' => $siteWeb ,
'companyLogoUrl' => $logo ,
/* 'companySiren' => $dom -> getValueFromTag ( 'COMPANY_SIREN' ),
'companyRic' => $dom -> getValueFromTag ( 'COMPANY_RIC' ),
'companyProfile' => $dom -> getValueFromTag ( 'URL3' ),
'companyAnnualReport' => $dom -> getValueFromTag ( 'URL4' ),
'companyInfoActionnaires' => $dom -> getValueFromTag ( 'URL5' ), */
'pressReleaseId' => $pressReleaseId ,
'pressReleaseDate' => substr ( str_replace ( 'T' , '' , $dom -> getValueFromTag ( 'DateAndTime' )), 0 , 14 ), // 20070906T190800+0000
'pressReleaseTitle' => $dom -> getValueFromTag ( 'HeadLine' ),
'pressReleaseText' => $bodyTxt ,
'pressReleaseHtml' => $bodyHtml ,
'indexMarketPlace' => $exchange ,
'indexTheme' => $category ,
'indexSector' => $industry ,
'indexCountry' => $country ,
'indexLanguage' => $language ,
/*
'pressReleaseAttachments' => $dom -> getValueFromTag ( 'ATTACHMENTS_URL1' ),
'pressReleaseUrl' => $dom -> getValueFromTag ( 'PRESS_RELEASE_URL_CNG' ),
'indexAll' => $dom -> getValueFromTag ( 'INDEX_ALL' ),
'indexQuoteInd' => $dom -> getValueFromTag ( 'INDEX_QUOTE_INDICATOR' ), */
'source' => 'B' ,
);
$ret = $iDb -> insert ( 'articles' , $tabInsert );
if ( ! $ret && $iDb -> getLastErrorNum () <> 1062 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - ERREUR " . $iDb -> getLastError () . EOL ;
print_r ( $tabInsert );
die ();
} elseif ( $iDb -> getLastErrorNum () <> 1062 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - Communiqu<71> n<> $pressReleaseId enregistr<74> avec succ<63> s. " . EOL ;
}
unset ( $dom );
}
echo date ( 'Y/m/d - H:i:s' ) . " - FIN de l'int<6E> gration des flux BusinessWire. " . EOL ;
/**
** INTEGRATION DES COMMUNIQUES " LES ECHOS "
**/
echo date ( 'Y/m/d - H:i:s' ) . " - DEBUT de l'int<6E> gration des flux Les Echos... " . EOL ;
$tabFichier = array ();
$dh = opendir ( LESECHOS_LOCAL_DIR . 'syndication/' );
while ( false !== ( $filename = readdir ( $dh ))) {
if ( $filename <> '.' && $filename <> '..' && substr ( $filename , - 4 ) == '.xml' )
$tabFichier [] = $filename ;
}
foreach ( $tabFichier as $k => $nomFichier ) {
$tabTmp = file ( LESECHOS_LOCAL_DIR . 'syndication/' . $nomFichier );
if ( ! $tabTmp ) die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : Fichier local $nomFichier inexistant ! " . EOL );
if ( stripos ( $tabTmp [ 0 ], 'ISO-8859-1' ) > 0 ) $encoding = 'ISO-8859-1' ;
elseif ( stripos ( $tabTmp [ 0 ], 'UTF-8' ) > 0 ) $encoding = 'UTF-8' ;
else die ( date ( 'Y/m/d - H:i:s' ) . " - ERREUR : Encoding " . $tabTmp [ 0 ] . " non g<> r<EFBFBD> ! " . EOL );
$dom = @ new DomDocument2 ();
$dom -> load ( LESECHOS_LOCAL_DIR . 'syndication/' . $nomFichier );
$pressReleaseId = $dom -> getValueFromTag ( 'CODE' );
$language = strtoupper ( $dom -> getValueFromTag ( 'PRESS_RELEASE_LANGUAGE' ));
$tabInsert = array ( //'companyId'=>$dom->getValueFromTag('COMPANY_ID'),
'companyName' => $dom -> getValueFromTag ( 'COMPANY_NAME' ),
'companyIsin' => $dom -> getValueFromTag ( 'COMPANY_ISIN' ),
//'companySiren'=>$dom->getValueFromTag('COMPANY_SIREN'),
//'companyRic'=>$dom->getValueFromTag('COMPANY_RIC'),
//'companyLogoUrl'=>$dom->getValueFromTag('URL1'),
//'companyWebSite'=>$dom->getValueFromTag('URL2'),
//'companyProfile'=>$dom->getValueFromTag('URL3'),
//'companyAnnualReport'=>$dom->getValueFromTag('URL4'),
//'companyInfoActionnaires'=>$dom->getValueFromTag('URL5'),
'pressReleaseId' => $pressReleaseId ,
'pressReleaseDate' => WDate :: dateT ( 'd/m/Y' , 'Y-m-d' , $dom -> getValueFromTag ( 'PRESS_RELEASE_PUBDATE' )) . ' ' .
$dom -> getValueFromTag ( 'PRESS_RELEASE_PUBTIME' ),
'pressReleaseTitle' => $dom -> getValueFromTag ( 'PRESS_RELEASE_TITLE' ),
'pressReleaseText' => $dom -> getValueFromTag ( 'TEXT_FORMAT' ),
'pressReleaseHtml' => $dom -> getValueFromTag ( 'HTML_FORMAT' ),
'pressReleaseAttachments' => $dom -> getValueFromTag ( 'LINK_ORIGINAL' ),
'pressReleaseUrl' => $dom -> getValueFromTag ( 'LINK' ),
//'indexAll'=>$dom->getValueFromTag('INDEX_ALL'),
'indexTheme' => $dom -> getValueFromTag ( 'PRESS_RELEASE_THEME' ),
//'indexSector'=>$dom->getValueFromTag('INDEX_SECTOR'),
//'indexCountry'=>$dom->getValueFromTag('INDEX_COUNTRY'),
'indexLanguage' => $language ,
//'indexMarketPlace'=>$dom->getValueFromTag('INDEX_MARKET_PLACE'),
//'indexQuoteInd'=>$dom->getValueFromTag('INDEX_QUOTE_INDICATOR'),
'source' => 'E' ,
);
if ( $language == 'FR' ) {
$ret = $iDb -> insert ( 'articles' , $tabInsert );
if ( ! $ret && $iDb -> getLastErrorNum () <> 1062 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - ERREUR " . $iDb -> getLastError () . EOL ;
print_r ( $tabInsert );
die ();
} elseif ( $iDb -> getLastErrorNum () <> 1062 ) {
echo date ( 'Y/m/d - H:i:s' ) . " - Communiqu<71> n<> $pressReleaseId enregistr<74> avec succ<63> s. " . EOL ;
}
} else
echo date ( 'Y/m/d - H:i:s' ) . " - Langue du communiqu<71> non int<6E> gr<67> e en base ( $language ). " . EOL ;
unset ( $dom );
}
?>