batch/1.1/old/getRSS.php
2013-06-19 08:24:49 +00:00

683 lines
31 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/php -c/var/www/batch/config/php_batch_sd.ini
<?php
/**
** Attention : Si ajout d'une nouvelle source modifier la table articles !
** ALTER TABLE `articles` CHANGE `source` `source` ENUM( 'A', 'B', 'D', 'E', 'H' ) NOT NULL DEFAULT 'A';
**/
echo date ('Y/m/d - H:i:s')." - Début de lecture des flux RSS...".EOL;
include_once(FWK_PATH.'common/chiffres.php');
include_once(FWK_PATH.'common/dates.php');
include_once(FWK_PATH.'common/rss.php');
include_once(FWK_PATH.'common/curl.php');
$iDb=new WDB('presse');
$iDb2=new WDB('sdv1');
$iRss=new atomRss();
$cours=$doInvestir=$doBoursier=$doPQR=$modeVerbose=false;
$heureExec=date('Hi')*1;
$strInfoScript='Usage : '.basename($argv[0]). " [OPTION]
Collecte des flux RSS.
Options :
-i Traiter les flux RSSS Isin de Invstir
-b Traiter les flux RSSS Isin de Boursier
-p Traiter la PQR
-v Mode verbeux
";
$argv=$_SERVER['argv'];
if ($_SERVER['argc']>1)
{
for ($i=1; isset($argv[$i]); $i++) {
if (substr($argv[$i],0,1)=='-') {
switch (substr($argv[$i],1,1)) {
case 'i': $doInvestir=true; break;
case 'b': $doBoursier=true; break;
case 'p': $doPQR=true; break;
case 'v': $modeVerbose=true; break;
case '-':
case '?': die($strInfoScript); break;
default: die('Option '. $argv[$i] . " inconnue !\n"); break;
}
}
}
}
if (!$doInvestir && !$doBoursier) $doPQR=true;
$lstRegex=$iDb->select('pqr_filtres', "id, nomFiltre, motsClefs", '1');
foreach ($lstRegex as $iReg=>$tReg)
$tabRegEx[]=array( 'id' => $tReg['id'],
'nom' => $tReg['nomFiltre'],
'mots' => $tReg['motsClefs'],
);
$nbFiltres=count($tabRegEx);
echo date ('Y/m/d - H:i:s')." - Nombre de règles de filtrages : ".$nbFiltres.EOL;
//print_r($tabRegEx);
//die();
if ($doInvestir) { //$heureExec>=2000 && $heureExec<2030) {
$lstRss=$iDb2->select('bourse_isin', "11 as id, CONCAT('http://www.investir.fr/RSS/RSS.php?codeISIN=',code_isin,'&type=infos-conseils') AS url, code_isin, 'horaire' AS frequence", "code_isin<>'' GROUP BY code_isin");
$cours=true;
$typeFlux='ISIN Investir';
} elseif ($doBoursier) { //heureExec>=2100 && $heureExec<2130) {
$lstRss=$iDb2->select('bourse_isin', "18 as id, CONCAT('http://www.boursier.com/syndication/rss/news/',code_isin,'/FR') AS url, code_isin, 'horaire' AS frequence", "code_isin<>'' GROUP BY code_isin");
$cours=true;
$typeFlux='ISIN Boursier';
} else {
$lstRss=$iDb->select('pqr_rss', "id, url, titreRss, siteWeb, description, dateFluxRss, dateDwl, '' as code_isin, frequence", 'actif=1');
$typeFlux='PQR';
}
shuffle($lstRss);
$nbFlux=count($lstRss);
echo date ('Y/m/d - H:i:s')." - Nombre de flux '$typeFlux' à traiter : ".$nbFlux.EOL;
foreach ($lstRss as $iFlux=>$tabFlux) {
$urlRss=$tabFlux['url'];
$idRss=$tabFlux['id'];
$isin=$tabFlux['code_isin'];
$horaire=$tabFlux['frequence'];
//echo "Je charge le flux $urlRss\n";
if ($idRss<>11 && $idRss<>18 && $horaire=='quotidien' && $heureExec>800) {
echo date ('Y/m/d - H:i:s')." - Feed $horaire #$iFlux/$nbFlux : $urlRss - Flux QUOTIDIEN ignoré !".EOL;
continue;
}/* else
echo date ('Y/m/d - H:i:s')." - Feed $horaire #$iFlux : $urlRss - $idRss, $horaire,$heureExec".EOL;
*/
if ($idRss==0 || $idRss==18) randsleep(1,2);
$tabFeed=@$iRss->loadRss($urlRss);
echo date ('Y/m/d - H:i:s')." - Feed $horaire #$iFlux/$nbFlux : $urlRss - Code Retour HTTP : ".$iRss->codeErreurHttp.EOL;
if (count($tabFeed)>0) {
//print_r($tabFeed);
foreach ($tabFeed as $iFeed=>$tabArticle) {
$dateRss=date("YmdHis",strtotime($tabArticle['updated']));
if ($tabArticle['type']==0 && !$cours) {
$tabUpdate=array('titreRss'=>trim(utf8_decode($tabArticle['title'])),
'siteWeb'=>trim($tabArticle['link']),
'description'=>trim(utf8_decode($tabArticle['description'])),
'dateFluxRss'=>$dateRss,
'dateDwl'=>date('YmdHis'));
if($iDb->update('pqr_rss',$tabUpdate,"id=$idRss"))
echo date ('Y/m/d - H:i:s')." - Feed $horaire #$iFlux : Mise à jour des informations du flux $urlRss".EOL;
else
echo date ('Y/m/d - H:i:s')." - Feed $horaire #$iFlux : ERREUR lors de la mise à jour des informations du flux $urlRss".EOL;
}
elseif ($tabArticle['type']==1) {
$urlArticle=trim($tabArticle['link']);
$idArticle=md5($dateRss.$urlArticle);
foreach ($tabRegEx as $iReg=>$tReg) {
$regEx=$tReg['mots'];
$regId=$tReg['id'];
$regNom=$tReg['nom'];
$titre=trim(utf8_decode($tabArticle['title']));
$description=trim(utf8_decode($tabArticle['description']));
sleep(1);
$page=getUrl($urlArticle);
$article=$page['body'];
if (preg_match("/$regEx/Uis", $titre.' '.$description.' '.$article)) {
$tabInsert=array('titre'=>$titre,
'lien'=>trim(utf8_decode($urlArticle)),
'description'=>$description,
'article'=>$article,
'dateRss'=>$dateRss,
'idArticle'=>$idArticle,
'isin'=>$isin,
'idRss'=>$idRss);
$id=$iDb->insert('pqr_articles',$tabInsert);
if ($id) {
//if($iDb->update('pqr_articles',$tabUpdate,'id='.$id))
echo date ('Y/m/d - H:i:s')." - Feed $horaire #$iFlux/$nbFlux : Insertion de l'article n°$id".EOL;
} elseif (mysql_errno()<>1062)
die(mysql_errno().': '.mysql_error());
}
}
}
}
} else {
//
echo date ('Y/m/d - H:i:s')." - Feed #$iFlux/$nbFlux : $urlRss - Code Retour HTTP : ".$iRss->codeErreurHttp.EOL;
$tabUpdate=array('dernierCodeRetour'=>$iRss->codeErreurHttp);
if($iDb->update('pqr_rss',$tabUpdate,"id=$idRss"))
echo date ('Y/m/d - H:i:s')." - Feed $horaire #$iFlux/$nbFlux : Mise à jour des informations du flux $urlRss".EOL;
}
//<channel><title>Economie - Le Monde.fr</title><link>http://www.lemonde.fr</link><description>Toute l'actualité au moment de la connexion</description><language>en</language><copyright>Copyright Le Monde.fr</copyright><pubDate>Thu, 05 Feb 2009 11:51:14 GMT</pubDate><lastBuildDate>Thu, 05 Feb 2009 11:51:14 GMT</lastBuildDate><ttl>30</ttl><image><title>Economie - Le Monde.fr</title><url>http://medias.lemonde.fr/mmpub/img/lgo/lemondefr_rss.gif</url><link>http://www.lemonde.fr</link></image>
}
echo date('Y/m/d - H:i:s') ." - FIN du script.".EOL;
die();
/*
INSERT INTO `presse`.`` (
`id` ,
`url` ,
`dateInsert`
)
VALUES (
NULL , 'http://www.lemondeinformatique.fr/flux-rss/ssii/page-1.html', ''
);
*/die();
echo date('Y/m/d - H:i:s') ." - DEBUT de la récupération des flux Hugin...".EOL;
$ret=ftp_mget(HUGIN_FTP_URL, HUGIN_FTP_USER, HUGIN_FTP_PASS, '*.xml', HUGIN_LOCAL_DIR, true);
if ($ret===false)
die (date ('Y/m/d - H:i:s')." - ERREUR : Récupération des flux Hugin en FTP incorrecte !".EOL);
else
echo date ('Y/m/d - H:i:s')." - FIN de la récupération des flux Hugin en FTP ($ret fichiers récupérés).".EOL;
echo date('Y/m/d - H:i:s') ." - DEBUT de la récupération des flux ActusNews...".EOL;
$ret=ftp_mget(ACTUSNEWS_FTP_URL, ACTUSNEWS_FTP_USER, ACTUSNEWS_FTP_PASS, '*.xml', ACTUSNEWS_LOCAL_DIR, true);
if ($ret===false)
die (date ('Y/m/d - H:i:s')." - ERREUR : Récupération des flux ActusNews en FTP incorrecte !".EOL);
else
echo date ('Y/m/d - H:i:s')." - FIN de la récupération des flux ActusNews en FTP ($ret fichiers récupérés).".EOL;
echo date('Y/m/d - H:i:s') ." - DEBUT de la récupération des flux Di Release...".EOL;
$ret=ftp_mget(DIRELEASE_FTP_URL, DIRELEASE_FTP_USER, DIRELEASE_FTP_PASS, '*.xml', DIRELEASE_LOCAL_DIR, true);
if ($ret===false)
die (date ('Y/m/d - H:i:s')." - ERREUR : Récupération des flux Di Release en FTP incorrecte !".EOL);
else
echo date ('Y/m/d - H:i:s')." - FIN de la récupération des flux Di Release en FTP ($ret fichiers récupérés).".EOL;
echo date('Y/m/d - H:i:s') ." - DEBUT de la récupération des flux BusinessWire...".EOL;
$dh = opendir(BUSINESSWIRE_LOCAL_DIR_INCOMING);
$ret=0;
while (false !== ($filename = readdir($dh))) {
if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml')
if (!file_exists(BUSINESSWIRE_LOCAL_DIR . $filename) && filesize(BUSINESSWIRE_LOCAL_DIR_INCOMING . $filename)>0) {
copy(BUSINESSWIRE_LOCAL_DIR_INCOMING . $filename,BUSINESSWIRE_LOCAL_DIR . $filename);
$ret++;
}
}
echo date ('Y/m/d - H:i:s')." - FIN de la récupération des flux BusinessWire ($ret fichiers copiés).".EOL;
echo date('Y/m/d - H:i:s') ." - DEBUT de la récupération des flux Les Echos...".EOL;
$ret=ftp_mget(LESECHOS_FTP_URL, LESECHOS_FTP_USER, LESECHOS_FTP_PASS, 'syndication/*.xml', LESECHOS_LOCAL_DIR, true);
if ($ret===false)
die (date ('Y/m/d - H:i:s')." - ERREUR : Récupération des flux Les Echos en FTP incorrecte !".EOL);
else
echo date ('Y/m/d - H:i:s')." - FIN de la récupération des flux Les Echos en FTP ($ret fichiers récupérés).".EOL;
/**
** INTEGRATION DES COMMUNIQUES "HUGIN"
**/
echo date('Y/m/d - H:i:s') ." - DEBUT de l'intégration des flux Hugin...".EOL;
$tabFichier=array();
$dh = opendir(HUGIN_LOCAL_DIR);
while (false !== ($filename = readdir($dh))) {
if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml')
$tabFichier[] = $filename;
}
foreach ($tabFichier as $k => $nomFichier) {
$tabTmp=file(HUGIN_LOCAL_DIR.$nomFichier);
if (!$tabTmp) die(date ('Y/m/d - H:i:s')." - ERREUR : Fichier local $nomFichier inexistant !".EOL);
if (stripos($tabTmp[0], 'ISO-8859-1')>0) $encoding='ISO-8859-1';
elseif (stripos($tabTmp[0], 'UTF-8')>0) $encoding='UTF-8';
else die(date ('Y/m/d - H:i:s')." - ERREUR : Encoding ".$tabTmp[0]." non géré !".EOL);
$dom = @new DomDocument2();
$dom->load(HUGIN_LOCAL_DIR.$nomFichier);
$pressReleaseId=$dom->getValueFromTag('PRESS_RELEASE_ID');
$tabInsert=array( 'companyId'=>$dom->getValueFromTag('COMPANY_ID'),
'companyName'=>$dom->getValueFromTag('COMPANY_NAME'),
'companyIsin'=>$dom->getValueFromTag('COMPANY_ISIN'),
'companySiren'=>$dom->getValueFromTag('COMPANY_SIREN'),
'companyRic'=>$dom->getValueFromTag('COMPANY_RIC'),
'companyLogoUrl'=>$dom->getValueFromTag('URL1'),
'companyWebSite'=>$dom->getValueFromTag('URL2'),
'companyProfile'=>$dom->getValueFromTag('URL3'),
'companyAnnualReport'=>$dom->getValueFromTag('URL4'),
'companyInfoActionnaires'=>$dom->getValueFromTag('URL5'),
'pressReleaseId'=>$pressReleaseId,
'pressReleaseDate'=>$dom->getValueFromTag('PRESS_RELEASE_DATE_TIME'),
'pressReleaseTitle'=>$dom->getValueFromTag('PRESS_RELEASE_TITLE'),
'pressReleaseText'=>$dom->getValueFromTag('TEXT_FORMAT'),
'pressReleaseHtml'=>$dom->getValueFromTag('HTML_FORMAT'),
'pressReleaseAttachments'=>$dom->getValueFromTag('ATTACHMENTS_URL1'),
'pressReleaseUrl'=>$dom->getValueFromTag('PRESS_RELEASE_URL_CNG'),
'indexAll'=>$dom->getValueFromTag('INDEX_ALL'),
'indexTheme'=>$dom->getValueFromTag('INDEX_THEME'),
'indexSector'=>$dom->getValueFromTag('INDEX_SECTOR'),
'indexCountry'=>$dom->getValueFromTag('INDEX_COUNTRY'),
'indexLanguage'=>$dom->getValueFromTag('INDEX_LANGUAGE'),
'indexMarketPlace'=>$dom->getValueFromTag('INDEX_MARKET_PLACE'),
'indexQuoteInd'=>$dom->getValueFromTag('INDEX_QUOTE_INDICATOR'),
'source'=>'H',
);
$ret=$iDb->insert('articles', $tabInsert);
if (!$ret && $iDb->getLastErrorNum()<>1062) {
echo date ('Y/m/d - H:i:s')." - ERREUR ". $iDb->getLastError() . EOL;
print_r($tabInsert);
die();
} elseif ($iDb->getLastErrorNum()<>1062) {
echo date ('Y/m/d - H:i:s')." - Communiqué n°$pressReleaseId enregistré avec succès.".EOL;
}
unset($dom);
}
echo date('Y/m/d - H:i:s') ." - FIN de l'intégration des flux Hugin.".EOL;
/**
** INTEGRATION DES COMMUNIQUES "ACTUSNEWS"
**/
echo date('Y/m/d - H:i:s') ." - DEBUT de l'intégration des flux ActusNews...".EOL;
$tabFichier=array();
$dh = opendir(ACTUSNEWS_LOCAL_DIR);
while (false !== ($filename = readdir($dh))) {
if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml')
$tabFichier[] = $filename;
}
foreach ($tabFichier as $k => $nomFichier) {
$tabTmp=file(ACTUSNEWS_LOCAL_DIR.$nomFichier);
if (!$tabTmp) die(date ('Y/m/d - H:i:s')." - ERREUR : Fichier local $nomFichier inexistant !".EOL);
if (stripos($tabTmp[0], 'ISO-8859-1')>0) $encoding='ISO-8859-1';
elseif (stripos($tabTmp[0], 'UTF-8')>0) $encoding='UTF-8';
else die(date ('Y/m/d - H:i:s')." - ERREUR : Encoding ".$tabTmp[0]." non géré !".EOL);
foreach ($tabTmp as $i_ligne => $ligne) {
if (stripos($ligne, '<communique ')!== false) {//<communique
//echo date ('Y/m/d - H:i:s')." - Première ligne du communiqué...".EOL;
if (stripos($ligne, 'langue="FR"')>0) {
//echo date ('Y/m/d - H:i:s')." - Communiqué en français ($ligne).".EOL;
$french=true;
} else {
//echo date ('Y/m/d - H:i:s')." - Langue non intégrée en base ($ligne) !".EOL;
$french=false;
break;
}
break;
}
}
if ($french) {
//echo date ('Y/m/d - H:i:s')." - Chargement du communiqué ...".EOL;
$dom = @new DomDocument2();
$dom->load(ACTUSNEWS_LOCAL_DIR.$nomFichier);
$pressReleaseId=$dom->getValueFromTag('id');
$tabInsert=array( //'companyId'=>$dom->getValueFromTag('COMPANY_ID'),
'companyName'=>trim($dom->getValueFromTag('raisonsociale')),
'companyIsin'=>trim($dom->getValueFromTag('codeisin')),
//'companySiren'=>$dom->getValueFromTag('COMPANY_SIREN'),
'companyRic'=>trim($dom->getValueFromTag('code_reuters')),
'companyLogoUrl'=>trim($dom->getValueFromTag('logo')),
'companyWebSite'=>trim($dom->getValueFromTag('site')),
//'companyProfile'=>$dom->getValueFromTag('URL3'),
//'companyAnnualReport'=>$dom->getValueFromTag('URL4'),
'companyInfoActionnaires'=>trim($dom->getValueFromTag('site_investisseur')),
'pressReleaseId'=>$pressReleaseId,
'pressReleaseDate'=>trim($dom->getValueFromTag('miseajour')),
'pressReleaseTitle'=>trim($dom->getValueFromTag('titre')),
'pressReleaseText'=>trim(strtr(html_entity_decode(strip_tags($dom->getValueFromTag('contenu'))),array('&rsquo;'=>''))),
'pressReleaseHtml'=>trim($dom->getValueFromTag('contenu')),
'pressReleaseAttachments'=>trim($dom->getValueFromTag('pdf')),
//'pressReleaseUrl'=>$dom->getValueFromTag('PRESS_RELEASE_URL_CNG'),
/*'indexAll'=>$dom->getValueFromTag('INDEX_ALL'),
'indexTheme'=>$dom->getValueFromTag('INDEX_THEME'),
'indexCountry'=>$dom->getValueFromTag('INDEX_COUNTRY'),
'indexLanguage'=>$dom->getValueFromTag('INDEX_LANGUAGE'),
*/
'indexQuoteInd'=>trim($dom->getValueFromTag('indice')),
'indexSector'=>trim($dom->getValueFromTag('secteur')),
'indexMarketPlace'=>trim($dom->getValueFromTag('marche_cotation')),
'source'=>'A',
'companyBloomberg'=>trim($dom->getValueFromTag('code_bloomberg')),
'companyMnemo'=>trim($dom->getValueFromTag('mnemo')),
'companyNbTitles'=>trim($dom->getValueFromTag('nb_de_titres')),
'companyFootsie'=>trim($dom->getValueFromTag('footsie')),
);
$ret=$iDb->insert('articles', $tabInsert);
if (!$ret && $iDb->getLastErrorNum()<>1062) {
echo date ('Y/m/d - H:i:s')." - ERREUR ". $iDb->getLastError() . EOL;
print_r($tabInsert);
die();
} elseif ($iDb->getLastErrorNum()<>1062) {
echo date ('Y/m/d - H:i:s')." - Communiqué n°$pressReleaseId enregistré avec succès.".EOL;
}
unset($dom);
}
}
echo date('Y/m/d - H:i:s') ." - FIN de l'intégration des flux ActusNews...".EOL;
/**
** INTEGRATION DES COMMUNIQUES "DIRELEASE"
**/
echo date('Y/m/d - H:i:s') ." - DEBUT de l'intégration des flux DiRelease...".EOL;
$tabFichier=array();
$dh = opendir(DIRELEASE_LOCAL_DIR);
while (false !== ($filename = readdir($dh))) {
if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml')
$tabFichier[] = $filename;
}
/** @todo Pour DiRelease, Ne pas utiliser le parser XML mais uniquement des expr. régulière car XML de merde.
** Prendre tous les ISIN et rechercher le SIREN.
**/
foreach ($tabFichier as $k => $nomFichier) {
$strTmp=file_get_contents(DIRELEASE_LOCAL_DIR.$nomFichier);
/*$strTmp=preg_replace('/\s+/is',' ', $strTmp);
$fp=fopen(DIRELEASE_LOCAL_DIR.'new_'.$nomFichier, 'w');
fwrite($fp, $strTmp);
fclose($fp);
$tabTmp=file(DIRELEASE_LOCAL_DIR.'new_'.$nomFichier);
if (!$tabTmp) die(date ('Y/m/d - H:i:s')." - ERREUR : Fichier local $nomFichier inexistant !".EOL);
*/
if (stripos($strTmp, 'ISO-8859-1')>0) $encoding='ISO-8859-1';
elseif (stripos($strTmp, 'UTF-8')>0) $encoding='UTF-8';
else die(date ('Y/m/d - H:i:s')." - ERREUR : Encoding non géré !".EOL);
if (stripos($strTmp, 'langue="FR"')>0) {
echo date ('Y/m/d - H:i:s')." - Chargement du communiqué en français ($ligne).".EOL;
preg_match('/<id>(.*)<\/id>/isU',$strTmp,$matches);
$pressReleaseId=trim(@$matches[1]);
preg_match('/<raisonsociale>(.*)<\/raisonsociale>/isU',$strTmp,$matches);
$raisonsociale=trim(@$matches[1]);
preg_match('/<codeisin>(.*)<\/codeisin>/isU',$strTmp,$matches);
$codeisin=trim(@$matches[1]);
preg_match('/<code_reuters>(.*)<\/code_reuters>/isU',$strTmp,$matches);
$code_reuters=trim(@$matches[1]);
preg_match('/<logo>(.*)<\/logo>/isU',$strTmp,$matches);
$logo=trim(@$matches[1]);
preg_match('/<site>(.*)<\/site>/isU',$strTmp,$matches);
$site=trim(@$matches[1]);
preg_match('/<site_investisseur>(.*)<\/site_investisseur>/isU',$strTmp,$matches);
$site_investisseur=trim(@$matches[1]);
preg_match('/<logo>(.*)<\/logo>/isU',$strTmp,$matches);
$logo=trim(preg_replace('/\s/','',@$matches[1]));
preg_match('/<miseajour>(.*)<\/miseajour>/isU',$strTmp,$matches);
$miseajour=trim(@$matches[1]);
preg_match('/<titre>(.*)<\/titre>/isU',$strTmp,$matches);
$titre=trim(@$matches[1]);
preg_match('/<contenu>(.*)<\/contenu>/isU',$strTmp,$matches);
$contenu=trim(strtr(@$matches[1],array('<![CDATA['=>'', ']]>'=>'')));
preg_match('/<pdf>(.*)<\/pdf>/isU',$strTmp,$matches);
$pdf=preg_replace('/\s/','',@$matches[1]);
preg_match('/<indice>(.*)<\/indice>/isU',$strTmp,$matches);
$indice=trim(@$matches[1]);
preg_match('/<secteur>(.*)<\/secteur>/isU',$strTmp,$matches);
$secteur=trim(@$matches[1]);
preg_match('/<marche_cotation>(.*)<\/marche_cotation>/isU',$strTmp,$matches);
$marche_cotation=trim(@$matches[1]);
preg_match('/<code_bloomberg>(.*)<\/code_bloomberg>/isU',$strTmp,$matches);
$code_bloomberg=trim(@$matches[1]);
preg_match('/<mnemo>(.*)<\/mnemo>/isU',$strTmp,$matches);
$mnemo=trim(@$matches[1]);
preg_match('/<nb_de_titres>(.*)<\/nb_de_titres>/isU',$strTmp,$matches);
$nb_de_titres=trim(@$matches[1]);
preg_match('/<footsie>(.*)<\/footsie>/isU',$strTmp,$matches);
$footsie=trim(@$matches[1]);
$tabInsert=array( //'companyId'=>$dom->getValueFromTag('COMPANY_ID'),
'companyName'=>$raisonsociale,
'companyIsin'=>$codeisin,
//'companySiren'=>$dom->getValueFromTag('COMPANY_SIREN'),
'companyRic'=>$code_reuters,
'companyLogoUrl'=>$logo,
'companyWebSite'=>$site,
//'companyProfile'=>$dom->getValueFromTag('URL3'),
//'companyAnnualReport'=>$dom->getValueFromTag('URL4'),
'companyInfoActionnaires'=>$site_investisseur,
'pressReleaseId'=>$pressReleaseId,
'pressReleaseDate'=>$miseajour,
'pressReleaseTitle'=>$titre,
'pressReleaseText'=>trim(strtr(html_entity_decode(strip_tags($contenu)),array('&rsquo;'=>''))),
'pressReleaseHtml'=>$contenu,
'pressReleaseAttachments'=>$pdf,
/*
'pressReleaseUrl'=>$dom->getValueFromTag('PRESS_RELEASE_URL_CNG'),
'indexAll'=>$dom->getValueFromTag('INDEX_ALL'),
'indexTheme'=>$dom->getValueFromTag('INDEX_THEME'),
'indexCountry'=>$dom->getValueFromTag('INDEX_COUNTRY'),
'indexLanguage'=>$dom->getValueFromTag('INDEX_LANGUAGE'),
*/
'indexQuoteInd'=>$indice,
'indexSector'=>$secteur,
'indexMarketPlace'=>$marche_cotation,
'source'=>'D',
'companyBloomberg'=>$code_bloomberg,
'companyMnemo'=>$mnemo,
'companyNbTitles'=>$nb_de_titres,
'companyFootsie'=>$footsie,
);
$ret=$iDb->insert('articles', $tabInsert);
if (!$ret && $iDb->getLastErrorNum()<>1062) {
echo date ('Y/m/d - H:i:s')." - ERREUR ". $iDb->getLastError() . EOL;
print_r($tabInsert);
die();
} elseif ($iDb->getLastErrorNum()<>1062) {
echo date ('Y/m/d - H:i:s')." - Communiqué n°$pressReleaseId enregistré avec succès.".EOL;
}
unset($dom);
}
}
echo date('Y/m/d - H:i:s') ." - FIN de l'intégration des flux DiRelease...".EOL;
/**
** INTEGRATION DES COMMUNIQUES "BUSINESSWIRE"
**/
echo date('Y/m/d - H:i:s') ." - DEBUT de l'intégration des flux BusinessWire...".EOL;
$tabFichier=array();
$dh = opendir(BUSINESSWIRE_LOCAL_DIR);
while (false !== ($filename = readdir($dh))) {
if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml')
$tabFichier[] = $filename;
}
foreach ($tabFichier as $k => $nomFichier) {
$tabTmp=file(BUSINESSWIRE_LOCAL_DIR.$nomFichier);
$strTmp=implode("\n", $tabTmp);
if (!$tabTmp) die(date ('Y/m/d - H:i:s')." - ERREUR : Fichier local ".BUSINESSWIRE_LOCAL_DIR."$nomFichier inexistant !".EOL);
if (stripos($tabTmp[0], 'ISO-8859-1')>0) $encoding='ISO-8859-1';
elseif (stripos($tabTmp[0], 'UTF-8')>0) $encoding='UTF-8';
else die(date ('Y/m/d - H:i:s')." - ERREUR : Encoding ".$tabTmp[0]." non géré !".EOL);
/*<DateAndTime>20070906T190800+0000</DateAndTime>
<NewsService FormalName="Business Wire"/>
<NewsProduct FormalName="BUSINESS WIRE"/>
</NewsEnvelope>
<NewsItem>
<Identification>
<NewsIdentifier>
<ProviderId>businesswire.com</ProviderId>
<DateId>20010714</DateId>
<>20070906006073</NewsItemId>
<RevisionId PreviousRevision="0" Update="N">1</RevisionId>
<PublicIdentifier>urn:newsml:businesswire.com:20010714:20070906006073:1</PublicIdentifier>
</NewsIdentifier>
</Identification>
<NewsManagement>
<NewsItemType FormalName="News"/>
<FirstCreated>20070906T190800+0000</FirstCreated>
<ThisRevisionCreated>20070906T190800+0000</ThisRevisionCreated>
<Status FormalName="Usable"/>
<AssociatedWith NewsItem="businesswire.com:20010714:242009"/>
</NewsManagement>
<NewsComponent>
<BasisForChoice Rank="1">./NewsComponent/DescriptiveMetadata/Language</BasisForChoice>
<NewsLines>
<CopyrightLine>Copyright Business Wire 2007</CopyrightLine>
</NewsLines>
<AdministrativeMetadata>
<Source>
<Party FormalName="CIT Group Inc."/>
</Source>
<Contributor>
<Comment FormalName="BWoffices">NY</Comment>
<Party FormalName="DB" Scheme="BWEditor"/>
</Contributor>
</AdministrativeMetadata>
<DescriptiveMetadata>
<Language FormalName="fr"/>
<Genre FormalName="Release"/>
</DescriptiveMetadata>
<Metadata>
<MetadataType FormalName="BWKeywords"/>
<Property FormalName="BWCountryKeywords" Value="United States"/>
<Property FormalName="BWRegionKeywords" Value="Europe"/>
<Property FormalName="BWRegionKeywords" Value="North America"/>
<Property FormalName="BWIndustryKeywords" Value="Manufacturing"/>
<Property FormalName="BWIndustryKeywords" Value="Aerospace"/>
<Property FormalName="BWCategoryKeywords" Value="Product/Service"/>
<Property FormalName="BWIndustryKeywords" Value="Professional Services"/>
<Property FormalName="BWIndustryKeywords" Value="Finance"/>
<Property FormalName="BWStateKeywords" Value="New York"/>
<Property FormalName="BWCountryKeywords" Value="Ireland"/>
</Metadata>
<Metadata>
<MetadataType FormalName="Securities Identifier"/>
<Property FormalName="" Value="CIT"/>
<Property FormalName="Exchange" Value="NYSE"/>
<Property FormalName="ISIN" Value="US1255811085"/>
<Property FormalName="SlugLine Display Order" Value="1"/>
</Metadata>
<NewsComponent>
<BasisForChoice Rank="1">./NewsComponent/Role</BasisForChoice>
<NewsLines>
<>CIT consolide son engagement envers l'économie irlandaise et agrandit son centre opérationnel de Dublin</HeadLine>*/
$dom = @new DomDocument2();
$dom->load(BUSINESSWIRE_LOCAL_DIR.$nomFichier);
$pressReleaseId=$dom->getValueFromTag('NewsItemId');
preg_match('/<Property FormalName="ISIN" Value="(.*)"\/>/i',$strTmp,$matches);
$isin=@$matches[1];
preg_match('/<Property FormalName="Exchange" Value="(.*)"\/>/i',$strTmp,$matches);
$exchange=@$matches[1];
preg_match('/<Property FormalName="Ticker Symbol" Value="(.*)"\/>/i',$strTmp,$matches);
$mnemo=@$matches[1];
preg_match('/<NewsLineType FormalName="ClickThru URL"(?:.*)<NewsLineText>(.*)<\/NewsLineText>/isU',$strTmp,$matches);
$siteWeb=@$matches[1];
preg_match('/<NewsComponent>(?:.*)<Role FormalName="Logo"\/>(?:.*)<ContentItem Duid="(?:.*)" Href="(.*)">(?:.*)<\/NewsComponent>/isU',$strTmp,$matches);
$logo=urldecode(@$matches[1]);
preg_match('/<NewsComponent>(?:.*)<Role FormalName="Body"\/>(?:.*)<ContentItem (?:.*)<Format FormalName="XHTML"\/>(?:.*)<DataContent>(.*)<\/DataContent>/isU',$strTmp,$matches);
$bodyHtml=utf8_decode(@$matches[1]);
preg_match('/<NewsComponent>(?:.*)<Role FormalName="Body"\/>(?:.*)<ContentItem (?:.*)<Format FormalName="BW-Text"\/>(?:.*)<DataContent>(.*)<\/DataContent>/isU',$strTmp,$matches);
$bodyTxt=utf8_decode(@$matches[1]);
preg_match_all('/<Property FormalName="BWIndustryKeywords" Value="(.*)"\/>/iU',$strTmp,$matches);
$industry=@implode(';',@$matches[1]);
preg_match_all('/<Property FormalName="BWCategoryKeywords" Value="(.*)"\/>/iU',$strTmp,$matches);
$category=@implode(';',@$matches[1]);
preg_match_all('/<Property FormalName="BWCountryKeywords" Value="(.*)"\/>/iU',$strTmp,$matches);
$country=@implode(';',@$matches[1]);
preg_match('/<Language FormalName="(.*)"\/>/i',$strTmp,$matches);
$language=@$matches[1];
$tabInsert=array( //'companyId'=>$dom->getValueFromTag('COMPANY_ID'),
'companyName'=>$dom->getValueFromTag('SlugLine'),
'companyIsin'=>$isin,
'companyMnemo'=>$mnemo,
'companyWebSite'=>$siteWeb,
'companyLogoUrl'=>$logo,
/*'companySiren'=>$dom->getValueFromTag('COMPANY_SIREN'),
'companyRic'=>$dom->getValueFromTag('COMPANY_RIC'),
'companyProfile'=>$dom->getValueFromTag('URL3'),
'companyAnnualReport'=>$dom->getValueFromTag('URL4'),
'companyInfoActionnaires'=>$dom->getValueFromTag('URL5'),*/
'pressReleaseId'=>$pressReleaseId,
'pressReleaseDate'=>substr(str_replace('T','',$dom->getValueFromTag('DateAndTime')),0,14),// 20070906T190800+0000
'pressReleaseTitle'=>$dom->getValueFromTag('HeadLine'),
'pressReleaseText'=>$bodyTxt,
'pressReleaseHtml'=>$bodyHtml,
'indexMarketPlace'=>$exchange,
'indexTheme'=>$category,
'indexSector'=>$industry,
'indexCountry'=>$country,
'indexLanguage'=>$language,
/*
'pressReleaseAttachments'=>$dom->getValueFromTag('ATTACHMENTS_URL1'),
'pressReleaseUrl'=>$dom->getValueFromTag('PRESS_RELEASE_URL_CNG'),
'indexAll'=>$dom->getValueFromTag('INDEX_ALL'),
'indexQuoteInd'=>$dom->getValueFromTag('INDEX_QUOTE_INDICATOR'),*/
'source'=>'B',
);
$ret=$iDb->insert('articles', $tabInsert);
if (!$ret && $iDb->getLastErrorNum()<>1062) {
echo date ('Y/m/d - H:i:s')." - ERREUR ". $iDb->getLastError() . EOL;
print_r($tabInsert);
die();
} elseif ($iDb->getLastErrorNum()<>1062) {
echo date ('Y/m/d - H:i:s')." - Communiqué n°$pressReleaseId enregistré avec succès.".EOL;
}
unset($dom);
}
echo date('Y/m/d - H:i:s') ." - FIN de l'intégration des flux BusinessWire.".EOL;
/**
** INTEGRATION DES COMMUNIQUES "LES ECHOS"
**/
echo date('Y/m/d - H:i:s') ." - DEBUT de l'intégration des flux Les Echos...".EOL;
$tabFichier=array();
$dh = opendir(LESECHOS_LOCAL_DIR.'syndication/');
while (false !== ($filename = readdir($dh))) {
if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml')
$tabFichier[] = $filename;
}
foreach ($tabFichier as $k => $nomFichier) {
$tabTmp=file(LESECHOS_LOCAL_DIR.'syndication/'.$nomFichier);
if (!$tabTmp) die(date ('Y/m/d - H:i:s')." - ERREUR : Fichier local $nomFichier inexistant !".EOL);
if (stripos($tabTmp[0], 'ISO-8859-1')>0) $encoding='ISO-8859-1';
elseif (stripos($tabTmp[0], 'UTF-8')>0) $encoding='UTF-8';
else die(date ('Y/m/d - H:i:s')." - ERREUR : Encoding ".$tabTmp[0]." non géré !".EOL);
$dom = @new DomDocument2();
$dom->load(LESECHOS_LOCAL_DIR.'syndication/'.$nomFichier);
$pressReleaseId=$dom->getValueFromTag('CODE');
$language=strtoupper($dom->getValueFromTag('PRESS_RELEASE_LANGUAGE'));
$tabInsert=array( //'companyId'=>$dom->getValueFromTag('COMPANY_ID'),
'companyName'=>$dom->getValueFromTag('COMPANY_NAME'),
'companyIsin'=>$dom->getValueFromTag('COMPANY_ISIN'),
//'companySiren'=>$dom->getValueFromTag('COMPANY_SIREN'),
//'companyRic'=>$dom->getValueFromTag('COMPANY_RIC'),
//'companyLogoUrl'=>$dom->getValueFromTag('URL1'),
//'companyWebSite'=>$dom->getValueFromTag('URL2'),
//'companyProfile'=>$dom->getValueFromTag('URL3'),
//'companyAnnualReport'=>$dom->getValueFromTag('URL4'),
//'companyInfoActionnaires'=>$dom->getValueFromTag('URL5'),
'pressReleaseId'=>$pressReleaseId,
'pressReleaseDate'=>WDate::dateT('d/m/Y', 'Y-m-d', $dom->getValueFromTag('PRESS_RELEASE_PUBDATE')).' '.
$dom->getValueFromTag('PRESS_RELEASE_PUBTIME'),
'pressReleaseTitle'=>$dom->getValueFromTag('PRESS_RELEASE_TITLE'),
'pressReleaseText'=>$dom->getValueFromTag('TEXT_FORMAT'),
'pressReleaseHtml'=>$dom->getValueFromTag('HTML_FORMAT'),
'pressReleaseAttachments'=>$dom->getValueFromTag('LINK_ORIGINAL'),
'pressReleaseUrl'=>$dom->getValueFromTag('LINK'),
//'indexAll'=>$dom->getValueFromTag('INDEX_ALL'),
'indexTheme'=>$dom->getValueFromTag('PRESS_RELEASE_THEME'),
//'indexSector'=>$dom->getValueFromTag('INDEX_SECTOR'),
//'indexCountry'=>$dom->getValueFromTag('INDEX_COUNTRY'),
'indexLanguage'=>$language,
//'indexMarketPlace'=>$dom->getValueFromTag('INDEX_MARKET_PLACE'),
//'indexQuoteInd'=>$dom->getValueFromTag('INDEX_QUOTE_INDICATOR'),
'source'=>'E',
);
if ($language=='FR') {
$ret=$iDb->insert('articles', $tabInsert);
if (!$ret && $iDb->getLastErrorNum()<>1062) {
echo date ('Y/m/d - H:i:s')." - ERREUR ". $iDb->getLastError() . EOL;
print_r($tabInsert);
die();
} elseif ($iDb->getLastErrorNum()<>1062) {
echo date ('Y/m/d - H:i:s')." - Communiqué n°$pressReleaseId enregistré avec succès.".EOL;
}
} else
echo date ('Y/m/d - H:i:s')." - Langue du communiqué non intégrée en base ($language).".EOL;
unset($dom);
}
?>