batch/1.1/old/getInfosReg.php
2012-10-16 07:44:31 +00:00

533 lines
25 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/php -c/var/www/batch/config/php_batch_sd.ini
<?php
/**
** Attention : Si ajout d'une nouvelle source modifier la table articles !
** ALTER TABLE `articles` CHANGE `source` `source` ENUM( 'A', 'B', 'D', 'E', 'H' ) NOT NULL DEFAULT 'A';
**/
include_once(FWK_PATH.'common/chiffres.php');
include_once(FWK_PATH.'common/dates.php');
include_once(FWK_PATH.'common/ftp.php');
// Database handle shared by every integration section of this batch.
$iDb = new WDB('presse');

/*
 * Step 1 - download the XML feeds of three providers over FTP.
 *
 * Each provider follows the same pattern: announce the download, call
 * ftp_mget() with the provider's connection constants, the '*.xml' pattern
 * and the provider's local directory, then either abort the whole batch when
 * the call returns exactly false, or print the returned value as the number
 * of files retrieved.
 * NOTE(review): ftp_mget() is a project helper (presumably from
 * common/ftp.php); the meaning of its trailing `true` flag is not visible
 * here - confirm against the helper.
 */

// --- Hugin ---
echo date('Y/m/d - H:i:s') ." - DEBUT de la récupération des flux Hugin...".EOL;
$ret = ftp_mget(HUGIN_FTP_URL, HUGIN_FTP_USER, HUGIN_FTP_PASS, '*.xml', HUGIN_LOCAL_DIR, true);
if ($ret === false) {
    die(date('Y/m/d - H:i:s')." - ERREUR : Récupération des flux Hugin en FTP incorrecte !".EOL);
}
echo date('Y/m/d - H:i:s')." - FIN de la récupération des flux Hugin en FTP ($ret fichiers récupérés).".EOL;

// --- ActusNews ---
echo date('Y/m/d - H:i:s') ." - DEBUT de la récupération des flux ActusNews...".EOL;
$ret = ftp_mget(ACTUSNEWS_FTP_URL, ACTUSNEWS_FTP_USER, ACTUSNEWS_FTP_PASS, '*.xml', ACTUSNEWS_LOCAL_DIR, true);
if ($ret === false) {
    die(date('Y/m/d - H:i:s')." - ERREUR : Récupération des flux ActusNews en FTP incorrecte !".EOL);
}
echo date('Y/m/d - H:i:s')." - FIN de la récupération des flux ActusNews en FTP ($ret fichiers récupérés).".EOL;

// --- Di Release ---
echo date('Y/m/d - H:i:s') ." - DEBUT de la récupération des flux Di Release...".EOL;
$ret = ftp_mget(DIRELEASE_FTP_URL, DIRELEASE_FTP_USER, DIRELEASE_FTP_PASS, '*.xml', DIRELEASE_LOCAL_DIR, true);
if ($ret === false) {
    die(date('Y/m/d - H:i:s')." - ERREUR : Récupération des flux Di Release en FTP incorrecte !".EOL);
}
echo date('Y/m/d - H:i:s')." - FIN de la récupération des flux Di Release en FTP ($ret fichiers récupérés).".EOL;
/*
 * BusinessWire: unlike the other providers, nothing is downloaded here. The
 * .xml files found in BUSINESSWIRE_LOCAL_DIR_INCOMING are copied into
 * BUSINESSWIRE_LOCAL_DIR, skipping files that already exist in the target
 * directory and files whose size is not greater than zero.
 */
echo date('Y/m/d - H:i:s') ." - DEBUT de la récupération des flux BusinessWire...".EOL;
$dh = opendir(BUSINESSWIRE_LOCAL_DIR_INCOMING);
if ($dh === false) {
    // Without this guard readdir() never yields false on an invalid handle
    // (PHP 5/7 return NULL) and the loop below would spin forever.
    die(date('Y/m/d - H:i:s')." - ERREUR : Ouverture du répertoire ".BUSINESSWIRE_LOCAL_DIR_INCOMING." impossible !".EOL);
}
$ret = 0;
while (false !== ($filename = readdir($dh))) {
    // Only .xml entries are of interest ('.' and '..' never match the suffix).
    if (substr($filename, -4) !== '.xml') {
        continue;
    }
    $fichierEntrant = BUSINESSWIRE_LOCAL_DIR_INCOMING . $filename;
    $fichierCible = BUSINESSWIRE_LOCAL_DIR . $filename;
    if (!file_exists($fichierCible) && filesize($fichierEntrant) > 0) {
        // Count a file only when the copy really succeeded; a failure is
        // logged but does not stop the batch.
        if (copy($fichierEntrant, $fichierCible)) {
            $ret++;
        } else {
            echo date('Y/m/d - H:i:s')." - ERREUR : Copie du fichier $filename impossible !".EOL;
        }
    }
}
closedir($dh);
echo date('Y/m/d - H:i:s')." - FIN de la récupération des flux BusinessWire ($ret fichiers copiés).".EOL;
/*
 * Les Echos: download the feed over FTP. The remote pattern is
 * 'syndication/*.xml'; the integration step later reads the files from the
 * 'syndication/' sub-directory of LESECHOS_LOCAL_DIR. A strict false return
 * from ftp_mget() aborts the batch; any other value is printed as the number
 * of files retrieved.
 */
echo date('Y/m/d - H:i:s') ." - DEBUT de la récupération des flux Les Echos...".EOL;
$ret = ftp_mget(LESECHOS_FTP_URL, LESECHOS_FTP_USER, LESECHOS_FTP_PASS, 'syndication/*.xml', LESECHOS_LOCAL_DIR, true);
if ($ret === false) {
    die(date('Y/m/d - H:i:s')." - ERREUR : Récupération des flux Les Echos en FTP incorrecte !".EOL);
}
echo date('Y/m/d - H:i:s')." - FIN de la récupération des flux Les Echos en FTP ($ret fichiers récupérés).".EOL;
/**
 ** INTEGRATION OF THE "HUGIN" PRESS RELEASES
 **
 ** Every .xml file of HUGIN_LOCAL_DIR is loaded with DomDocument2 and
 ** inserted as one row of the `articles` table with source 'H'.
 **/
echo date('Y/m/d - H:i:s') ." - DEBUT de l'intégration des flux Hugin...".EOL;

// Collect the file names first, then release the directory handle.
$tabFichier = array();
$dh = opendir(HUGIN_LOCAL_DIR);
if ($dh === false) {
    // readdir() on an invalid handle never returns false, so the listing
    // loop would not terminate: stop here with an explicit message.
    die(date('Y/m/d - H:i:s')." - ERREUR : Ouverture du répertoire ".HUGIN_LOCAL_DIR." impossible !".EOL);
}
while (false !== ($filename = readdir($dh))) {
    if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml') {
        $tabFichier[] = $filename;
    }
}
closedir($dh);

foreach ($tabFichier as $nomFichier) {
    // file() returning false or an empty array (unreadable or empty file) is fatal.
    $tabTmp = file(HUGIN_LOCAL_DIR.$nomFichier);
    if (!$tabTmp) {
        die(date('Y/m/d - H:i:s')." - ERREUR : Fichier local $nomFichier inexistant !".EOL);
    }

    // The first line must announce one of the two supported encodings.
    // $encoding is not read again in this section; the check acts as validation.
    if (stripos($tabTmp[0], 'ISO-8859-1')>0) {
        $encoding = 'ISO-8859-1';
    } elseif (stripos($tabTmp[0], 'UTF-8')>0) {
        $encoding = 'UTF-8';
    } else {
        die(date('Y/m/d - H:i:s')." - ERREUR : Encoding ".$tabTmp[0]." non géré !".EOL);
    }

    // Warnings raised while loading are deliberately silenced with @ here.
    $dom = @new DomDocument2();
    @$dom->load(HUGIN_LOCAL_DIR.$nomFichier);
    $pressReleaseId = @$dom->getValueFromTag('PRESS_RELEASE_ID');

    // Column name => value read from the tag of the same meaning.
    $tabInsert = array(
        'companyId'=>$dom->getValueFromTag('COMPANY_ID'),
        'companyName'=>$dom->getValueFromTag('COMPANY_NAME'),
        'companyIsin'=>$dom->getValueFromTag('COMPANY_ISIN'),
        'companySiren'=>$dom->getValueFromTag('COMPANY_SIREN'),
        'companyRic'=>$dom->getValueFromTag('COMPANY_RIC'),
        'companyLogoUrl'=>$dom->getValueFromTag('URL1'),
        'companyWebSite'=>$dom->getValueFromTag('URL2'),
        'companyProfile'=>$dom->getValueFromTag('URL3'),
        'companyAnnualReport'=>$dom->getValueFromTag('URL4'),
        'companyInfoActionnaires'=>$dom->getValueFromTag('URL5'),
        'pressReleaseId'=>$pressReleaseId,
        'pressReleaseDate'=>$dom->getValueFromTag('PRESS_RELEASE_DATE_TIME'),
        'pressReleaseTitle'=>$dom->getValueFromTag('PRESS_RELEASE_TITLE'),
        'pressReleaseText'=>$dom->getValueFromTag('TEXT_FORMAT'),
        'pressReleaseHtml'=>$dom->getValueFromTag('HTML_FORMAT'),
        'pressReleaseAttachments'=>$dom->getValueFromTag('ATTACHMENTS_URL1'),
        'pressReleaseUrl'=>$dom->getValueFromTag('PRESS_RELEASE_URL_CNG'),
        'indexAll'=>$dom->getValueFromTag('INDEX_ALL'),
        'indexTheme'=>$dom->getValueFromTag('INDEX_THEME'),
        'indexSector'=>$dom->getValueFromTag('INDEX_SECTOR'),
        'indexCountry'=>$dom->getValueFromTag('INDEX_COUNTRY'),
        'indexLanguage'=>$dom->getValueFromTag('INDEX_LANGUAGE'),
        'indexMarketPlace'=>$dom->getValueFromTag('INDEX_MARKET_PLACE'),
        'indexQuoteInd'=>$dom->getValueFromTag('INDEX_QUOTE_INDICATOR'),
        'source'=>'H',
    );

    // Error number 1062 is skipped silently (presumably MySQL's duplicate-entry
    // error, i.e. a release that was already imported - confirm against WDB).
    // Any other insert failure dumps the row and stops the batch.
    $ret = $iDb->insert('articles', $tabInsert);
    if (!$ret && $iDb->getLastErrorNum()<>1062) {
        echo date('Y/m/d - H:i:s')." - ERREUR ". $iDb->getLastError() . EOL;
        print_r($tabInsert);
        die();
    } elseif ($iDb->getLastErrorNum()<>1062) {
        echo date('Y/m/d - H:i:s')." - Communiqué n°$pressReleaseId enregistré avec succès.".EOL;
    }
    unset($dom);
}
echo date('Y/m/d - H:i:s') ." - FIN de l'intégration des flux Hugin.".EOL;
/**
 ** INTEGRATION OF THE "ACTUSNEWS" PRESS RELEASES
 **
 ** Every .xml file of ACTUSNEWS_LOCAL_DIR whose <communique ...> line carries
 ** langue="FR" is loaded with DomDocument2 and inserted as one row of the
 ** `articles` table with source 'A'. Other files are skipped silently.
 **/
echo date('Y/m/d - H:i:s') ." - DEBUT de l'intégration des flux ActusNews...".EOL;

// Collect the file names first, then release the directory handle.
$tabFichier = array();
$dh = opendir(ACTUSNEWS_LOCAL_DIR);
if ($dh === false) {
    // readdir() on an invalid handle never returns false, so the listing
    // loop would not terminate: stop here with an explicit message.
    die(date('Y/m/d - H:i:s')." - ERREUR : Ouverture du répertoire ".ACTUSNEWS_LOCAL_DIR." impossible !".EOL);
}
while (false !== ($filename = readdir($dh))) {
    if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml') {
        $tabFichier[] = $filename;
    }
}
closedir($dh);

foreach ($tabFichier as $nomFichier) {
    // file() returning false or an empty array (unreadable or empty file) is fatal.
    $tabTmp = file(ACTUSNEWS_LOCAL_DIR.$nomFichier);
    if (!$tabTmp) {
        die(date('Y/m/d - H:i:s')." - ERREUR : Fichier local $nomFichier inexistant !".EOL);
    }

    // The first line must announce one of the two supported encodings.
    // $encoding is not read again in this section; the check acts as validation.
    if (stripos($tabTmp[0], 'ISO-8859-1')>0) {
        $encoding = 'ISO-8859-1';
    } elseif (stripos($tabTmp[0], 'UTF-8')>0) {
        $encoding = 'UTF-8';
    } else {
        die(date('Y/m/d - H:i:s')." - ERREUR : Encoding ".$tabTmp[0]." non géré !".EOL);
    }

    // Language detection: look at the first line containing "<communique ".
    // The flag is reset for every file so that a file without such a line is
    // skipped instead of inheriting the decision taken for the previous file.
    $french = false;
    foreach ($tabTmp as $ligne) {
        if (stripos($ligne, '<communique ') !== false) {
            $french = (stripos($ligne, 'langue="FR"')>0);
            break;
        }
    }
    if (!$french) {
        continue;
    }

    $dom = @new DomDocument2();
    $dom->load(ACTUSNEWS_LOCAL_DIR.$nomFichier);
    $pressReleaseId = $dom->getValueFromTag('id');
    $contenuHtml = $dom->getValueFromTag('contenu');

    // Columns not filled for this source: companyId, companySiren,
    // companyProfile, companyAnnualReport, pressReleaseUrl, indexAll,
    // indexTheme, indexCountry, indexLanguage.
    $tabInsert = array(
        'companyName'=>trim($dom->getValueFromTag('raisonsociale')),
        'companyIsin'=>trim($dom->getValueFromTag('codeisin')),
        'companyRic'=>trim($dom->getValueFromTag('code_reuters')),
        'companyLogoUrl'=>trim($dom->getValueFromTag('logo')),
        'companyWebSite'=>trim($dom->getValueFromTag('site')),
        'companyInfoActionnaires'=>trim($dom->getValueFromTag('site_investisseur')),
        'pressReleaseId'=>$pressReleaseId,
        'pressReleaseDate'=>trim($dom->getValueFromTag('miseajour')),
        'pressReleaseTitle'=>trim($dom->getValueFromTag('titre')),
        // Plain-text version: tags stripped, entities decoded, leftover &rsquo; removed.
        'pressReleaseText'=>trim(strtr(html_entity_decode(strip_tags($contenuHtml)),array('&rsquo;'=>''))),
        'pressReleaseHtml'=>trim($contenuHtml),
        'pressReleaseAttachments'=>trim($dom->getValueFromTag('pdf')),
        'indexQuoteInd'=>trim($dom->getValueFromTag('indice')),
        'indexSector'=>trim($dom->getValueFromTag('secteur')),
        'indexMarketPlace'=>trim($dom->getValueFromTag('marche_cotation')),
        'source'=>'A',
        'companyBloomberg'=>trim($dom->getValueFromTag('code_bloomberg')),
        'companyMnemo'=>trim($dom->getValueFromTag('mnemo')),
        'companyNbTitles'=>trim($dom->getValueFromTag('nb_de_titres')),
        'companyFootsie'=>trim($dom->getValueFromTag('footsie')),
    );

    // Error number 1062 is skipped silently (presumably MySQL's duplicate-entry
    // error, i.e. a release that was already imported - confirm against WDB).
    // Any other insert failure dumps the row and stops the batch.
    $ret = $iDb->insert('articles', $tabInsert);
    if (!$ret && $iDb->getLastErrorNum()<>1062) {
        echo date('Y/m/d - H:i:s')." - ERREUR ". $iDb->getLastError() . EOL;
        print_r($tabInsert);
        die();
    } elseif ($iDb->getLastErrorNum()<>1062) {
        echo date('Y/m/d - H:i:s')." - Communiqué n°$pressReleaseId enregistré avec succès.".EOL;
    }
    unset($dom);
}
echo date('Y/m/d - H:i:s') ." - FIN de l'intégration des flux ActusNews...".EOL;
/**
 ** INTEGRATION OF THE "DIRELEASE" PRESS RELEASES
 **
 ** Every .xml file of DIRELEASE_LOCAL_DIR containing langue="FR" is inserted
 ** as one row of the `articles` table with source 'D'.
 **
 ** The original author judged this feed's XML too poor for the XML parser,
 ** so the fields are extracted with regular expressions only.
 ** @todo Take all the ISINs and look up the SIREN.
 **/
echo date('Y/m/d - H:i:s') ." - DEBUT de l'intégration des flux DiRelease...".EOL;

// Collect the file names first, then release the directory handle.
$tabFichier = array();
$dh = opendir(DIRELEASE_LOCAL_DIR);
if ($dh === false) {
    // readdir() on an invalid handle never returns false, so the listing
    // loop would not terminate: stop here with an explicit message.
    die(date('Y/m/d - H:i:s')." - ERREUR : Ouverture du répertoire ".DIRELEASE_LOCAL_DIR." impossible !".EOL);
}
while (false !== ($filename = readdir($dh))) {
    if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml') {
        $tabFichier[] = $filename;
    }
}
closedir($dh);

// Tags read from each release; every one is matched with the same
// non-greedy, case-insensitive, dot-matches-newline pattern.
$balises = array(
    'id', 'raisonsociale', 'codeisin', 'code_reuters', 'logo', 'site',
    'site_investisseur', 'miseajour', 'titre', 'contenu', 'pdf', 'indice',
    'secteur', 'marche_cotation', 'code_bloomberg', 'mnemo', 'nb_de_titres',
    'footsie',
);

foreach ($tabFichier as $nomFichier) {
    $strTmp = file_get_contents(DIRELEASE_LOCAL_DIR.$nomFichier);
    if ($strTmp === false) {
        // Report an unreadable file as such instead of as an encoding problem.
        die(date('Y/m/d - H:i:s')." - ERREUR : Fichier local $nomFichier inexistant !".EOL);
    }

    // One of the two supported encodings must be named somewhere in the file.
    // $encoding is not read again in this section; the check acts as validation.
    if (stripos($strTmp, 'ISO-8859-1')>0) {
        $encoding = 'ISO-8859-1';
    } elseif (stripos($strTmp, 'UTF-8')>0) {
        $encoding = 'UTF-8';
    } else {
        die(date('Y/m/d - H:i:s')." - ERREUR : Encoding non géré !".EOL);
    }

    // Only French releases are stored.
    if (stripos($strTmp, 'langue="FR"')>0) {
        // Log the file being loaded (the previous message interpolated a
        // variable that belongs to another section of the script).
        echo date('Y/m/d - H:i:s')." - Chargement du communiqué en français ($nomFichier).".EOL;

        // Raw (untrimmed) content of each tag, '' when the tag is absent.
        $brut = array();
        foreach ($balises as $balise) {
            $trouve = preg_match('/<'.$balise.'>(.*)<\/'.$balise.'>/isU', $strTmp, $matches);
            $brut[$balise] = $trouve ? $matches[1] : '';
        }

        $pressReleaseId = trim($brut['id']);
        // The body is delivered inside a CDATA section: drop the markers.
        $contenu = trim(strtr($brut['contenu'], array('<![CDATA['=>'', ']]>'=>'')));

        // Columns not filled for this source: companyId, companySiren,
        // companyProfile, companyAnnualReport, pressReleaseUrl, indexAll,
        // indexTheme, indexCountry, indexLanguage.
        $tabInsert = array(
            'companyName'=>trim($brut['raisonsociale']),
            'companyIsin'=>trim($brut['codeisin']),
            'companyRic'=>trim($brut['code_reuters']),
            // Logo and PDF URLs have every whitespace character removed.
            'companyLogoUrl'=>trim(preg_replace('/\s/','',$brut['logo'])),
            'companyWebSite'=>trim($brut['site']),
            'companyInfoActionnaires'=>trim($brut['site_investisseur']),
            'pressReleaseId'=>$pressReleaseId,
            'pressReleaseDate'=>trim($brut['miseajour']),
            'pressReleaseTitle'=>trim($brut['titre']),
            // Plain-text version: tags stripped, entities decoded, leftover &rsquo; removed.
            'pressReleaseText'=>trim(strtr(html_entity_decode(strip_tags($contenu)),array('&rsquo;'=>''))),
            'pressReleaseHtml'=>$contenu,
            'pressReleaseAttachments'=>preg_replace('/\s/','',$brut['pdf']),
            'indexQuoteInd'=>trim($brut['indice']),
            'indexSector'=>trim($brut['secteur']),
            'indexMarketPlace'=>trim($brut['marche_cotation']),
            'source'=>'D',
            'companyBloomberg'=>trim($brut['code_bloomberg']),
            'companyMnemo'=>trim($brut['mnemo']),
            'companyNbTitles'=>trim($brut['nb_de_titres']),
            'companyFootsie'=>trim($brut['footsie']),
        );

        // Error number 1062 is skipped silently (presumably MySQL's
        // duplicate-entry error, i.e. a release that was already imported -
        // confirm against WDB). Any other failure dumps the row and stops.
        $ret = $iDb->insert('articles', $tabInsert);
        if (!$ret && $iDb->getLastErrorNum()<>1062) {
            echo date('Y/m/d - H:i:s')." - ERREUR ". $iDb->getLastError() . EOL;
            print_r($tabInsert);
            die();
        } elseif ($iDb->getLastErrorNum()<>1062) {
            echo date('Y/m/d - H:i:s')." - Communiqué n°$pressReleaseId enregistré avec succès.".EOL;
        }
    }
}
echo date('Y/m/d - H:i:s') ." - FIN de l'intégration des flux DiRelease...".EOL;
/**
 ** INTEGRATION OF THE "BUSINESSWIRE" PRESS RELEASES
 **
 ** Every .xml file of BUSINESSWIRE_LOCAL_DIR is inserted as one row of the
 ** `articles` table with source 'B'. Simple tags are read through
 ** DomDocument2; attribute values and nested bodies are extracted from the
 ** raw text with regular expressions.
 **
 ** No encoding check is made for this source: a disabled check in the
 ** previous version noted that the XML declaration of these files may carry
 ** no encoding attribute.
 **
 ** Condensed excerpt of a release, showing the elements read below:
 **
 **   <DateAndTime>20070906T190800+0000</DateAndTime>
 **   <NewsItemId>20070906006073</NewsItemId>
 **   <Language FormalName="fr"/>
 **   <Property FormalName="BWCountryKeywords" Value="United States"/>
 **   <Property FormalName="BWIndustryKeywords" Value="Manufacturing"/>
 **   <Property FormalName="BWCategoryKeywords" Value="Product/Service"/>
 **   <Property FormalName="Exchange" Value="NYSE"/>
 **   <Property FormalName="ISIN" Value="US1255811085"/>
 **   <HeadLine>CIT consolide son engagement envers l'économie irlandaise ...</HeadLine>
 **/
echo date('Y/m/d - H:i:s') ." - DEBUT de l'intégration des flux BusinessWire...".EOL;

// Collect the file names first, then release the directory handle.
$tabFichier = array();
$dh = opendir(BUSINESSWIRE_LOCAL_DIR);
if ($dh === false) {
    // readdir() on an invalid handle never returns false, so the listing
    // loop would not terminate: stop here with an explicit message.
    die(date('Y/m/d - H:i:s')." - ERREUR : Ouverture du répertoire ".BUSINESSWIRE_LOCAL_DIR." impossible !".EOL);
}
while (false !== ($filename = readdir($dh))) {
    if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml') {
        $tabFichier[] = $filename;
    }
}
closedir($dh);

foreach ($tabFichier as $nomFichier) {
    // Validate the read BEFORE using its result.
    $tabTmp = file(BUSINESSWIRE_LOCAL_DIR.$nomFichier);
    if (!$tabTmp) {
        die(date('Y/m/d - H:i:s')." - ERREUR : Fichier local ".BUSINESSWIRE_LOCAL_DIR."$nomFichier inexistant !".EOL);
    }
    // file() keeps each line ending, so the lines are joined without a
    // separator; joining with "\n" doubled every line break in the bodies.
    $strTmp = implode('', $tabTmp);

    $dom = @new DomDocument2();
    $dom->load(BUSINESSWIRE_LOCAL_DIR.$nomFichier);
    $pressReleaseId = $dom->getValueFromTag('NewsItemId');

    // Single-valued attributes. [^"]* stops at the closing quote, so two
    // elements sharing one line cannot be swallowed into a single value.
    preg_match('/<Property FormalName="ISIN" Value="([^"]*)"\/>/i', $strTmp, $matches);
    $isin = isset($matches[1]) ? $matches[1] : null;
    preg_match('/<Property FormalName="Exchange" Value="([^"]*)"\/>/i', $strTmp, $matches);
    $exchange = isset($matches[1]) ? $matches[1] : null;
    preg_match('/<Property FormalName="Ticker Symbol" Value="([^"]*)"\/>/i', $strTmp, $matches);
    $mnemo = isset($matches[1]) ? $matches[1] : null;
    preg_match('/<Language FormalName="([^"]*)"\/>/i', $strTmp, $matches);
    $language = isset($matches[1]) ? $matches[1] : null;

    // Company web site: text of the news line typed "ClickThru URL".
    preg_match('/<NewsLineType FormalName="ClickThru URL"(?:.*)<NewsLineText>(.*)<\/NewsLineText>/isU',$strTmp,$matches);
    $siteWeb = isset($matches[1]) ? $matches[1] : null;

    // Logo: Href of the content item inside the component whose role is "Logo".
    preg_match('/<NewsComponent>(?:.*)<Role FormalName="Logo"\/>(?:.*)<ContentItem Duid="(?:.*)" Href="(.*)">(?:.*)<\/NewsComponent>/isU',$strTmp,$matches);
    $logo = urldecode(isset($matches[1]) ? $matches[1] : '');

    // Body in its XHTML and BW-Text formats, converted from UTF-8 to ISO-8859-1.
    preg_match('/<NewsComponent>(?:.*)<Role FormalName="Body"\/>(?:.*)<ContentItem (?:.*)<Format FormalName="XHTML"\/>(?:.*)<DataContent>(.*)<\/DataContent>/isU',$strTmp,$matches);
    $bodyHtml = utf8_decode(isset($matches[1]) ? $matches[1] : '');
    preg_match('/<NewsComponent>(?:.*)<Role FormalName="Body"\/>(?:.*)<ContentItem (?:.*)<Format FormalName="BW-Text"\/>(?:.*)<DataContent>(.*)<\/DataContent>/isU',$strTmp,$matches);
    $bodyTxt = utf8_decode(isset($matches[1]) ? $matches[1] : '');

    // Multi-valued keywords are stored as ';'-separated lists.
    preg_match_all('/<Property FormalName="BWIndustryKeywords" Value="(.*)"\/>/iU',$strTmp,$matches);
    $industry = @implode(';',@$matches[1]);
    preg_match_all('/<Property FormalName="BWCategoryKeywords" Value="(.*)"\/>/iU',$strTmp,$matches);
    $category = @implode(';',@$matches[1]);
    preg_match_all('/<Property FormalName="BWCountryKeywords" Value="(.*)"\/>/iU',$strTmp,$matches);
    $country = @implode(';',@$matches[1]);

    // Columns not filled for this source: companyId, companySiren, companyRic,
    // companyProfile, companyAnnualReport, companyInfoActionnaires,
    // pressReleaseAttachments, pressReleaseUrl, indexAll, indexQuoteInd.
    $tabInsert = array(
        'companyName'=>$dom->getValueFromTag('SlugLine'),
        'companyIsin'=>$isin,
        'companyMnemo'=>$mnemo,
        'companyWebSite'=>$siteWeb,
        'companyLogoUrl'=>$logo,
        'pressReleaseId'=>$pressReleaseId,
        // "20070906T190800+0000" becomes "20070906190800".
        'pressReleaseDate'=>substr(str_replace('T','',$dom->getValueFromTag('DateAndTime')),0,14),
        'pressReleaseTitle'=>$dom->getValueFromTag('HeadLine'),
        'pressReleaseText'=>$bodyTxt,
        'pressReleaseHtml'=>$bodyHtml,
        'indexMarketPlace'=>$exchange,
        'indexTheme'=>$category,
        'indexSector'=>$industry,
        'indexCountry'=>$country,
        'indexLanguage'=>$language,
        'source'=>'B',
    );

    // Error number 1062 is skipped silently (presumably MySQL's duplicate-entry
    // error, i.e. a release that was already imported - confirm against WDB).
    // Any other insert failure dumps the row and stops the batch.
    $ret = $iDb->insert('articles', $tabInsert);
    if (!$ret && $iDb->getLastErrorNum()<>1062) {
        echo date('Y/m/d - H:i:s')." - ERREUR ". $iDb->getLastError() . EOL;
        print_r($tabInsert);
        die();
    } elseif ($iDb->getLastErrorNum()<>1062) {
        echo date('Y/m/d - H:i:s')." - Communiqué n°$pressReleaseId enregistré avec succès.".EOL;
    }
    unset($dom);
}
echo date('Y/m/d - H:i:s') ." - FIN de l'intégration des flux BusinessWire.".EOL;
/**
 ** INTEGRATION OF THE "LES ECHOS" PRESS RELEASES
 **
 ** Every .xml file of the 'syndication/' sub-directory of LESECHOS_LOCAL_DIR
 ** is loaded with DomDocument2. Releases whose PRESS_RELEASE_LANGUAGE is FR
 ** (case-insensitive) are inserted as one row of the `articles` table with
 ** source 'E'; the others are only logged.
 **/
echo date('Y/m/d - H:i:s') ." - DEBUT de l'intégration des flux Les Echos...".EOL;

$repEchos = LESECHOS_LOCAL_DIR.'syndication/';

// Collect the file names first, then release the directory handle.
$tabFichier = array();
$dh = opendir($repEchos);
if ($dh === false) {
    // readdir() on an invalid handle never returns false, so the listing
    // loop would not terminate: stop here with an explicit message.
    die(date('Y/m/d - H:i:s')." - ERREUR : Ouverture du répertoire ".$repEchos." impossible !".EOL);
}
while (false !== ($filename = readdir($dh))) {
    if ($filename<>'.' && $filename<>'..' && substr($filename, -4)=='.xml') {
        $tabFichier[] = $filename;
    }
}
closedir($dh);

foreach ($tabFichier as $nomFichier) {
    // file() returning false or an empty array (unreadable or empty file) is fatal.
    $tabTmp = file($repEchos.$nomFichier);
    if (!$tabTmp) {
        die(date('Y/m/d - H:i:s')." - ERREUR : Fichier local $nomFichier inexistant !".EOL);
    }

    // The first line must announce one of the two supported encodings.
    // $encoding is not read again in this section; the check acts as validation.
    if (stripos($tabTmp[0], 'ISO-8859-1')>0) {
        $encoding = 'ISO-8859-1';
    } elseif (stripos($tabTmp[0], 'UTF-8')>0) {
        $encoding = 'UTF-8';
    } else {
        die(date('Y/m/d - H:i:s')." - ERREUR : Encoding ".$tabTmp[0]." non géré !".EOL);
    }

    $dom = @new DomDocument2();
    $dom->load($repEchos.$nomFichier);
    $pressReleaseId = $dom->getValueFromTag('CODE');
    $language = strtoupper($dom->getValueFromTag('PRESS_RELEASE_LANGUAGE'));

    // Decide on the language before building the row, so that nothing is
    // computed for releases that are going to be discarded anyway.
    if ($language !== 'FR') {
        echo date('Y/m/d - H:i:s')." - Langue du communiqué non intégrée en base ($language).".EOL;
        unset($dom);
        continue;
    }

    // Columns not filled for this source: companyId, companySiren, companyRic,
    // companyLogoUrl, companyWebSite, companyProfile, companyAnnualReport,
    // companyInfoActionnaires, indexAll, indexSector, indexCountry,
    // indexMarketPlace, indexQuoteInd.
    $tabInsert = array(
        'companyName'=>$dom->getValueFromTag('COMPANY_NAME'),
        'companyIsin'=>$dom->getValueFromTag('COMPANY_ISIN'),
        'pressReleaseId'=>$pressReleaseId,
        // Publication date converted by WDate::dateT (called with 'd/m/Y' and
        // 'Y-m-d'), followed by a space and the publication time.
        'pressReleaseDate'=>WDate::dateT('d/m/Y', 'Y-m-d', $dom->getValueFromTag('PRESS_RELEASE_PUBDATE')).' '.
                            $dom->getValueFromTag('PRESS_RELEASE_PUBTIME'),
        'pressReleaseTitle'=>$dom->getValueFromTag('PRESS_RELEASE_TITLE'),
        'pressReleaseText'=>$dom->getValueFromTag('TEXT_FORMAT'),
        'pressReleaseHtml'=>$dom->getValueFromTag('HTML_FORMAT'),
        'pressReleaseAttachments'=>$dom->getValueFromTag('LINK_ORIGINAL'),
        'pressReleaseUrl'=>$dom->getValueFromTag('LINK'),
        'indexTheme'=>$dom->getValueFromTag('PRESS_RELEASE_THEME'),
        'indexLanguage'=>$language,
        'source'=>'E',
    );

    // Error number 1062 is skipped silently (presumably MySQL's duplicate-entry
    // error, i.e. a release that was already imported - confirm against WDB).
    // Any other insert failure dumps the row and stops the batch.
    $ret = $iDb->insert('articles', $tabInsert);
    if (!$ret && $iDb->getLastErrorNum()<>1062) {
        echo date('Y/m/d - H:i:s')." - ERREUR ". $iDb->getLastError() . EOL;
        print_r($tabInsert);
        die();
    } elseif ($iDb->getLastErrorNum()<>1062) {
        echo date('Y/m/d - H:i:s')." - Communiqué n°$pressReleaseId enregistré avec succès.".EOL;
    }
    unset($dom);
}
echo date('Y/m/d - H:i:s') ." - FIN de l'intégration des flux Les Echos.".EOL;
die();
?>