log($url, 'url');
$page = getUrl($url, '', '', '', false, '', '',15);
//Fichier non disponible
if($page['code']==408 || $page['code']==400){
$output = false;
//Ecriture du fichier sur le serveur en local
}else{
$body = $page['body'];
$output = $body;
}
$firephp->log($output, 'content');
$output = utf8_encode($output);
$output = bdf_removeblock($output);
$output = removeTag('html', $output, true, false);
$output = removeTag('head', $output, true, false);
$output = removeTag('body', $output, true, false);
$output = removeTag('meta', $output, false, false);
$output = removeTag('title', $output, true, true);
$output = changeLinkToLowerCase($output);
$output = bdf_lien_siren($output);
return $output;
}
/**
 * Strips a fixed list of literal fragments from a page body.
 *
 * The content is passed through eight successive str_replace() calls, each
 * replacing one hard-coded fragment with the empty string, and the result is
 * returned.
 *
 * NOTE(review): in this copy of the file every search literal is the empty
 * string. str_replace() leaves the subject unchanged for an empty search
 * string, so as written the function returns its input untouched. The
 * comments below suggest each call originally targeted an HTML snippet
 * (spacer image, monitoring image, print link) -- presumably the literals
 * were lost; confirm against the original file before relying on this.
 *
 * @param string $content Page content to clean.
 * @return string The content after all replacements.
 */
function bdf_removeblock($content)
{
$output = $content;
/*
 * shim.gif image
 * Modules 27, 28, 29, 30
 * Module 38
 */
$output = str_replace('','',$output);
$output = str_replace('', '', $output);
/*
 * "Add to monitoring" image
 * Module 27
 * Module 28
 * Module 51
 */
$output = str_replace('', '', $output);
$output = str_replace('', '', $output);
$output = str_replace('', '', $output);
/*
 * Print
 * Module 27
 * Module 28
 * Module 51
 *
 */
$output = str_replace('', '', $output);
$output = str_replace('', '', $output);
$output = str_replace('', '', $output);
return $output;
}
/**
 * Removes an HTML tag from a piece of markup.
 *
 * Three modes, selected by the two flags:
 *  - $endOfTag && $removeContent : remove the opening tag, the closing tag
 *    and everything in between.
 *  - $endOfTag && !$removeContent: remove the opening and closing tags but
 *    keep what was between them.
 *  - !$endOfTag                  : remove only the opening tag (for void
 *    elements such as <meta>); $removeContent is ignored.
 *
 * Matching is case-insensitive in every mode, and the tag name must be
 * followed by a non-name character, so asking for "head" does not touch
 * "<header>".
 *
 * @param string $balise        Tag name (e.g. 'body'). It is interpolated
 *                              into the regular expression as-is, so it must
 *                              not contain the '@' delimiter.
 * @param string $content       Markup to process.
 * @param bool   $endOfTag      Whether the tag has a closing counterpart.
 * @param bool   $removeContent Whether to drop the enclosed content too.
 * @return string The markup with the tag removed; the input is returned
 *                unchanged if the regex engine reports an error.
 */
function removeTag($balise, $content, $endOfTag = true, $removeContent = true)
{
    // Group the name so that a caller-supplied alternation stays contained.
    $name = '(?:' . $balise . ')';

    // The negative lookahead stops "head" from matching "<header ...>".
    $opening = '<' . $name . '(?![\w:-])[^>]*>';

    // The closing tag must start with "</"; otherwise a bare "tag>" inside
    // text would be removed and a dangling "</" would be left behind.
    $closing = '</' . $name . '\s*>';

    if ($endOfTag && $removeContent) {
        // "s" lets "." span newlines so multi-line elements are removed whole.
        $patterns = array('@' . $opening . '.*?' . $closing . '@si');
    } elseif ($endOfTag) {
        $patterns = array('@' . $opening . '@i', '@' . $closing . '@i');
    } else {
        $patterns = array('@' . $opening . '@i');
    }

    $output = preg_replace($patterns, '', $content);

    // preg_replace() yields null on a PCRE error; keep the original markup
    // instead of propagating null through the caller's processing chain.
    if ($output === null) {
        return $content;
    }

    return $output;
}
function changeLinkToLowerCase($content)
{
global $firephp;
//$pattern = "/\< *[img][^\>]*[href] *= *[\"\']{0,1}([^\"\'\ >]*)/i";
$pattern = '/(\].*?>)/ie';
$replace = "strtolower('\\1')";
$content = preg_replace($pattern,$replace,$content);
$pattern = '/(\