358 lines
11 KiB
PHP
358 lines
11 KiB
PHP
<?php
// Full open tag: the short form "<?" is only parsed when short_open_tag is
// enabled; otherwise the whole script would be sent to the browser as text.

// TODO: add a word-splitting function that splits on more than spaces (' ', ',', '-', '.')
|
|
/**
 * Returns the value recorded for a SIRET in a list of (SIRET => value) arrays.
 *
 * Every key/value pair of every entry is inspected; when the SIRET appears
 * several times, the last occurrence wins.
 *
 * @param mixed $siret   SIRET to look for (compared as a string).
 * @param array $tabDiff List of arrays mapping SIRET => value.
 * @return mixed The value stored for $siret, or 0 when it is not found.
 */
function getPhonexDiff($siret, $tabDiff) {
    $ntTot = 0;
    if (!is_array($tabDiff)) {
        return $ntTot;
    }

    // Array keys that look like integers are stored as ints by PHP, so both
    // sides are normalised to strings before a strict comparison.
    $wanted = (string) $siret;

    foreach ($tabDiff as $entry) {
        if (!is_array($entry)) {
            continue;
        }
        foreach ($entry as $sir => $nb) {
            if ((string) $sir === $wanted) {
                $ntTot = $nb;
            }
        }
    }

    return $ntTot;
}
|
|
|
|
/**
 * Prints an HTML table of matching companies, grouped by reliability level.
 *
 * For every SIRET the company name (RS) is read from the `entrep` table
 * through the legacy mysql_* connection opened by the calling script.
 * Level-1 groups holding more than 100 SIRETs are skipped entirely.
 *
 * @param array $tab     Map of reliability level => list of SIRETs.
 * @param array $tabDiff Optional list of (SIRET => value) arrays; when it is
 *                       not empty, the value found by getPhonexDiff() is shown
 *                       in the "Phonex" column.
 * @return void
 */
function printResults($tab, $tabDiff=array()) {
    echo '<table>';
    echo "<tr><td><b>Fiabilité</b></td><td><b>Siret</b></td><td><b>Raison Sociale</b></td><td><b>Phonex</b></td><td><b>soundex</b></td></tr>";

    if (!is_array($tab)) {
        $tab = array();
    }
    $withDiff = is_array($tabDiff) && count($tabDiff) > 0;

    foreach ($tab as $niv => $tabNiv) {
        if (!is_array($tabNiv)) {
            continue;
        }
        // The filter only depends on the level, so it is evaluated once per group.
        if (!($niv > 1 || ($niv == 1 && count($tabNiv) <= 100))) {
            continue;
        }

        foreach ($tabNiv as $siret) {
            // Only RS is displayed, so only RS is selected.
            $q = "SELECT RS FROM entrep WHERE siret='" . mysql_real_escape_string((string) $siret) . "';";
            $res = mysql_query($q);
            $ret = $res ? mysql_fetch_array($res) : false;
            $rs_ = $ret ? $ret[0] : '';

            $phonex = $withDiff ? getPhonexDiff($siret, $tabDiff) : '';
            // No soundex value is available here; the column is kept empty.
            $soundex = '';

            // A single-byte charset makes htmlspecialchars() byte-safe: only the
            // ASCII special characters are replaced, whatever the data encoding.
            echo '<tr><td>' . htmlspecialchars((string) $niv, ENT_QUOTES, 'ISO-8859-1')
                . '</td><td>' . htmlspecialchars((string) $siret, ENT_QUOTES, 'ISO-8859-1')
                . '</td><td>' . htmlspecialchars((string) $rs_, ENT_QUOTES, 'ISO-8859-1')
                . '</td><td>' . htmlspecialchars((string) $phonex, ENT_QUOTES, 'ISO-8859-1')
                . '</td><td>' . $soundex . '</td></tr>';
            flush();
        }
    }

    echo '</table><br>';
}
|
|
|
|
/**
 * Counts how many times each value occurs and groups the values by count.
 *
 * @param array $tab List of string/integer values (anything else is ignored).
 * @return array Map of occurrence count => list of the values seen that many
 *               times, ordered from the highest count to the lowest. Values
 *               inside a group follow the order produced by sort().
 */
function compte_elements($tab) {
    if (!is_array($tab) || count($tab) == 0) {
        return array();
    }

    // Sorting first keeps the values of each group in a stable, sorted order.
    sort($tab);

    $tabRet = array();
    foreach ($tab as $value) {
        if (!is_scalar($value)) {
            continue; // cannot be used as an array key
        }
        if (isset($tabRet[$value])) {
            $tabRet[$value]++;
        } else {
            $tabRet[$value] = 1;
        }
    }

    // Single pass grouping instead of one array_keys() scan per distinct count.
    $tabNb = array();
    foreach ($tabRet as $value => $nb) {
        $tabNb[$nb][] = $value;
    }

    // Highest number of occurrences first.
    krsort($tabNb, SORT_NUMERIC);

    return $tabNb;
}
|
|
|
|
// NOTE(review): database credentials are hard-coded here; they should live in
// a configuration file outside the web root.
$c=mysql_pconnect('localhost', 'root', 'tvacfm56');
mysql_select_db('insee');

// Index rebuilds can run for a long time: remove the execution time limit.
set_time_limit(0);

// Request parameters; default to '' so the first page load raises no notice.
$rs = (isset($_REQUEST['rs']) && is_string($_REQUEST['rs'])) ? $_REQUEST['rs'] : '';
$action = (isset($_REQUEST['action']) && is_string($_REQUEST['action'])) ? $_REQUEST['action'] : '';

// The submit button label contains an apostrophe. With magic_quotes_gpc the
// value arrives as "l\'index", without it as "l'index"; stripslashes() makes
// both forms match.
if (stripslashes($action) == "Reconstruire l'index")
    $action='Index';
|
|
?>
|
|
<?php /* Search / index-rebuild form. The previous search text is echoed back
         HTML-escaped (byte-safe single-byte charset) to prevent XSS. */ ?>
<form action="./index.php" method="post">
Raison sociale : <input type="text" name="rs" value="<?php echo htmlspecialchars((string) $rs, ENT_QUOTES, 'ISO-8859-1'); ?>"><br/>
<input type="submit" name="action" value="Recherche">
<hr width="80%">
<input type="radio" name="idxType" value="sql"> SQL
<input type="radio" name="idxType" value="file"> Fichier
<input type="submit" name="action" value="Reconstruire l'index">
</form>
<hr width="95%">
|
|
|
|
<?php
// Presumably defines the soundex2 class used by the soundex2() helper.
// require_once stops right here if the file is missing, instead of failing
// later with an "undefined class" error.
require_once './soundex2/class/php4/soundex2.cls.php';
|
|
/**
 * Function-style shortcut around the soundex2 class.
 *
 * Creates a soundex2 object, calls build() on it with the given string and
 * returns the object's sString property.
 *
 * @param mixed $str Value handed to soundex2::build().
 * @return mixed Content of the sString property after build().
 */
function soundex2($str) {
    $encoder = new soundex2();
    $encoder->build($str);

    return $encoder->sString;
}
|
|
// Presumably provides phonex() and trimAccent(), which are called below but
// not defined in this file. require_once fails immediately if it is missing.
require_once './phonex.php';

// Words ignored when indexing or searching a company name. Each word is
// compared to this list after going through trimAccent().
$tabInutils=array(
    '', 'la', 'le', 'du', 'les', 'aux', 'au', 'des', 'ma', 'mon', 'ton', 'tes', 'nos', 'vos', 'l', 'de',
    'ta', 'a', 'et', 'on', 'en',
);
|
|
|
|
/*
 * Index rebuild.
 *
 * Reads ./entrep.txt line by line: the first 14 characters are taken as the
 * SIRET, the rest (trimmed) as the company name. The name is split on spaces;
 * for up to 10 words that are not in $tabInutils the soundex2 code, the phonex
 * value and the word itself are stored, either as a row of the `entrep` table
 * (idxType=sql) or as a line of ./entrep_index.csv (idxType=file).
 * The script terminates at the end of this block.
 */
$idxType = isset($_REQUEST['idxType']) ? $_REQUEST['idxType'] : '';

if ($action=='Index' && ($idxType=='sql' || $idxType=='file'))
{
    echo '<b>'.date('Y/m/d - H:i:s').' - Début de la création de l\'index...</b><br/>';
    flush();

    $tab = file('./entrep.txt');
    if ($tab === false)
        die('Impossible de lire ./entrep.txt');

    $fp = null;
    if ($idxType=='file') {
        $fp = fopen('./entrep_index.csv', 'w');
        if ($fp === false)
            die('Impossible de creer ./entrep_index.csv');
    }

    if ($idxType=='sql') {
        mysql_query("DROP TABLE IF EXISTS entrep;");

        // siret, RS, then p0..p9 (phonex), m0..m9 (word), s0..s9 (soundex):
        // the same order as the values of the INSERT built below.
        $colonnes = array('siret varchar(14) NOT NULL', 'RS varchar(80) NOT NULL');
        foreach (array('p' => 'bigint(20)', 'm' => 'char(24)', 's' => 'char(4)') as $prefixe => $type) {
            for ($i=0; $i<10; $i++)
                $colonnes[] = "$prefixe$i $type NOT NULL";
        }
        $q = "CREATE TABLE entrep (\n" . implode(",\n", $colonnes) . "\n) ENGINE=MyISAM;";
        if (!mysql_query($q))
            die('Erreur SQL : ' . htmlspecialchars(mysql_error(), ENT_QUOTES, 'ISO-8859-1'));
    }

    $maxMot = 0;
    $maxMotSiret = '';
    $nbErreurs = 0;

    foreach ($tab as $value) {
        // Blank lines carry no company.
        if (trim($value) === '')
            continue;

        $siret = substr($value, 0, 14);
        $nom = trim(substr($value, 14));
        $mots = explode(' ', $nom);

        // Longest name, counted in space-separated words (ignored words included).
        if (count($mots) > $maxMot) {
            $maxMot = count($mots);
            $maxMotSiret = $siret;
        }

        // Exactly 10 slots per kind, pre-filled with the "empty" value.
        $tabS = array_fill(0, 10, '');
        $tabP = array_fill(0, 10, 0);
        $tabM = array_fill(0, 10, '');

        $i = 0;
        foreach ($mots as $mot) {
            if ($i >= 10)
                break; // only 10 columns are available
            if (in_array(trimAccent($mot), $tabInutils))
                continue;

            $s = soundex2($mot);
            $p = phonex($mot);
            if ($s <> '')
                $tabS[$i] = $s;
            if ($p <> '')
                $tabP[$i] = $p;
            $tabM[$i] = $mot;
            $i++;
        }

        if ($idxType=='file')
            fwrite($fp, ''. $siret .';'. $nom .';'. implode(',', $tabP) .';'. implode(',', $tabM) .';'. implode(',', $tabS) ."\r\n");

        if ($idxType=='sql') {
            // Every value is escaped: words such as L'OREAL used to break the
            // statement and the row was silently lost.
            $valeurs = array_merge(array($siret, $nom), $tabP, $tabM, $tabS);
            $q = "INSERT INTO entrep VALUES ('" . implode("','", array_map('mysql_real_escape_string', $valeurs)) . "');";
            if (!mysql_query($q))
                $nbErreurs++;
        }
    }

    if ($idxType=='sql') {
        // Indexes are added after the bulk load, on the soundex then the word columns.
        $index = array();
        foreach (array('s', 'm') as $prefixe) {
            for ($i=0; $i<10; $i++)
                $index[] = "ADD INDEX ($prefixe$i)";
        }
        mysql_query("ALTER TABLE entrep " . implode(",\n", $index) . ";");
    }

    if ($idxType=='file')
        fclose($fp);

    echo '<b>'.date('Y/m/d - H:i:s').' - Fin de la création de l\'index...</b><br/>';
    echo "Nb MAX de mots dans Raison Sociale=$maxMot (SIRET=$maxMotSiret)<br/>";
    if ($nbErreurs > 0)
        echo "Nb d'insertions en erreur=$nbErreurs<br/>";
    flush();
    die();
}
|
|
|
|
/*
 * Company-name search.
 *
 * 1. Exact match of the requested name against entrep.RS.
 * 2. For every word of the request that is not in $tabInutils, each of the 10
 *    word (m), soundex (s) and phonex (p) columns is queried; a SIRET is
 *    collected once per column it matches in, and compte_elements() then
 *    groups the SIRETs by number of hits.
 * 3. The four result lists are printed.
 */
if ($action=='Recherche' && $rs!='')
{
    // Undo magic_quotes_gpc when it is active, so every value is escaped
    // exactly once below.
    $rsBrut = (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) ? stripslashes($rs) : $rs;

    /** 1. Exact match on the company name **/
    $q = "SELECT siret, RS FROM entrep WHERE RS='" . mysql_real_escape_string($rsBrut) . "';";
    $res = mysql_query($q);
    $siren_trouve1 = array();
    if ($res) {
        while ($line = mysql_fetch_array($res))
            $siren_trouve1[] = $line;
    }

    echo '<b>'.date('Y/m/d - H:i:s').' - Recherche de l\'équivalence parfaite passé...</b><br/>';

    /** 2. Word similarity **/

    // 2.1. Split the requested name into usable words.
    $tabMot = $tabS = $tabP = array();
    foreach (explode(' ', $rsBrut) as $mot) {
        if (!in_array(trimAccent($mot), $tabInutils)) {
            $tabMot[] = $mot;
            $tabS[] = soundex2($mot);
            $tabP[] = phonex($mot);
        }
    }

    // 2.2. Look every word up in each of the 10 columns. The accumulators must
    // start as arrays: they used to be undefined, which made array_merge()
    // return null and emptied the word and phonex result lists.
    $motTot = $soundexTot = $phonexTot = $tabDiff = array();
    $cptDiff = 0;

    foreach ($tabMot as $k => $mot) {
        $motSql = mysql_real_escape_string($mot);
        $soundexSql = mysql_real_escape_string($tabS[$k]);

        // Phonex values within +/- 0.01 % of the word's value. %F is not
        // locale-aware, so the decimal separator is always a dot.
        $phonex = $tabP[$k];
        $phonexMin = sprintf('%F', $phonex * 0.9999);
        $phonexMax = sprintf('%F', $phonex * 1.0001);

        for ($i=0; $i<10; $i++) {
            // Same word in column m$i
            $res = mysql_query("SELECT siret FROM entrep WHERE m$i='$motSql';");
            if ($res) {
                while ($line = mysql_fetch_array($res))
                    $motTot[] = $line['siret'];
            }

            // Same soundex code in column s$i
            $res = mysql_query("SELECT siret FROM entrep WHERE s$i='$soundexSql';");
            if ($res) {
                while ($line = mysql_fetch_array($res))
                    $soundexTot[] = $line['siret'];
            }

            // Close phonex value in column p$i; the absolute difference is kept
            // so it can be displayed next to the SIRET.
            $res = mysql_query("SELECT siret, p$i FROM entrep WHERE p$i>$phonexMin AND p$i<$phonexMax;");
            if ($res) {
                while ($line = mysql_fetch_array($res)) {
                    $phonexTot[] = $line['siret'];
                    $tabDiff[$cptDiff][$line['siret']] = abs($line["p$i"] - $phonex);
                    $cptDiff++;
                }
            }
        }
    }

    $tabSoundex = compte_elements($soundexTot);
    $tabPhonex = compte_elements($phonexTot);
    $tabMotTrouve = compte_elements($motTot);

    echo '<b>'.date('Y/m/d - H:i:s').' - Recherche des équivalence approx. passé...</b><br/>';

    /** 3. Results **/
    echo "Raison Sociale demandée : " . htmlspecialchars($rsBrut, ENT_QUOTES, 'ISO-8859-1') . "<br/>";

    if (count($siren_trouve1)>0) {
        echo "<p><b>Liste des équivalence parfaite trouvée</b></p><br/>";
        // The exact matches already carry their name, so they are printed
        // directly (the previous code passed an undefined variable to
        // printResults() and never showed them).
        echo '<table>';
        echo '<tr><td><b>Siret</b></td><td><b>Raison Sociale</b></td></tr>';
        foreach ($siren_trouve1 as $line) {
            echo '<tr><td>' . htmlspecialchars((string) $line['siret'], ENT_QUOTES, 'ISO-8859-1')
                . '</td><td>' . htmlspecialchars((string) $line['RS'], ENT_QUOTES, 'ISO-8859-1')
                . '</td></tr>';
        }
        echo '</table><br>';
    } else
        echo "Equivalence parfaite non trouvée !<br/>";

    echo '<p><b>Liste des équivalences de mots :</b></p><br/>';
    printResults($tabMotTrouve);

    echo '<p><b>Liste des équivalences de SOUNDEX :</b></p><br/>';
    printResults($tabSoundex);

    echo '<p><b>Liste des équivalences de PHONEX :</b></p><br/>';
    printResults($tabPhonex, $tabDiff);
}
|
|
|
|
?>
|