From 02ac466cc2d2f026acde12bcbf8749f98a34fc79 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Fri, 18 Apr 2014 07:59:04 +0000 Subject: [PATCH 01/40] Create the branch 1.3 From d9249f0a419579f68eda03d79eaf8ef7099b4596 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Fri, 18 Apr 2014 12:07:04 +0000 Subject: [PATCH 02/40] =?UTF-8?q?issue=20#0001932=20:=20Script=20pour=20la?= =?UTF-8?q?=20gestion=20de=20l'indexation=20apr=C3=A8s=20la=20consolidatio?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- indexer/indexer-ciblage.sh | 40 +++++++++++++++++++++++++++++ indexer/sql/consolidate-ciblage.sql | 6 +++++ 2 files changed, 46 insertions(+) create mode 100644 indexer/indexer-ciblage.sh create mode 100644 indexer/sql/consolidate-ciblage.sql diff --git a/indexer/indexer-ciblage.sh b/indexer/indexer-ciblage.sh new file mode 100644 index 0000000..52c7e0a --- /dev/null +++ b/indexer/indexer-ciblage.sh @@ -0,0 +1,40 @@ +#!/bin/bash +PATH_BIN=/usr/local/sphinx/bin +PATH_LOG=/dbs/sphinxlog +PATH_SQL=/home/scripts/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log + +# Is consolidated ? 
+output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-ciblage.sql) +idx=''; +for line in "$output"; do + idx="$line"; +done +# Suppression fin de ligne +idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") + +# Lancement de l'indexation si la consolidation a eu lieu +if [ -n "$idx" ]; then + if [[ "$idx" > 0 ]]; then + + # Enregistrement Debut Indexation + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "INSERT INTO sphinx_idx (indexingBegin) VALUES (NOW()) WHERE id=$idx" + + # Sphinx rotate + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log + $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ciblage >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + + # Enregistrement Fin Indexation + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "INSERT INTO sphinx_idx (indexingEnd) VALUES (NOW()) WHERE id=$idx" + + fi +fi + +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log + + diff --git a/indexer/sql/consolidate-ciblage.sql b/indexer/sql/consolidate-ciblage.sql new file mode 100644 index 0000000..b52b843 --- /dev/null +++ b/indexer/sql/consolidate-ciblage.sql @@ -0,0 +1,6 @@ +SELECT id FROM sphinx_idx +WHERE createEnd BETWEEN (NOW() - INTERVAL 1 HOUR) AND NOW() +AND nom = 'jo.etablissements_act' +AND indexingBegin IS NULL +AND indexingEnd IS NULL +ORDER BY createEnd DESC LIMIT 1; \ No newline at end of file From 589da22cdac6050e28f8c7ec766c66275bd3860c Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Fri, 18 Apr 2014 13:08:34 +0000 Subject: [PATCH 03/40] issue #0001932 : Make an sql update --- indexer/indexer-ciblage.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/indexer/indexer-ciblage.sh b/indexer/indexer-ciblage.sh index 52c7e0a..f542839 100644 --- a/indexer/indexer-ciblage.sh +++ b/indexer/indexer-ciblage.sh @@ -22,7 +22,7 @@ if [ -n "$idx" ]; then if 
[[ "$idx" > 0 ]]; then # Enregistrement Debut Indexation - mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "INSERT INTO sphinx_idx (indexingBegin) VALUES (NOW()) WHERE id=$idx" + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log # Sphinx rotate echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log @@ -30,7 +30,7 @@ if [ -n "$idx" ]; then echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log # Enregistrement Fin Indexation - mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "INSERT INTO sphinx_idx (indexingEnd) VALUES (NOW()) WHERE id=$idx" + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log fi fi From 04f472bc869f52515ce4d2621de19ba1708d60d3 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Fri, 18 Apr 2014 13:27:44 +0000 Subject: [PATCH 04/40] issue #0001933 : Prepare file name and update --- indexer/{indexerAct.sh => indexer-act.sh} | 0 indexer/indexer-dir.sh | 48 ++++++++++++++++++ indexer/indexer-etab.sh | 49 +++++++++++++++++++ indexer/indexerDiri.sh | 39 --------------- indexer/indexerEtab.sh | 43 ---------------- indexer/sql/count-dir.sql | 1 + indexer/sql/count-dirtmp.sql | 1 + indexer/sql/{countEtab.sql => count-ent.sql} | 0 .../{countEtab_tmp.sql => count-enttmp.sql} | 0 indexer/sql/countDiri.sql | 1 - indexer/sql/countDiri_tmp.sql | 1 - .../{finIndexationDiri.sql => rotate-dir.sql} | 0 .../{finIndexationEtab.sql => rotate-ent.sql} | 0 13 files changed, 99 insertions(+), 84 deletions(-) rename indexer/{indexerAct.sh => indexer-act.sh} (100%) create mode 100755 indexer/indexer-dir.sh create mode 100755 indexer/indexer-etab.sh delete mode 100755 indexer/indexerDiri.sh delete mode 100755 indexer/indexerEtab.sh create mode 100644 indexer/sql/count-dir.sql create mode 100644 indexer/sql/count-dirtmp.sql rename 
indexer/sql/{countEtab.sql => count-ent.sql} (100%) rename indexer/sql/{countEtab_tmp.sql => count-enttmp.sql} (100%) delete mode 100644 indexer/sql/countDiri.sql delete mode 100644 indexer/sql/countDiri_tmp.sql rename indexer/sql/{finIndexationDiri.sql => rotate-dir.sql} (100%) rename indexer/sql/{finIndexationEtab.sql => rotate-ent.sql} (100%) diff --git a/indexer/indexerAct.sh b/indexer/indexer-act.sh similarity index 100% rename from indexer/indexerAct.sh rename to indexer/indexer-act.sh diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh new file mode 100755 index 0000000..8a6055a --- /dev/null +++ b/indexer/indexer-dir.sh @@ -0,0 +1,48 @@ +#!/bin/bash +PATH_BIN=/usr/local/sphinx/bin +PATH_LOG=/dbs/sphinxlog +PATH_SQL=/home/scripts/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION DIR" >> $PATH_LOG/indexer.log + +# Nombre de lignes dans la table actuelle +output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-dir.sql) +for line in "$output"; do + nbC="$line" +done +# Suppression fin de ligne +nbC=$(echo $nbC|sed -e "s/^[nbC ]*//g"||sed -e "s/[ ]*$//g") + +# Nombre de lignes dans la table nouvelle +output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-dirtmp.sql) +for line in "$output"; do + nbT="$line" +done +# Suppression fin de ligne +nbT=$(echo $nbT|sed -e "s/^[nbT ]*//g"||sed -e "s/[ ]*$//g") + +# Comparaison du nombre de lignes entre nbC (ancien) et nbT (nouveau) +if [ -z "$nbC" ]; then + echo "etab = null" +elif [ -z "$nbT" ]; then + echo "etab_tmp = null" +elif [ $nbT -gt $nbC ]; then + + echo "Il y a $nbT lignes dans la nouvelle table dirigeants ($nbC lignes dans l'ancienne)" >> $PATH_LOG/indexer.log + # Sphinx rotate + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log + $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate dir dir_phx >> $PATH_LOG/indexer.log + echo "$(date 
'+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + + # Rotation des tables MySQL + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-dir.sql >> $PATH_LOG/indexer.log + +else + # Il y a plus de lignes dans l'ancienne table dirigeants ==> On ne fait rien + echo "Il n'y a pas assez de lignes dans la nouvelle table dirigeants ($nbT / $nbC) !" >> $PATH_LOG/indexer.log +fi + +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION DIR" >> $PATH_LOG/indexer.log diff --git a/indexer/indexer-etab.sh b/indexer/indexer-etab.sh new file mode 100755 index 0000000..0bf0d98 --- /dev/null +++ b/indexer/indexer-etab.sh @@ -0,0 +1,49 @@ +#!/bin/bash +PATH_BIN=/usr/local/sphinx/bin +PATH_LOG=/dbs/sphinxlog +PATH_SQL=/home/scripts/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log + +# Nombre de lignes dans la table actuelle +output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-ent.sql) +for line in "$output"; do + nbC="$line" +done +# Suppression fin de ligne +nbC=$(echo $nbC|sed -e "s/^[nbC ]*//g"||sed -e "s/[ ]*$//g") + +# Nombre de lignes dans la table nouvelle +output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-enttmp.sql) +for line in "$output"; do + nbT="$line" +done +# Suppression fin de ligne +nbT=$(echo $nbT|sed -e "s/^[nbT ]*//g"||sed -e "s/[ ]*$//g") + +# Comparaison du nombre de lignes entre etab (ancien) et etab_tmp (nouveau) +if [ -z "$nbC" ]; then + echo "nbC = null" +elif [ -z "$nbT" ]; then + echo "nbT = null" +elif [ $nbT -gt $nbC ]; then + + echo "Il y a $nbT lignes dans la nouvelle table etablissement ($nbC lignes dans l'ancienne)" >> $PATH_LOG/indexer.log + + # Sphinx rotate + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log + $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ent ent_phx >> $PATH_LOG/indexer.log + echo "$(date 
'+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + + # Rotation des tables MySQL + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-ent.sql >> $PATH_LOG/indexer.log + +else + # Il y a plus de lignes dans l'ancienne table etablissements ==> On ne fait rien + echo "Il n'y a pas assez de lignes dans la nouvelle table etablissement ($nbT / $nbC) !" >> $PATH_LOG/indexer.log +fi + +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log diff --git a/indexer/indexerDiri.sh b/indexer/indexerDiri.sh deleted file mode 100755 index 247bf80..0000000 --- a/indexer/indexerDiri.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/dbs/sphinxlog -PATH_SQL=/home/scripts/indexer/sql - -echo "$(date '+%Y-%m-%d %H:%M:%S') === DEBUT de l'indexation" >> $PATH_LOG/indexer.log - -# nombre de lignes dans la table etab -output=$(mysql -h192.168.3.30 -usphinx -pindexer jo < $PATH_SQL/countDiri.sql) -for line in "$output"; do - etab="$line" -done -# Suppression fin de ligne -etab=$(echo $etab|sed -e "s/^[etab ]*//g"||sed -e "s/[ ]*$//g") - -# nombre de lignes dans la table diri_tmp -output=$(mysql -h192.168.3.30 -usphinx -pindexer jo < $PATH_SQL/countDiri_tmp.sql) -for line in "$output"; do - etab_tmp="$line" -done -# Suppression fin de ligne -etab_tmp=$(echo $etab_tmp|sed -e "s/^[etab_tmp ]*//g"||sed -e "s/[ ]*$//g") - -# Comparaison du nombre de lignes entre etab (ancien) et etab_tmp (nouveau) -if [ -z "$etab" ]; then - echo "etab = null" -elif [ -z "$etab_tmp" ]; then - echo "etab_tmp = null" -elif [ $etab_tmp -gt $etab ]; then - echo "Il y a $etab_tmp lignes dans la nouvelle table dirigeants ($etab lignes dans l'ancienne)" >> $PATH_LOG/indexer.log - $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate dir dir_phx >> $PATH_LOG/indexer.log - echo "Sphinx - termine : $(date +%H:%M:%S)" >> $PATH_LOG/indexer.log - mysql -f -usphinx -h192.168.3.30 -pindexer jo < 
$PATH_SQL/finIndexationDiri.sql >> $PATH_LOG/indexer.log -else - # Il y a plus de lignes dans l'ancienne table dirigeants ==> On ne fait rien - echo "Il n'y a pas assez de lignes dans la nouvelle table dirigeants ($etab_tmp / $etab) !" >> $PATH_LOG/indexer.log -fi - -echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN" >> $PATH_LOG/indexer.log diff --git a/indexer/indexerEtab.sh b/indexer/indexerEtab.sh deleted file mode 100755 index cdeb932..0000000 --- a/indexer/indexerEtab.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/dbs/sphinxlog -PATH_SQL=/home/scripts/indexer/sql - -echo "$(date '+%Y-%m-%d %H:%M:%S') === DEBUT de l'indexation" >> $PATH_LOG/indexer.log - -# nombre de lignes dans la table etab -output=$(mysql -h192.168.3.30 -usphinx -pindexer jo < $PATH_SQL/countEtab.sql) -for line in "$output"; do - etab="$line" -done -# Suppression fin de ligne -etab=$(echo $etab|sed -e "s/^[etab ]*//g"||sed -e "s/[ ]*$//g") -# Cast de la variable au cas ou -#etab=$(echo $etab| gawk '{print + 0}') - -# nombre de lignes dans la table etab_tmp -output=$(mysql -h192.168.3.30 -usphinx -pindexer jo < $PATH_SQL/countEtab_tmp.sql) -for line in "$output"; do - etab_tmp="$line" -done -# Suppression fin de ligne -etab_tmp=$(echo $etab_tmp|sed -e "s/^[etab_tmp ]*//g"||sed -e "s/[ ]*$//g") -# Cast de la variable au cas ou -#etab_tmp=$(echo $etab_tmp| gawk '{print + 0}') - -# Comparaison du nombre de lignes entre etab (ancien) et etab_tmp (nouveau) -if [ -z "$etab" ]; then - echo "etab = null" -elif [ -z "$etab_tmp" ]; then - echo "etab_tmp = null" -elif [ $etab_tmp -gt $etab ]; then - echo "Il y a $etab_tmp lignes dans la nouvelle table etablissement ($etab lignes dans l'ancienne)" >> $PATH_LOG/indexer.log - $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ent ent_phx >> $PATH_LOG/indexer.log - echo "Sphinx - terminé : $(date +%H:%M:%S)" >> $PATH_LOG/indexer.log - mysql -f -usphinx -h192.168.3.30 -pindexer jo < 
$PATH_SQL/finIndexationEtab.sql >> $PATH_LOG/indexer.log -else - # Il y a plus de lignes dans l'ancienne table etablissements ==> On ne fait rien - echo "Il n'y a pas assez de lignes dans la nouvelle table etablissement ($etab_tmp / $etab) !" >> $PATH_LOG/indexer.log -fi - -echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN" >> $PATH_LOG/indexer.log diff --git a/indexer/sql/count-dir.sql b/indexer/sql/count-dir.sql new file mode 100644 index 0000000..63da2a0 --- /dev/null +++ b/indexer/sql/count-dir.sql @@ -0,0 +1 @@ +SELECT count(*) AS nb from dirigeants; \ No newline at end of file diff --git a/indexer/sql/count-dirtmp.sql b/indexer/sql/count-dirtmp.sql new file mode 100644 index 0000000..a1ae09b --- /dev/null +++ b/indexer/sql/count-dirtmp.sql @@ -0,0 +1 @@ +SELECT count(*) AS nb from dirigeants_tmp; diff --git a/indexer/sql/countEtab.sql b/indexer/sql/count-ent.sql similarity index 100% rename from indexer/sql/countEtab.sql rename to indexer/sql/count-ent.sql diff --git a/indexer/sql/countEtab_tmp.sql b/indexer/sql/count-enttmp.sql similarity index 100% rename from indexer/sql/countEtab_tmp.sql rename to indexer/sql/count-enttmp.sql diff --git a/indexer/sql/countDiri.sql b/indexer/sql/countDiri.sql deleted file mode 100644 index 7605dc6..0000000 --- a/indexer/sql/countDiri.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT count(*) AS etab from dirigeants; \ No newline at end of file diff --git a/indexer/sql/countDiri_tmp.sql b/indexer/sql/countDiri_tmp.sql deleted file mode 100644 index 0a27146..0000000 --- a/indexer/sql/countDiri_tmp.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT count(*) AS etab_tmp from dirigeants_tmp; diff --git a/indexer/sql/finIndexationDiri.sql b/indexer/sql/rotate-dir.sql similarity index 100% rename from indexer/sql/finIndexationDiri.sql rename to indexer/sql/rotate-dir.sql diff --git a/indexer/sql/finIndexationEtab.sql b/indexer/sql/rotate-ent.sql similarity index 100% rename from indexer/sql/finIndexationEtab.sql rename to indexer/sql/rotate-ent.sql From 
a666ab3720537b280b79ca7a652a3d0ad94e98c1 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Fri, 18 Apr 2014 13:34:59 +0000 Subject: [PATCH 05/40] issue #0001933 : Prepare --- indexer/indexer-act.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/indexer/indexer-act.sh b/indexer/indexer-act.sh index 95fa4c4..8495708 100644 --- a/indexer/indexer-act.sh +++ b/indexer/indexer-act.sh @@ -2,6 +2,6 @@ PATH_BIN=/usr/local/sphinx/bin PATH_LOG=/dbs/sphinxlog -echo "$(date '+%Y-%m-%d %H:%M:%S') === DEBUT de l'indexation" >> $PATH_LOG/indexer.log +echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ACT" >> $PATH_LOG/indexer.log $PATH_BIN/indexer --rotate act >> $PATH_LOG/indexer.log -echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN" >> $PATH_LOG/indexer.log +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ACT" >> $PATH_LOG/indexer.log From 418318bd0188d29f02c2d3e5fd0e90019ba05372 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Fri, 18 Apr 2014 15:47:05 +0000 Subject: [PATCH 06/40] issue #0001933 : Use table to know when we start indexing --- indexer/indexer-ciblage.sh | 4 +-- indexer/indexer-dir.sh | 43 +++++++++++++---------------- indexer/indexer-ent.sh | 43 +++++++++++++++++++++++++++++ indexer/indexer-etab.sh | 49 --------------------------------- indexer/sql/consolidate-dir.sql | 6 ++++ indexer/sql/consolidate-ent.sql | 6 ++++ 6 files changed, 76 insertions(+), 75 deletions(-) create mode 100755 indexer/indexer-ent.sh delete mode 100755 indexer/indexer-etab.sh create mode 100644 indexer/sql/consolidate-dir.sql create mode 100644 indexer/sql/consolidate-ent.sql diff --git a/indexer/indexer-ciblage.sh b/indexer/indexer-ciblage.sh index f542839..ec14b18 100644 --- a/indexer/indexer-ciblage.sh +++ b/indexer/indexer-ciblage.sh @@ -23,7 +23,7 @@ if [ -n "$idx" ]; then # Enregistrement Debut Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log - + # Sphinx 
rotate echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ciblage >> $PATH_LOG/indexer.log @@ -31,7 +31,7 @@ if [ -n "$idx" ]; then # Enregistrement Fin Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log - + fi fi diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh index 8a6055a..26a2ef8 100755 --- a/indexer/indexer-dir.sh +++ b/indexer/indexer-dir.sh @@ -8,41 +8,36 @@ MYSQL_PASS=indexer echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION DIR" >> $PATH_LOG/indexer.log -# Nombre de lignes dans la table actuelle -output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-dir.sql) +# Is consolidated ? +output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-dir.sql) +idx=''; for line in "$output"; do - nbC="$line" + idx="$line"; done # Suppression fin de ligne -nbC=$(echo $nbC|sed -e "s/^[nbC ]*//g"||sed -e "s/[ ]*$//g") +idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") -# Nombre de lignes dans la table nouvelle -output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-dirtmp.sql) -for line in "$output"; do - nbT="$line" -done -# Suppression fin de ligne -nbT=$(echo $nbT|sed -e "s/^[nbT ]*//g"||sed -e "s/[ ]*$//g") - -# Comparaison du nombre de lignes entre nbC (ancien) et nbT (nouveau) -if [ -z "$nbC" ]; then - echo "etab = null" -elif [ -z "$nbT" ]; then - echo "etab_tmp = null" -elif [ $nbT -gt $nbC ]; then +# Lancement de l'indexation si la consolidation a eu lieu +if [ -n "$idx" ]; then + if [[ "$idx" > 0 ]]; then - echo "Il y a $nbT lignes dans la nouvelle table dirigeants ($nbC lignes dans l'ancienne)" >> $PATH_LOG/indexer.log + # Enregistrement Debut Indexation + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> 
$PATH_LOG/indexer.log + # Sphinx rotate echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate dir dir_phx >> $PATH_LOG/indexer.log echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log - # Rotation des tables MySQL - mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-dir.sql >> $PATH_LOG/indexer.log + # Enregistrement Fin Indexation + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log -else - # Il y a plus de lignes dans l'ancienne table dirigeants ==> On ne fait rien - echo "Il n'y a pas assez de lignes dans la nouvelle table dirigeants ($nbT / $nbC) !" >> $PATH_LOG/indexer.log + # Rotation des tables MySQL + echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Debut" >> $PATH_LOG/indexer.log + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-dir.sql >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log + + fi fi echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION DIR" >> $PATH_LOG/indexer.log diff --git a/indexer/indexer-ent.sh b/indexer/indexer-ent.sh new file mode 100755 index 0000000..f377be5 --- /dev/null +++ b/indexer/indexer-ent.sh @@ -0,0 +1,43 @@ +#!/bin/bash +PATH_BIN=/usr/local/sphinx/bin +PATH_LOG=/dbs/sphinxlog +PATH_SQL=/home/scripts/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log + +# Is consolidated ? 
+output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-ent.sql) +idx=''; +for line in "$output"; do + idx="$line"; +done +# Suppression fin de ligne +idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") + +# Lancement de l'indexation si la consolidation a eu lieu +if [ -n "$idx" ]; then + if [[ "$idx" > 0 ]]; then + + # Enregistrement Debut Indexation + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log + + # Sphinx rotate + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log + $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ent ent_phx >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + + # Enregistrement Fin Indexation + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log + + # Rotation des tables MySQL + echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Debut" >> $PATH_LOG/indexer.log + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-ent.sql >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log + + fi +fi + +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log diff --git a/indexer/indexer-etab.sh b/indexer/indexer-etab.sh deleted file mode 100755 index 0bf0d98..0000000 --- a/indexer/indexer-etab.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash -PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/dbs/sphinxlog -PATH_SQL=/home/scripts/indexer/sql -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer - -echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log - -# Nombre de lignes dans la table actuelle -output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-ent.sql) -for line in "$output"; do - 
nbC="$line" -done -# Suppression fin de ligne -nbC=$(echo $nbC|sed -e "s/^[nbC ]*//g"||sed -e "s/[ ]*$//g") - -# Nombre de lignes dans la table nouvelle -output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-enttmp.sql) -for line in "$output"; do - nbT="$line" -done -# Suppression fin de ligne -nbT=$(echo $nbT|sed -e "s/^[nbT ]*//g"||sed -e "s/[ ]*$//g") - -# Comparaison du nombre de lignes entre etab (ancien) et etab_tmp (nouveau) -if [ -z "$nbC" ]; then - echo "nbC = null" -elif [ -z "$nbT" ]; then - echo "nbT = null" -elif [ $nbT -gt $nbC ]; then - - echo "Il y a $nbT lignes dans la nouvelle table etablissement ($nbC lignes dans l'ancienne)" >> $PATH_LOG/indexer.log - - # Sphinx rotate - echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log - $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ent ent_phx >> $PATH_LOG/indexer.log - echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log - - # Rotation des tables MySQL - mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-ent.sql >> $PATH_LOG/indexer.log - -else - # Il y a plus de lignes dans l'ancienne table etablissements ==> On ne fait rien - echo "Il n'y a pas assez de lignes dans la nouvelle table etablissement ($nbT / $nbC) !" 
>> $PATH_LOG/indexer.log -fi - -echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log diff --git a/indexer/sql/consolidate-dir.sql b/indexer/sql/consolidate-dir.sql new file mode 100644 index 0000000..25ee3fb --- /dev/null +++ b/indexer/sql/consolidate-dir.sql @@ -0,0 +1,6 @@ +SELECT id FROM sphinx_idx +WHERE createEnd BETWEEN (NOW() - INTERVAL 1 HOUR) AND NOW() +AND nom = 'jo.dirigeants' +AND indexingBegin IS NULL +AND indexingEnd IS NULL +ORDER BY createEnd DESC LIMIT 1; \ No newline at end of file diff --git a/indexer/sql/consolidate-ent.sql b/indexer/sql/consolidate-ent.sql new file mode 100644 index 0000000..fe1ecd1 --- /dev/null +++ b/indexer/sql/consolidate-ent.sql @@ -0,0 +1,6 @@ +SELECT id FROM sphinx_idx +WHERE createEnd BETWEEN (NOW() - INTERVAL 1 HOUR) AND NOW() +AND nom = 'jo.etablissements' +AND indexingBegin IS NULL +AND indexingEnd IS NULL +ORDER BY createEnd DESC LIMIT 1; \ No newline at end of file From cb0229ae48d470c20d06a74d69a8a245d210059c Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Tue, 22 Apr 2014 08:38:41 +0000 Subject: [PATCH 07/40] Scripts is validated --- indexer/indexer-ciblage.sh | 8 ++++---- indexer/indexer-dir.sh | 10 +++++----- indexer/indexer-ent.sh | 8 ++++---- indexer/sql/consolidate-ciblage.sql | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/indexer/indexer-ciblage.sh b/indexer/indexer-ciblage.sh index ec14b18..d415ad1 100644 --- a/indexer/indexer-ciblage.sh +++ b/indexer/indexer-ciblage.sh @@ -6,8 +6,6 @@ MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer -echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log - # Is consolidated ? 
output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-ciblage.sql) idx=''; @@ -21,6 +19,8 @@ idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") if [ -n "$idx" ]; then if [[ "$idx" > 0 ]]; then + echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log + # Enregistrement Debut Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log @@ -32,9 +32,9 @@ if [ -n "$idx" ]; then # Enregistrement Fin Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log + fi fi -echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log - diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh index 26a2ef8..8307c67 100755 --- a/indexer/indexer-dir.sh +++ b/indexer/indexer-dir.sh @@ -6,8 +6,6 @@ MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer -echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION DIR" >> $PATH_LOG/indexer.log - # Is consolidated ? 
output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-dir.sql) idx=''; @@ -21,6 +19,8 @@ idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") if [ -n "$idx" ]; then if [[ "$idx" > 0 ]]; then + echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION DIR" >> $PATH_LOG/indexer.log + # Enregistrement Debut Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log @@ -36,8 +36,8 @@ if [ -n "$idx" ]; then echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Debut" >> $PATH_LOG/indexer.log mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-dir.sql >> $PATH_LOG/indexer.log echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log - + + echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION DIR" >> $PATH_LOG/indexer.log + fi fi - -echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION DIR" >> $PATH_LOG/indexer.log diff --git a/indexer/indexer-ent.sh b/indexer/indexer-ent.sh index f377be5..3b3a428 100755 --- a/indexer/indexer-ent.sh +++ b/indexer/indexer-ent.sh @@ -6,8 +6,6 @@ MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer -echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log - # Is consolidated ? 
output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-ent.sql) idx=''; @@ -21,6 +19,8 @@ idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") if [ -n "$idx" ]; then if [[ "$idx" > 0 ]]; then + echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log + # Enregistrement Debut Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log @@ -37,7 +37,7 @@ if [ -n "$idx" ]; then mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-ent.sql >> $PATH_LOG/indexer.log echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log + fi fi - -echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log diff --git a/indexer/sql/consolidate-ciblage.sql b/indexer/sql/consolidate-ciblage.sql index b52b843..2e14254 100644 --- a/indexer/sql/consolidate-ciblage.sql +++ b/indexer/sql/consolidate-ciblage.sql @@ -1,5 +1,5 @@ SELECT id FROM sphinx_idx -WHERE createEnd BETWEEN (NOW() - INTERVAL 1 HOUR) AND NOW() +WHERE createEnd BETWEEN (NOW() - INTERVAL 2 HOUR) AND NOW() AND nom = 'jo.etablissements_act' AND indexingBegin IS NULL AND indexingEnd IS NULL From 8c95b1d569aac3dd42f994c5bf39ae7bd9d4f181 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Thu, 15 May 2014 10:17:02 +0000 Subject: [PATCH 08/40] =?UTF-8?q?issue=20#0001933,=20issue=20#0001932=20:?= =?UTF-8?q?=20Gestion=20du=20d=C3=A9part=20de=20l'indexation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- indexer/indexer-dir.sh | 53 +++++++++++++++++++++++++++++----------- indexer/indexer-ent.sh | 55 ++++++++++++++++++++++++++++++------------ 2 files changed, 79 insertions(+), 29 deletions(-) diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh index 8307c67..e8a5ed1 100755 --- 
a/indexer/indexer-dir.sh
+++ b/indexer/indexer-dir.sh
@@ -21,23 +21,48 @@ if [ -n "$idx" ]; then
 
 		echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION DIR" >> $PATH_LOG/indexer.log
 
-		# Enregistrement Debut Indexation
-		mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log
+		# Number of rows in the live table (result of count-dir.sql; the original comment named "etablissements" - verify)
+		output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-dir.sql)
+		nbC='';
+		for line in "$output"; do
+			nbC="$line"
+		done
+		# Strip leading 'n', 'b', 'C' and space characters (presumably the column header - confirm against the SQL), then trailing spaces
+		nbC=$(echo $nbC|sed -e "s/^[nbC ]*//g"|sed -e "s/[ ]*$//g")
+		if [[ "$nbC" =~ ^[0-9]+$ ]]; then nbC=$(($nbC - 1000)); else nbC=''; fi	# 1000-row margin; left empty when the count is missing or not a number
 
-		# Sphinx rotate
-		echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log
-		$PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate dir dir_phx >> $PATH_LOG/indexer.log
-		echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log
+		# Number of rows in the _tmp table (result of count-dirtmp.sql). NOTE(review): no database is passed here, unlike the count above - presumably the SQL qualifies the table; confirm
+		output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS < $PATH_SQL/count-dirtmp.sql)
+		nbT='';
+		for line in "$output"; do
+			nbT="$line"
+		done
+		# Strip leading 'n', 'b', 'T' and space characters (presumably the column header - confirm against the SQL), then trailing spaces
+		nbT=$(echo $nbT|sed -e "s/^[nbT ]*//g"|sed -e "s/[ ]*$//g")
 
-		# Enregistrement Fin Indexation
-		mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log
+		if [ -z "$nbC" ]; then :	# no usable count for the live table: do nothing (':' keeps the branch syntactically valid)
+		elif [ -z "$nbT" ]; then :	# no usable count for the _tmp table: do nothing
+		elif [ "$nbT" -gt "$nbC" ]; then
 
-		# Rotation des tables MySQL
-		echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Debut" >> $PATH_LOG/indexer.log
-		mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-dir.sql >> $PATH_LOG/indexer.log
-		echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log
-
-		echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION DIR" >> $PATH_LOG/indexer.log
+			# Record the start of indexing
+			mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log
+
+			# Rebuild the Sphinx indexes with --rotate
+			echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log
+			$PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate dir dir_phx >> $PATH_LOG/indexer.log
+			echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log
+
+			# Rotate the MySQL tables (rotate-dir.sql)
+			echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Debut" >> $PATH_LOG/indexer.log
+			mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-dir.sql >> $PATH_LOG/indexer.log
+			echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log
+
+			# Record the end of indexing
+			mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log
+
+			echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION DIR" >> $PATH_LOG/indexer.log
+
+		fi
 
 	fi
 fi
diff --git a/indexer/indexer-ent.sh b/indexer/indexer-ent.sh
index 3b3a428..1e45e44 100755
--- a/indexer/indexer-ent.sh
+++ b/indexer/indexer-ent.sh
@@ -21,23 +21,48 @@ if [ -n "$idx" ]; then
 
 		echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log
 
-		# Enregistrement Debut Indexation
-		mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log
+		# Number of rows in the etablissements table (result of count-ent.sql)
+		output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-ent.sql)
+		nbC='';
+		for line in "$output"; do
+			nbC="$line"
+		done
+		# Strip leading 'n', 'b', 'C' and space characters (presumably the column header - confirm against the SQL), then trailing spaces
+		nbC=$(echo $nbC|sed -e "s/^[nbC ]*//g"|sed -e "s/[ ]*$//g")
+		if [[ "$nbC" =~ ^[0-9]+$ ]]; then nbC=$(($nbC - 1000)); else nbC=''; fi	# 1000-row margin; left empty when the count is missing or not a number
 
-		# Sphinx rotate
-		echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log
-		$PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ent ent_phx >> $PATH_LOG/indexer.log
-		echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log
+		# Number of rows in the etablissements_tmp table (result of count-enttmp.sql). NOTE(review): no database is passed here, unlike the count above - presumably the SQL qualifies the table; confirm
+		output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS < $PATH_SQL/count-enttmp.sql)
+		nbT='';
+		for line in "$output"; do
+			nbT="$line"
+		done
+		# Strip leading 'n', 'b', 'T' and space characters (presumably the column header - confirm against the SQL), then trailing spaces
+		nbT=$(echo $nbT|sed -e "s/^[nbT ]*//g"|sed -e "s/[ ]*$//g")
 
-		# Enregistrement Fin Indexation
-		mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log
-
-		# Rotation des tables MySQL
-		echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Debut" >> $PATH_LOG/indexer.log
-		mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-ent.sql >> $PATH_LOG/indexer.log
-		echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log
+		if [ -z "$nbC" ]; then :	# no usable count for the live table: do nothing (':' keeps the branch syntactically valid)
+		elif [ -z "$nbT" ]; then :	# no usable count for the _tmp table: do nothing
+		elif [ "$nbT" -gt "$nbC" ]; then
+
+			# Record the start of indexing
+			mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log
+
+			# Rebuild the Sphinx indexes with --rotate
+			echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log
+			$PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ent ent_phx >> $PATH_LOG/indexer.log
+			echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log
+
+			# Rotate the MySQL tables (rotate-ent.sql)
+			echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Debut" >> $PATH_LOG/indexer.log
+			mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-ent.sql >> $PATH_LOG/indexer.log
+			echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log
 
-		echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log
-
+			# Record the end of indexing
+			mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log
+
+			echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log
+
+		fi
+
 	fi
 fi

From e86f75adec9ac6db29bd155733d9fdf6fa18163d Mon Sep 17 00:00:00 2001
From: Michael RICOIS
Date: Wed, 13 Aug 2014 13:55:26 +0000
Subject: [PATCH 09/40] Reorganize file

---
 scripts/build/README | 90 +++++++++
 scripts/build/config-dev/act.conf | 42 ++++
 scripts/build/config-dev/ciblage.conf | 157 +++++++++++++++
 scripts/build/config-dev/ciblage.txt | 110 +++++++++++
 scripts/build/config-dev/dir.conf | 64 ++++++
 scripts/build/config-dev/dirtmp.conf | 63 ++++++
 scripts/build/config-dev/ent.conf | 68 +++++++
 scripts/build/config-dev/enttmp.conf | 69 +++++++
 scripts/build/config-dev/histo.conf | 24 +++
 scripts/build/config-dev/sphinx.conf | 79 ++++++++
 scripts/build/config-dev/stopwords-ent.txt | 0
 scripts/build/config-dev/wordforms-ent.txt | 207 ++++++++++++++++++++
 scripts/build/config-prod/act.conf | 42 ++++
 scripts/build/config-prod/ciblage.conf | 157 +++++++++++++++
 scripts/build/config-prod/dir.conf | 64 ++++++
 scripts/build/config-prod/dirtmp.conf | 63 ++++++
 scripts/build/config-prod/ent.conf | 73 +++++++
 scripts/build/config-prod/enttmp.conf | 73 +++++++
 scripts/build/config-prod/sphinx.conf | 79 ++++++++
 scripts/build/config-prod/wordforms-ent.txt | 207 ++++++++++++++++++++
 20 files changed, 1731 insertions(+)
 create mode 100644 scripts/build/README
 create mode 100644 scripts/build/config-dev/act.conf
 create mode 100644 scripts/build/config-dev/ciblage.conf
 create mode 100644 scripts/build/config-dev/ciblage.txt
 create mode 100644 scripts/build/config-dev/dir.conf
 create mode 100644 scripts/build/config-dev/dirtmp.conf
 create mode 100644 scripts/build/config-dev/ent.conf
 create mode 100644 scripts/build/config-dev/enttmp.conf
 create mode 100644 scripts/build/config-dev/histo.conf
 create mode 100644 scripts/build/config-dev/sphinx.conf
 create mode 100644 scripts/build/config-dev/stopwords-ent.txt
 create mode 100644 scripts/build/config-dev/wordforms-ent.txt
 create mode 100644 scripts/build/config-prod/act.conf
 create mode
100644 scripts/build/config-prod/ciblage.conf create mode 100644 scripts/build/config-prod/dir.conf create mode 100644 scripts/build/config-prod/dirtmp.conf create mode 100644 scripts/build/config-prod/ent.conf create mode 100644 scripts/build/config-prod/enttmp.conf create mode 100644 scripts/build/config-prod/sphinx.conf create mode 100644 scripts/build/config-prod/wordforms-ent.txt diff --git a/scripts/build/README b/scripts/build/README new file mode 100644 index 0000000..9ee4c35 --- /dev/null +++ b/scripts/build/README @@ -0,0 +1,90 @@ +La base de données doit communiquer en UTF-8 + +sql_query_pre = SET NAMES utf8 +charset_type = utf-8 + +# 'utf-8' defaults for English and Russian +charset_table = 0..9, A..Z->a..z, _, a..z, \ + U+410..U+42F->U+430..U+44F, U+430..U+44F + +# For french +charset_table = 0..9, A..Z->a..z, a..z, \ + U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, \ + U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, \ + U+0152->U+0153, U+0153 \ + +Morphology +========== +# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru', +# 'soundex', and 'metaphone'; additional preprocessors available from +# libstemmer are 'libstemmer_XXX', where XXX is algorithm code +# (see libstemmer_c/libstemmer/modules.txt) + +morphology = libstemmer_french + +Taille minimum des mots où l'on applique la morphology +min_stemming_len = 4 + + +According to libstemmer_c/libstemmer/modules.txt, the french module can be referred to by either + french, fr, fre, fra...
french UTF_8,ISO_8859_1 french,fr,fre,fra + + +Utilisation des stopwords +========================= +un, une, le, la, les, de, des, du, dans, l', d', @ + + + + + +Utilisation des wordforms +========================= +Appliqués après les règles de charset_table +Le stemming n'est pas appliqué à ces mots, ils deviennent des exceptions + +Les mots définis sont utilisés pour normaliser les mots durant l'indexation et la recherche, +il est donc nécessaire de réindexer et de redémarrer la recherche pour appliquer les changements + +Divers +------ + +& > ET + +Les chiffres +------------ + +un > 1 +deux > 2 +trois > 3 +quatre > 4 +cinq > 5 +six > 6 +sept > 7 +huit > 8 +neuf > 9 +dix > 10 +onze > 11 +douze > 12 +treize > 13 +quatorze > 14 +quinze > 15 +seize > 16 +vingt > 20 +vingts > 20 +trente > 30 +quarante > 40 +cinquante > 50 +soixante > 60 +quatrevingt > 80 +cent > 100 +cents > 100 +mille > 1000 + +Les chiffres romains +------------------- + + +Les éléments de voies et abréviations +-------------------------------------- +Voir le wordforms diff --git a/scripts/build/config-dev/act.conf b/scripts/build/config-dev/act.conf new file mode 100644 index 0000000..0ba5e99 --- /dev/null +++ b/scripts/build/config-dev/act.conf @@ -0,0 +1,42 @@ + +source act +{ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + + sql_query = \ + SELECT l.id, l.idPar, l.idAct, r.siren, r.actif, r.PpPm, r.RS, r.adresse_cp, r.adresse_ville, p.libPays AS pays, l.PDetention \ + FROM liens2 l, liensRef r, tabPays p \ + WHERE l.dateSuppr = '0000-00-00 00:00:00' \ + AND r.id = l.idAct \ + AND ( r.siren>1000 OR (r.siren=0 AND r.adresse_pays!='FRA') OR (r.siren=0 AND r.PpPm='PP') ) \ + AND p.codPays3 = r.adresse_pays; + + sql_attr_uint = idAct + sql_attr_uint = actif + sql_attr_float= PDetention + sql_attr_string = pays +} + +index act +{ + source = act + path = /dbs/sphinx/act + docinfo = extern + morphology = none + charset_type
= utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} diff --git a/scripts/build/config-dev/ciblage.conf b/scripts/build/config-dev/ciblage.conf new file mode 100644 index 0000000..088bbd8 --- /dev/null +++ b/scripts/build/config-dev/ciblage.conf @@ -0,0 +1,157 @@ + +source ciblage +{ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = \ + SELECT id, LPAD(siren, 9, '000000000') AS siren, LPAD(nic, 5, '00000') AS nic, \ + siege, \ + adr_cp, \ + REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ + IF(tel>0,1,0) AS tel, \ + IF(fax>0,1,0) AS fax, \ + cj, \ + capital, \ + CONCAT('EX ', ape_etab) AS ape_etab, \ + CONCAT('EX ', ape_entrep) AS ape_entrep, \ + age_entrep, \ + age_etab, \ + tca, \ + tcaexp, \ + IF(teff_entrep IS NULL,99,teff_entrep) AS teff_entrep, \ + IF(teff_etab IS NULL,99,teff_etab) AS teff_etab, \ + IF(web='',0,1) AS web, \ + IF(mail='',0,1) AS mail, \ + IF(adrDom>0,1,0) AS adrDom, \ + actifEco, \ + presentRcs, \ + procolHisto, \ + tvaIntraValide, \ + dateCrea_etab,\ + dateCrea_ent, \ + dateImmat, \ + eff_entrep, \ + eff_etab, \ + IF(dirNom='',0,1) AS dirNom, \ + nbEtab, \ + IF(nbMPubli>0,1,0) AS nbMPubli, \ + IF(CAST(sirenGrp AS UNSIGNED)>100,1,0) AS sirenGrp, \ + nbActio, \ + IF(nbActio>0,1,0) AS actio, \ + nbPart, \ + IF(nbPart>0,1,0) AS 
part, \ + CASE bilType WHEN 'I' THEN 1 WHEN 'R' THEN 2 WHEN 'E' THEN 3 ELSE 0 END as bilType, \ + bilAnnee, \ + bilCloture, \ + bilDuree, \ + bilTca, \ + bilEE, \ + bilFL, \ + bilFK, \ + bilFR, \ + bilGF, \ + bilGP, \ + bilGU, \ + bilGW, \ + bilHD, \ + bilHH, \ + bilHL, \ + bilHM, \ + bilHN, \ + bilYP, \ + CAST(codeCommune AS UNSIGNED) AS codeCommune, \ + CASE zus WHEN '' THEN 0 WHEN 'HORSZONE' THEN 0 WHEN 'NSP' THEN 0 WHEN 'X' THEN 0 ELSE 1 END as zus, \ + CASE zfu WHEN '' THEN 0 WHEN 'HORSZONE' THEN 0 WHEN 'NSP' THEN 0 WHEN 'X' THEN 0 ELSE 1 END as zfu, \ + CASE zru WHEN '' THEN 0 WHEN 'NSP' THEN 0 ELSE 1 END as zru, \ + CASE cucs WHEN '' THEN 0 WHEN 'NSP' THEN 0 ELSE 1 END as cucs, \ + IF(zrr=1,1,0) as zrr, \ + IF(zafr=1,1,0) as zafr, \ + CASE avisCs WHEN 0 THEN 1 WHEN 10 THEN 2 WHEN 15 THEN 3 WHEN 23 THEN 4 WHEN 29 THEN 4 WHEN 39 THEN 4 WHEN 43 THEN 4 WHEN 21 THEN 5 WHEN 26 THEN 5 WHEN 28 THEN 5 WHEN 31 THEN 6 WHEN 50 THEN 6 WHEN 24 THEN 7 ELSE 0 END as avisCs \ + FROM etablissements_act WHERE siren>100; + + sql_field_string = siren + sql_field_string = nic + sql_attr_uint = siege + sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_uint = tel + sql_attr_uint = fax + sql_attr_uint = cj + sql_attr_uint = capital + sql_attr_uint = age_entrep + sql_attr_uint = age_etab + sql_attr_uint = tca + sql_attr_uint = tcaexp + sql_attr_uint = teff_entrep + sql_attr_uint = teff_etab + sql_attr_uint = web + sql_attr_uint = mail + sql_attr_uint = adrDom + sql_attr_uint = actifEco + sql_attr_uint = presentRcs + sql_attr_uint = procolHisto + sql_attr_uint = tvaIntraValide + sql_attr_uint = dateCrea_etab + sql_attr_uint = dateCrea_ent + sql_attr_uint = dateImmat + sql_attr_uint = eff_entrep + sql_attr_uint = eff_etab + sql_attr_uint = dirNom + sql_attr_uint = nbEtab + sql_attr_uint = nbMPubli + sql_attr_uint = sirenGrp + sql_attr_uint = nbActio + sql_attr_uint = actio + sql_attr_uint = nbPart + sql_attr_uint = part + sql_attr_uint = bilType + sql_attr_uint = bilAnnee + 
sql_attr_uint = bilCloture + sql_attr_uint = bilDuree + sql_attr_uint = bilTca + sql_attr_uint = bilEE + sql_attr_uint = bilFL + sql_attr_uint = bilFK + sql_attr_uint = bilFR + sql_attr_uint = bilGF + sql_attr_uint = bilGP + sql_attr_uint = bilGU + sql_attr_uint = bilGW + sql_attr_uint = bilHD + sql_attr_uint = bilHH + sql_attr_uint = bilHL + sql_attr_uint = bilHM + sql_attr_uint = bilHN + sql_attr_uint = bilYP + sql_attr_uint = codeCommune + sql_attr_uint = zus + sql_attr_uint = zru + sql_attr_uint = zfu + sql_attr_uint = cucs + sql_attr_uint = zrr + sql_attr_uint = zafr + sql_attr_uint = avisCs +} + +index ciblage +{ + source = ciblage + path = /dbs/sphinx/ciblage + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} diff --git a/scripts/build/config-dev/ciblage.txt b/scripts/build/config-dev/ciblage.txt new file mode 100644 index 0000000..5cede41 --- /dev/null +++ b/scripts/build/config-dev/ciblage.txt @@ -0,0 +1,110 @@ +############################################################################# +# Documentation +# ============= +# +# Présence d'éléments +# IF(element=='',0,1) AS pElement +# +# Liste des champs dans la BDD +# ----------------------------- +# id +# source +# source_id +# triCode +# autre_id +# siren => LPAD(siren, 9, '000000000') +# nic => LPAD(nic, 5, '00000') +# actif => 0 ou 1 +# siege 
=> 0 ou 1 +# raisonSociale +# enseigne +# sigle +# identite_pre +# marques => IF(marques=='',0,1) AS pMarques +# adr_num +# adr_btq +# adr_typeVoie +# adr_libVoie +# adr_comp +# adr_cp => Code postal +# adr_ville => Ville +# adr_dep => Departement (Corse 2A,2B => 201, 202) +# adr_com +# tel => IF(tel>0,1,0) AS pTel +# fax => IF(fax>0,1,0) AS pFax +# cj => Texte +# capital => Float +# capitalDev +# capitalSrc +# ape_etab => Texte => Code APE de l'etablissement +# ape_entrep => Texte => Code APE de l'entreprise +# age_entrep => Entier => Age de l'entreprise +# age_etab => Entier => Age de l'etablissement +# tca => Entier => Tranche de chiffre d'affaire +# tcaexp => Entier => Tranche de chiffre d'affaire à l'export +# teff_entrep => Tranche Effectif de l'entreprise +# teff_etab => Tranche Effectif de l'etablissement +# rang +# web => IF(web=='',0,1) AS pWeb +# mail => IF(mail=='',0,1) AS pMail +# adrDom => 0,1,2 +# lieuAct +# actifEco => 0,1 +# presentRcs => 0,1 +# procolHisto => +# tvaIntraCle +# tvaIntraValide => 0,1 +# ape4_etab +# ape4_entrep +# NaceEtab +# NaceEntrep +# dateCrea_etab => Date +# dateCrea_ent => Date +# dateImmat => Date +# eff_entrep => Entier => Effectif de l'entreprise +# eff_etab => Entier => Effectir de l'etablissement +# distSP +# achPost +# rivoli +# dirCiv +# dirNom => IF(pDirNom=='',0,1) AS pDirNom +# dirPrenom +# dirDateNaiss +# dirFct +# nbEtab => Nombre d'établissement +# nbMPubli +# sirenGrp => IF(pSirenGrp>0,1,0) AS pSirenGrp +# nbActio => Entier => Nombre d'actionnaires (actio,bool) +# nbPart => Entier => Nombre de participations (part, bool) +# bilType => +# bilAnnee => +# bilCloture => +# bilDuree => +# bilTca => +# bilEE => +# bilFL => +# bilFK => +# bilFR => +# bilGF => +# bilGP => +# bilGU => +# bilGW => +# bilHD => +# bilHH => +# bilHL => +# bilHM => +# bilHN => +# bilYP => +# avisCs +# codeCommune => +# l93_x +# l93_y +# alt +# precis +# zus => +# zru => +# zfu => +# cucs => +# zrr => +# zafr => 
+############################################################################# \ No newline at end of file diff --git a/scripts/build/config-dev/dir.conf b/scripts/build/config-dev/dir.conf new file mode 100644 index 0000000..c390aa5 --- /dev/null +++ b/scripts/build/config-dev/dir.conf @@ -0,0 +1,64 @@ + +source dir +{ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = \ + SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ + YEAR(naissance_date) AS naiss_annee, \ + MONTH(naissance_date) AS naiss_mois, \ + DAY(naissance_date) AS naiss_jour, \ + naissance_lieu, fonction_code, actif \ + FROM dirigeants; + sql_attr_string = civilite + sql_attr_string = typeDir + sql_attr_string = fonction_code + sql_attr_uint = dirSiren + sql_attr_uint = naiss_annee + sql_attr_uint = naiss_mois + sql_attr_uint = naiss_jour + sql_attr_uint = actif + sql_attr_uint = adr_dep +} + +index dir +{ + source = dir + path = /dbs/sphinx/dir + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} + +index dir_phx +{ + source = dir + path = /dbs/sphinx/dir_phx + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + 
U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + min_stemming_len = 4 +} \ No newline at end of file diff --git a/scripts/build/config-dev/dirtmp.conf b/scripts/build/config-dev/dirtmp.conf new file mode 100644 index 0000000..87c3399 --- /dev/null +++ b/scripts/build/config-dev/dirtmp.conf @@ -0,0 +1,63 @@ +source dir +{ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = + sql_query = \ + SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ + YEAR(naissance_date) AS naiss_annee, \ + MONTH(naissance_date) AS naiss_mois, \ + DAY(naissance_date) AS naiss_jour, \ + naissance_lieu, fonction_code, actif \ + FROM dirigeants_tmp; + sql_attr_string = civilite + sql_attr_string = typeDir + sql_attr_string = fonction_code + sql_attr_uint = dirSiren + sql_attr_uint = naiss_annee + sql_attr_uint = naiss_mois + sql_attr_uint = naiss_jour + sql_attr_uint = actif + sql_attr_uint = adr_dep +} + +index dir +{ + source = dir + path = /dbs/sphinx/dir + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, 
U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} + +index dir_phx +{ + source = dir + path = /dbs/sphinx/dir_phx + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + min_stemming_len = 4 +} \ No newline at end of file diff --git a/scripts/build/config-dev/ent.conf b/scripts/build/config-dev/ent.conf new file mode 100644 index 0000000..7627279 --- /dev/null +++ b/scripts/build/config-dev/ent.conf @@ -0,0 +1,68 @@ + +source ent +{ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, siren, nic, siege, \ + CONCAT_WS(" ", raisonSociale, enseigne, sigle, identite_pre) AS nom, \ + REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ + actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ + adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ + FROM etablissements; + sql_attr_uint = siren + sql_attr_uint = nic + sql_attr_uint = siege + sql_attr_uint = actif + sql_attr_uint = adr_num + 
sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_uint = cj + sql_attr_uint = sirenValide + sql_attr_uint = rang +} + +index ent +{ + source = ent + path = /dbs/sphinx/ent + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + wordforms = /etc/sphinxsearch/wordforms-ent.txt + enable_star = 1 +} + +index ent_phx +{ + source = ent + path = /dbs/sphinx/ent_phx + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + enable_star = 1 + morphology = libstemmer_fr + min_stemming_len = 4 +} diff --git a/scripts/build/config-dev/enttmp.conf b/scripts/build/config-dev/enttmp.conf new file mode 100644 index 0000000..2389645 --- /dev/null +++ 
b/scripts/build/config-dev/enttmp.conf @@ -0,0 +1,69 @@ + +source ent +{ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, siren, nic, siege, \ + CONCAT_WS(" ", raisonSociale, enseigne, sigle, identite_pre) AS nom, \ + REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ + actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ + adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ + FROM etablissements_tmp; + sql_attr_uint = siren + sql_attr_uint = nic + sql_attr_uint = siege + sql_attr_uint = actif + sql_attr_uint = adr_num + sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_uint = cj + sql_attr_uint = sirenValide + sql_attr_uint = rang +} + +index ent +{ + source = ent + path = /dbs/sphinx/ent + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + + wordforms = /etc/sphinxsearch/wordforms-ent.txt + enable_star = 1 +} + +index ent_phx +{ + source = ent + path = /dbs/sphinx/ent_phx + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, 
U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + enable_star = 1 + morphology = libstemmer_fr + min_stemming_len = 4 +} diff --git a/scripts/build/config-dev/histo.conf b/scripts/build/config-dev/histo.conf new file mode 100644 index 0000000..35bdff2 --- /dev/null +++ b/scripts/build/config-dev/histo.conf @@ -0,0 +1,24 @@ + +source histo +{ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = histobodacc + sql_port = 3306 # optional, default is 3306 + sql_query = SELECT id, nomFichier, annee1, dateBod, texte FROM bodacc_ocr; + sql_attr_uint = annee1 + sql_query_info = SELECT * FROM bodacc_ocr WHERE id=$id +} + +index histo +{ + source = histo + path = /dbs/sphinx/histo + docinfo = extern + morphology = none + min_word_len = 2 + charset_type = sbcs + html_strip = 1 +} \ No newline at end of file diff --git a/scripts/build/config-dev/sphinx.conf b/scripts/build/config-dev/sphinx.conf new file mode 100644 index 0000000..eb7f627 --- /dev/null +++ b/scripts/build/config-dev/sphinx.conf @@ -0,0 +1,79 @@ + +############################################################################# +## indexer settings +############################################################################# + +indexer +{ + # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) + # optional, default is 32M, max is 2047M, recommended is 256M to 1024M + mem_limit = 256M + + # maximum IO calls per second (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iops = 40 + + + # maximum IO call size, bytes 
(for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iosize = 1048576 +} + +############################################################################# +## searchd settings +############################################################################# + +searchd +{ + # IP address to bind on + # optional, default is 0.0.0.0 (ie. listen on all interfaces) + # + # address = 127.0.0.1 + # address = 192.168.0.1 + + + # searchd TCP port number + # mandatory, default is 3312 + listen = 3312 + + # log file, searchd run info is logged here + # optional, default is 'searchd.log' + log = /dbs/sphinxlog/searchd.log + + # query log file, all search queries are logged here + # optional, default is empty (do not log queries) + query_log = /dbs/sphinxlog/query.log + + # client read timeout, seconds + # optional, default is 5 + read_timeout = 5 + + # maximum amount of children to fork (concurrent searches to run) + # optional, default is 0 (unlimited) + max_children = 30 + + # PID file, searchd process ID file name + # mandatory + pid_file = /var/log/searchd.pid + + # max amount of matches the daemon ever keeps in RAM, per-index + # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL + # default is 1000 (just like Google) + max_matches = 1000 + + # seamless rotate, prevents rotate stalls if precaching huge datasets + # optional, default is 1 + seamless_rotate = 1 + + # whether to forcibly preopen all indexes on startup + # optional, default is 0 (do not preopen) + preopen_indexes = 1 + + # whether to unlink .old index copies on succesful rotation. 
+ # optional, default is 1 (do unlink) + unlink_old = 1 + + compat_sphinxql_magics=0 +} diff --git a/scripts/build/config-dev/stopwords-ent.txt b/scripts/build/config-dev/stopwords-ent.txt new file mode 100644 index 0000000..e69de29 diff --git a/scripts/build/config-dev/wordforms-ent.txt b/scripts/build/config-dev/wordforms-ent.txt new file mode 100644 index 0000000..929912d --- /dev/null +++ b/scripts/build/config-dev/wordforms-ent.txt @@ -0,0 +1,207 @@ +& > et +un > 1 +deux > 2 +trois > 3 +quatre > 4 +cinq > 5 +six > 6 +sept > 7 +huit > 8 +neuf > 9 +dix > 10 +onze > 11 +douze > 12 +treize > 13 +quatorze > 14 +quinze > 15 +seize > 16 +vingt > 20 +vingts > 20 +trente > 30 +quarante > 40 +cinquante > 50 +soixante > 60 +quatrevingt > 80 +cent > 100 +cents > 100 +mille > 1000 +zac > zone +zad > zone +za > zone +zi > zone +zup > zone +general > gal +abbaye > abe +agglomeration > agl +aglo > agl +allee > all +ancien > ach +ancienne > art +anse > anse +arcade > arc +autoroute > aut +avenue > av +barriere > bre +bas > bch +bastide > bstd +baston > bast +beguinage > begi +berge > ber +bois > bois +boite postal > bp +boucle > bcle +boulevard > bd +bourg > brg +butte > but +campagne > cgne +camping > cpg +carre > carr +carreau > cau +carrefour > car +carriere > care +castel > cst +cavee > cav +central > ctre +centre > ctre +chalet > chl +chapelle > chp +charmille > chi +chateau > cht +chaussee > chs +che > chemin +chv > chemin +cheminement > chem +cloitre > cloi +colline > coli +contour > ctr +corniche > cor +cottage > cott +cours > crs +darse > dars +degre > deg +dsg > descente +dsc > descente +digue > dig +domaine > dom +docteur > dr +ecart > eca +ecluse > ecl +eglise > egl +enceinte > en +enclave > env +enclos > enc +escalier > esc +espace > espa +esplanade > esp +etang > eting +faubourg > fg +ferme > frm +fontaine > fon +fort > fort +forum > form +fosse > fos +foyer > foyr +galerie > gal +garenne > garn +grand > gbd +gden > grande +gr > grande +grille > gri +grimpette > 
grim +groupe > gpe +groupement > gpt +halle > hle +hameau > ham +haut > hch +hippodrome > hip +immeuble > imm +impasse > imp +jardin > jard +jetee > jte +levee > leve +lieu > ld +lieudit > ld +lotissement > lot +maison > mf +manoir > man +marche > mar +metro > met +montee > mte +moulin > mln +musee > mus +nouvelle > nte +palais > pal +parc > parc +parking > pkg +parvis > prv +pas > passage +pn > passage +passe > pass +passerelle > ple +patio > pat +pavillon > pav +peripherique > peri +peristyle > psty +petites > pta +pae > petite +pim > petite +prt > petite +ptr > petite +place > pl +placis > plci +plage > plag +plaine > pln +plan > plan +plateau > plt +pointe > pnt +porche > pch +porte > pte +portique > porq +poterne > pot +pourtour > pour +presqu ile > prq +promenade > prom +quai > qu +quartier > qua +raccourci > rac +raidillon > raid +rampe > rpe +rempart > rem +residence > res +rocade > roc +rond > rpt +roquet > roqt +rotonde > rtd +route > rte +rue > r +ruelle > rle +sente > sen +sentier > sen +square > sq +st > saint +ste > saint +sainte > saint +stade > stde +station > sta +terre > tpl +terrain > trn +terrasse > tsse +tertre > trt +traverse > tra +vallon > val +vallee > val +venelle > ven +vieille > vte +villa > vla +vge > village +vlge > village +voie > voi +centre cial > ccal +centre com > ccal +centre comm > ccal +centre commercial > ccal +ville > mairie +commune > mairie +conseil regional > region +conseil general > departement +companie > cie diff --git a/scripts/build/config-prod/act.conf b/scripts/build/config-prod/act.conf new file mode 100644 index 0000000..0ba5e99 --- /dev/null +++ b/scripts/build/config-prod/act.conf @@ -0,0 +1,42 @@ + +source act +{ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + + sql_query = \ + SELECT l.id, l.idPar, l.idAct, r.siren, r.actif, r.PpPm, r.RS, r.adresse_cp, r.adresse_ville, p.libPays AS pays, l.PDetention \ + FROM liens2 l, 
liensRef r, tabPays p \ + WHERE l.dateSuppr = '0000-00-00 00:00:00' \ + AND r.id = l.idAct \ + AND ( r.siren>1000 OR (r.siren=0 AND r.adresse_pays!='FRA') OR (r.siren=0 AND r.PpPm='PP') ) \ + AND p.codPays3 = r.adresse_pays; + + sql_attr_uint = idAct + sql_attr_uint = actif + sql_attr_float= PDetention + sql_attr_string = pays +} + +index act +{ + source = act + path = /dbs/sphinx/act + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} diff --git a/scripts/build/config-prod/ciblage.conf b/scripts/build/config-prod/ciblage.conf new file mode 100644 index 0000000..4867f7e --- /dev/null +++ b/scripts/build/config-prod/ciblage.conf @@ -0,0 +1,157 @@ + +source ciblage +{ + type = mysql + sql_host = 192.168.3.30 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = \ + SELECT id, LPAD(siren, 9, '000000000') AS siren, LPAD(nic, 5, '00000') AS nic, \ + siege, \ + adr_cp, \ + REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ + IF(tel>0,1,0) AS tel, \ + IF(fax>0,1,0) AS fax, \ + cj, \ + capital, \ + CONCAT('EX ', ape_etab) AS ape_etab, \ + CONCAT('EX ', ape_entrep) AS ape_entrep, \ + age_entrep, \ + age_etab, \ + tca, \ + tcaexp, \ + IF(teff_entrep IS NULL,99,teff_entrep) AS teff_entrep, \ + IF(teff_etab IS NULL,99,teff_etab) AS teff_etab, \ + 
IF(web='',0,1) AS web, \ + IF(mail='',0,1) AS mail, \ + IF(adrDom>0,1,0) AS adrDom, \ + actifEco, \ + presentRcs, \ + procolHisto, \ + tvaIntraValide, \ + dateCrea_etab,\ + dateCrea_ent, \ + dateImmat, \ + eff_entrep, \ + eff_etab, \ + IF(dirNom='',0,1) AS dirNom, \ + nbEtab, \ + IF(nbMPubli>0,1,0) AS nbMPubli, \ + IF(CAST(sirenGrp AS UNSIGNED)>100,1,0) AS sirenGrp, \ + nbActio, \ + IF(nbActio>0,1,0) AS actio, \ + nbPart, \ + IF(nbPart>0,1,0) AS part, \ + CASE bilType WHEN 'I' THEN 1 WHEN 'R' THEN 2 WHEN 'E' THEN 3 ELSE 0 END as bilType, \ + bilAnnee, \ + bilCloture, \ + bilDuree, \ + bilTca, \ + bilEE, \ + bilFL, \ + bilFK, \ + bilFR, \ + bilGF, \ + bilGP, \ + bilGU, \ + bilGW, \ + bilHD, \ + bilHH, \ + bilHL, \ + bilHM, \ + bilHN, \ + bilYP, \ + CAST(codeCommune AS UNSIGNED) AS codeCommune, \ + CASE zus WHEN '' THEN 0 WHEN 'HORSZONE' THEN 0 WHEN 'NSP' THEN 0 WHEN 'X' THEN 0 ELSE 1 END as zus, \ + CASE zfu WHEN '' THEN 0 WHEN 'HORSZONE' THEN 0 WHEN 'NSP' THEN 0 WHEN 'X' THEN 0 ELSE 1 END as zfu, \ + CASE zru WHEN '' THEN 0 WHEN 'NSP' THEN 0 ELSE 1 END as zru, \ + CASE cucs WHEN '' THEN 0 WHEN 'NSP' THEN 0 ELSE 1 END as cucs, \ + IF(zrr=1,1,0) as zrr, \ + IF(zafr=1,1,0) as zafr, \ + CASE avisCs WHEN 0 THEN 1 WHEN 10 THEN 2 WHEN 15 THEN 3 WHEN 23 THEN 4 WHEN 29 THEN 4 WHEN 39 THEN 4 WHEN 43 THEN 4 WHEN 21 THEN 5 WHEN 26 THEN 5 WHEN 28 THEN 5 WHEN 31 THEN 6 WHEN 50 THEN 6 WHEN 24 THEN 7 ELSE 0 END as avisCs \ + FROM etablissements_act WHERE siren>100; + + sql_field_string = siren + sql_field_string = nic + sql_attr_uint = siege + sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_uint = tel + sql_attr_uint = fax + sql_attr_uint = cj + sql_attr_uint = capital + sql_attr_uint = age_entrep + sql_attr_uint = age_etab + sql_attr_uint = tca + sql_attr_uint = tcaexp + sql_attr_uint = teff_entrep + sql_attr_uint = teff_etab + sql_attr_uint = web + sql_attr_uint = mail + sql_attr_uint = adrDom + sql_attr_uint = actifEco + sql_attr_uint = presentRcs + sql_attr_uint = 
procolHisto + sql_attr_uint = tvaIntraValide + sql_attr_uint = dateCrea_etab + sql_attr_uint = dateCrea_ent + sql_attr_uint = dateImmat + sql_attr_uint = eff_entrep + sql_attr_uint = eff_etab + sql_attr_uint = dirNom + sql_attr_uint = nbEtab + sql_attr_uint = nbMPubli + sql_attr_uint = sirenGrp + sql_attr_uint = nbActio + sql_attr_uint = actio + sql_attr_uint = nbPart + sql_attr_uint = part + sql_attr_uint = bilType + sql_attr_uint = bilAnnee + sql_attr_uint = bilCloture + sql_attr_uint = bilDuree + sql_attr_uint = bilTca + sql_attr_uint = bilEE + sql_attr_uint = bilFL + sql_attr_uint = bilFK + sql_attr_uint = bilFR + sql_attr_uint = bilGF + sql_attr_uint = bilGP + sql_attr_uint = bilGU + sql_attr_uint = bilGW + sql_attr_uint = bilHD + sql_attr_uint = bilHH + sql_attr_uint = bilHL + sql_attr_uint = bilHM + sql_attr_uint = bilHN + sql_attr_uint = bilYP + sql_attr_uint = codeCommune + sql_attr_uint = zus + sql_attr_uint = zru + sql_attr_uint = zfu + sql_attr_uint = cucs + sql_attr_uint = zrr + sql_attr_uint = zafr + sql_attr_uint = avisCs +} + +index ciblage +{ + source = ciblage + path = /dbs/sphinx/ciblage + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} diff --git a/scripts/build/config-prod/dir.conf b/scripts/build/config-prod/dir.conf new file mode 100644 index 0000000..c390aa5 --- /dev/null +++ 
b/scripts/build/config-prod/dir.conf @@ -0,0 +1,64 @@ + +source dir +{ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = \ + SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ + YEAR(naissance_date) AS naiss_annee, \ + MONTH(naissance_date) AS naiss_mois, \ + DAY(naissance_date) AS naiss_jour, \ + naissance_lieu, fonction_code, actif \ + FROM dirigeants; + sql_attr_string = civilite + sql_attr_string = typeDir + sql_attr_string = fonction_code + sql_attr_uint = dirSiren + sql_attr_uint = naiss_annee + sql_attr_uint = naiss_mois + sql_attr_uint = naiss_jour + sql_attr_uint = actif + sql_attr_uint = adr_dep +} + +index dir +{ + source = dir + path = /dbs/sphinx/dir + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} + +index dir_phx +{ + source = dir + path = /dbs/sphinx/dir_phx + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, 
U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + min_stemming_len = 4 +} \ No newline at end of file diff --git a/scripts/build/config-prod/dirtmp.conf b/scripts/build/config-prod/dirtmp.conf new file mode 100644 index 0000000..87c3399 --- /dev/null +++ b/scripts/build/config-prod/dirtmp.conf @@ -0,0 +1,63 @@ +source dir +{ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = + sql_query = \ + SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ + YEAR(naissance_date) AS naiss_annee, \ + MONTH(naissance_date) AS naiss_mois, \ + DAY(naissance_date) AS naiss_jour, \ + naissance_lieu, fonction_code, actif \ + FROM dirigeants_tmp; + sql_attr_string = civilite + sql_attr_string = typeDir + sql_attr_string = fonction_code + sql_attr_uint = dirSiren + sql_attr_uint = naiss_annee + sql_attr_uint = naiss_mois + sql_attr_uint = naiss_jour + sql_attr_uint = actif + sql_attr_uint = adr_dep +} + +index dir +{ + source = dir + path = /dbs/sphinx/dir + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, 
U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} + +index dir_phx +{ + source = dir + path = /dbs/sphinx/dir_phx + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + min_stemming_len = 4 +} \ No newline at end of file diff --git a/scripts/build/config-prod/ent.conf b/scripts/build/config-prod/ent.conf new file mode 100644 index 0000000..fe84d10 --- /dev/null +++ b/scripts/build/config-prod/ent.conf @@ -0,0 +1,73 @@ +source ent +{ + type = mysql + sql_host = 192.168.3.30 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, siren, nic, siege, \ + CONCAT_WS(" ", \ + raisonSociale, \ + enseigne, \ + sigle, \ + identite_pre\ + ) AS nom, REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ + actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ + adr_ville AS ville, cj, ape_etab, (siren>200) AS sirenValide, rang \ + FROM etablissements; + + sql_attr_uint = siren + sql_attr_uint = nic + sql_attr_uint = siege + sql_attr_uint = actif + sql_attr_uint = adr_num + sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_uint = cj + sql_attr_uint = sirenValide + sql_attr_uint = rang +} + +index ent +{ + source = ent + path = /dbs/sphinx/ent + docinfo = extern + charset_type = utf-8 + charset_table 
= 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + + wordforms = /etc/sphinxsearch/wordforms-ent.txt + enable_star = 1 +} + +index ent_phx +{ + source = ent + path = /dbs/sphinx/ent_phx + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + enable_star = 1 + morphology = libstemmer_fr + min_stemming_len = 4 +} diff --git a/scripts/build/config-prod/enttmp.conf b/scripts/build/config-prod/enttmp.conf new file mode 100644 index 0000000..103d122 --- /dev/null +++ b/scripts/build/config-prod/enttmp.conf @@ -0,0 +1,73 @@ +source ent +{ + type = mysql + sql_host = 192.168.3.30 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, siren, nic, 
siege, \ + CONCAT_WS(" ", \ + raisonSociale, \ + enseigne, \ + sigle, \ + identite_pre\ + ) AS nom, REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ + actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ + adr_ville AS ville, cj, ape_etab, (siren>200) AS sirenValide, rang \ + FROM etablissements_tmp; + + sql_attr_uint = siren + sql_attr_uint = nic + sql_attr_uint = siege + sql_attr_uint = actif + sql_attr_uint = adr_num + sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_uint = cj + sql_attr_uint = sirenValide + sql_attr_uint = rang +} + +index ent +{ + source = ent + path = /dbs/sphinx/ent + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + + wordforms = /etc/sphinxsearch/wordforms-ent.txt + enable_star = 1 +} + +index ent_phx +{ + source = ent + path = /dbs/sphinx/ent_phx + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, 
U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + enable_star = 1 + morphology = libstemmer_fr + min_stemming_len = 4 +} diff --git a/scripts/build/config-prod/sphinx.conf b/scripts/build/config-prod/sphinx.conf new file mode 100644 index 0000000..eb7f627 --- /dev/null +++ b/scripts/build/config-prod/sphinx.conf @@ -0,0 +1,79 @@ + +############################################################################# +## indexer settings +############################################################################# + +indexer +{ + # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) + # optional, default is 32M, max is 2047M, recommended is 256M to 1024M + mem_limit = 256M + + # maximum IO calls per second (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iops = 40 + + + # maximum IO call size, bytes (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iosize = 1048576 +} + +############################################################################# +## searchd settings +############################################################################# + +searchd +{ + # IP address to bind on + # optional, default is 0.0.0.0 (ie. 
listen on all interfaces) + # + # address = 127.0.0.1 + # address = 192.168.0.1 + + + # searchd TCP port number + # mandatory, default is 3312 + listen = 3312 + + # log file, searchd run info is logged here + # optional, default is 'searchd.log' + log = /dbs/sphinxlog/searchd.log + + # query log file, all search queries are logged here + # optional, default is empty (do not log queries) + query_log = /dbs/sphinxlog/query.log + + # client read timeout, seconds + # optional, default is 5 + read_timeout = 5 + + # maximum amount of children to fork (concurrent searches to run) + # optional, default is 0 (unlimited) + max_children = 30 + + # PID file, searchd process ID file name + # mandatory + pid_file = /var/log/searchd.pid + + # max amount of matches the daemon ever keeps in RAM, per-index + # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL + # default is 1000 (just like Google) + max_matches = 1000 + + # seamless rotate, prevents rotate stalls if precaching huge datasets + # optional, default is 1 + seamless_rotate = 1 + + # whether to forcibly preopen all indexes on startup + # optional, default is 0 (do not preopen) + preopen_indexes = 1 + + # whether to unlink .old index copies on succesful rotation. 
+ # optional, default is 1 (do unlink) + unlink_old = 1 + + compat_sphinxql_magics=0 +} diff --git a/scripts/build/config-prod/wordforms-ent.txt b/scripts/build/config-prod/wordforms-ent.txt new file mode 100644 index 0000000..929912d --- /dev/null +++ b/scripts/build/config-prod/wordforms-ent.txt @@ -0,0 +1,207 @@ +& > et +un > 1 +deux > 2 +trois > 3 +quatre > 4 +cinq > 5 +six > 6 +sept > 7 +huit > 8 +neuf > 9 +dix > 10 +onze > 11 +douze > 12 +treize > 13 +quatorze > 14 +quinze > 15 +seize > 16 +vingt > 20 +vingts > 20 +trente > 30 +quarante > 40 +cinquante > 50 +soixante > 60 +quatrevingt > 80 +cent > 100 +cents > 100 +mille > 1000 +zac > zone +zad > zone +za > zone +zi > zone +zup > zone +general > gal +abbaye > abe +agglomeration > agl +aglo > agl +allee > all +ancien > ach +ancienne > art +anse > anse +arcade > arc +autoroute > aut +avenue > av +barriere > bre +bas > bch +bastide > bstd +baston > bast +beguinage > begi +berge > ber +bois > bois +boite postal > bp +boucle > bcle +boulevard > bd +bourg > brg +butte > but +campagne > cgne +camping > cpg +carre > carr +carreau > cau +carrefour > car +carriere > care +castel > cst +cavee > cav +central > ctre +centre > ctre +chalet > chl +chapelle > chp +charmille > chi +chateau > cht +chaussee > chs +che > chemin +chv > chemin +cheminement > chem +cloitre > cloi +colline > coli +contour > ctr +corniche > cor +cottage > cott +cours > crs +darse > dars +degre > deg +dsg > descente +dsc > descente +digue > dig +domaine > dom +docteur > dr +ecart > eca +ecluse > ecl +eglise > egl +enceinte > en +enclave > env +enclos > enc +escalier > esc +espace > espa +esplanade > esp +etang > eting +faubourg > fg +ferme > frm +fontaine > fon +fort > fort +forum > form +fosse > fos +foyer > foyr +galerie > gal +garenne > garn +grand > gbd +gden > grande +gr > grande +grille > gri +grimpette > grim +groupe > gpe +groupement > gpt +halle > hle +hameau > ham +haut > hch +hippodrome > hip +immeuble > imm +impasse > imp +jardin > jard 
+jetee > jte +levee > leve +lieu > ld +lieudit > ld +lotissement > lot +maison > mf +manoir > man +marche > mar +metro > met +montee > mte +moulin > mln +musee > mus +nouvelle > nte +palais > pal +parc > parc +parking > pkg +parvis > prv +pas > passage +pn > passage +passe > pass +passerelle > ple +patio > pat +pavillon > pav +peripherique > peri +peristyle > psty +petites > pta +pae > petite +pim > petite +prt > petite +ptr > petite +place > pl +placis > plci +plage > plag +plaine > pln +plan > plan +plateau > plt +pointe > pnt +porche > pch +porte > pte +portique > porq +poterne > pot +pourtour > pour +presqu ile > prq +promenade > prom +quai > qu +quartier > qua +raccourci > rac +raidillon > raid +rampe > rpe +rempart > rem +residence > res +rocade > roc +rond > rpt +roquet > roqt +rotonde > rtd +route > rte +rue > r +ruelle > rle +sente > sen +sentier > sen +square > sq +st > saint +ste > saint +sainte > saint +stade > stde +station > sta +terre > tpl +terrain > trn +terrasse > tsse +tertre > trt +traverse > tra +vallon > val +vallee > val +venelle > ven +vieille > vte +villa > vla +vge > village +vlge > village +voie > voi +centre cial > ccal +centre com > ccal +centre comm > ccal +centre commercial > ccal +ville > mairie +commune > mairie +conseil regional > region +conseil general > departement +companie > cie From a039f5fe206613e3e91e5ab72e80cdb855e48092 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 13 Aug 2014 13:55:48 +0000 Subject: [PATCH 10/40] Add Ubuntu 14.04 configuration --- .../ubuntu-14.04/etc/default/sphinxsearch | 10 ++++++ .../ubuntu-14.04/etc/init/sphinxsearch.conf | 34 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 scripts/build/ubuntu-14.04/etc/default/sphinxsearch create mode 100644 scripts/build/ubuntu-14.04/etc/init/sphinxsearch.conf diff --git a/scripts/build/ubuntu-14.04/etc/default/sphinxsearch b/scripts/build/ubuntu-14.04/etc/default/sphinxsearch new file mode 100644 index 0000000..8d9bf66 --- 
/dev/null +++ b/scripts/build/ubuntu-14.04/etc/default/sphinxsearch @@ -0,0 +1,10 @@ +# +# Settings for the sphinxsearch searchd daemon +# Please read /usr/share/doc/sphinxsearch/README.Debian for details. +# + +# Should sphinxsearch run automatically on startup? (default: no) +# Before doing this you might want to modify /etc/sphinxsearch/sphinx.conf +# so that it works for you. +START=yes + diff --git a/scripts/build/ubuntu-14.04/etc/init/sphinxsearch.conf b/scripts/build/ubuntu-14.04/etc/init/sphinxsearch.conf new file mode 100644 index 0000000..c2da12e --- /dev/null +++ b/scripts/build/ubuntu-14.04/etc/init/sphinxsearch.conf @@ -0,0 +1,34 @@ +# SphinxSearch Service + +description "SphinxSearch Daemon" +author "Andrey Aksyonoff " + +start on (net-device-up + and local-filesystems + and runlevel [2345]) +stop on runlevel [016] + +respawn +respawn limit 10 35 + +# The default of 5 seconds is too low if we have rt indices and have to flush them +kill timeout 30 + +env DEFAULTFILE="/etc/default/sphinxsearch" +pre-start script + if [ -f "$DEFAULTFILE" ]; then + . "$DEFAULTFILE" + fi + if [ "z$START" != "zyes" ]; then + stop + logger "To enable sphinxsearch, edit /etc/default/sphinxsearch and set START=yes" + exit 0 + fi + if [ ! -f /etc/sphinxsearch/sphinx.conf ]; then + logger "Please create an /etc/sphinxsearch/sphinx.conf configuration file." + logger "Templates are in the /etc/sphinxsearch/ directory." 
+ exit 0 + fi +end script + +exec /usr/local/sphinx/bin/searchd --nodetach --config /etc/sphinxsearch/sphinx.conf From c8abc8905a6311d727771e69104765e13278e5f8 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 13 Aug 2014 15:14:20 +0000 Subject: [PATCH 11/40] =?UTF-8?q?Mise=20=C3=A0=20jour=20config=20searchd?= =?UTF-8?q?=20et=20indexer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/build/config-dev/sphinx.conf | 387 ++++++++++++++++++++++---- scripts/build/config-prod/sphinx.conf | 387 ++++++++++++++++++++++---- 2 files changed, 674 insertions(+), 100 deletions(-) diff --git a/scripts/build/config-dev/sphinx.conf b/scripts/build/config-dev/sphinx.conf index eb7f627..b41e165 100644 --- a/scripts/build/config-dev/sphinx.conf +++ b/scripts/build/config-dev/sphinx.conf @@ -5,20 +5,82 @@ indexer { - # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) - # optional, default is 32M, max is 2047M, recommended is 256M to 1024M - mem_limit = 256M + # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) + # optional, default is 32M, max is 2047M, recommended is 256M to 1024M + mem_limit = 256M - # maximum IO calls per second (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iops = 40 + # maximum IO calls per second (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iops = 40 - # maximum IO call size, bytes (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iosize = 1048576 + # maximum IO call size, bytes (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iosize = 1048576 + + + # maximum xmlpipe2 field length, bytes + # optional, default is 2M + # + # max_xmlpipe2_field = 4M + + + # write buffer size, bytes + # several (currently up to 4) buffers will be allocated + # write buffers are allocated in addition to mem_limit + # optional, default is 1M + # + # write_buffer = 1M + + + # maximum file field adaptive 
buffer size + # optional, default is 8M, minimum is 1M + # + # max_file_field_buffer = 32M + + + # how to handle IO errors in file fields + # known values are 'ignore_field', 'skip_document', and 'fail_index' + # optional, default is 'ignore_field' + # + # on_file_field_error = skip_document + + + # how to handle syntax errors in JSON attributes + # known values are 'ignore_attr' and 'fail_index' + # optional, default is 'ignore_attr' + # + # on_json_attr_error = fail_index + + + # whether to auto-convert numeric values from strings in JSON attributes + # with auto-conversion, string value with actually numeric data + # (as in {"key":"12345"}) gets stored as a number, rather than string + # optional, allowed values are 0 and 1, default is 0 (do not convert) + # + # json_autoconv_numbers = 1 + + + # whether and how to auto-convert key names in JSON attributes + # known value is 'lowercase' + # optional, default is unspecified (do nothing) + # + # json_autoconv_keynames = lowercase + + + # lemmatizer dictionaries base path + # optional, defaut is /usr/local/share (see ./configure --datadir) + # + # lemmatizer_base = /usr/local/share/sphinx/dicts + + + # lemmatizer cache size + # improves the indexing time when the lemmatization is enabled + # optional, default is 256K + # + # lemmatizer_cache = 512M } ############################################################################# @@ -27,53 +89,278 @@ indexer searchd { - # IP address to bind on - # optional, default is 0.0.0.0 (ie. 
listen on all interfaces) - # - # address = 127.0.0.1 - # address = 192.168.0.1 + # [hostname:]port[:protocol], or /unix/socket/path to listen on + # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL) + # + # multi-value, multiple listen points are allowed + # optional, defaults are 9312:sphinx and 9306:mysql41, as below + # + # listen = 127.0.0.1 + # listen = 192.168.0.1:9312 + # listen = 9312 + # listen = /var/run/searchd.sock + listen = 3312 + + # log file, searchd run info is logged here + # optional, default is 'searchd.log' + log = /dbs/sphinxlog/searchd.log + + # query log file, all search queries are logged here + # optional, default is empty (do not log queries) + query_log = /dbs/sphinxlog/query.log + + # client read timeout, seconds + # optional, default is 5 + read_timeout = 5 + + # request timeout, seconds + # optional, default is 5 minutes + client_timeout = 300 + + # maximum amount of children to fork (concurrent searches to run) + # optional, default is 0 (unlimited) + max_children = 30 + + # maximum amount of persistent connections from this master to each agent host + # optional, but necessary if you use agent_persistent. It is reasonable to set the value + # as max_children, or less on the agent's hosts. + persistent_connections_limit = 30 + + # PID file, searchd process ID file name + # mandatory + pid_file = /var/run/sphinxsearch/searchd.pid + + # max amount of matches the daemon ever keeps in RAM, per-index + # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL + # default is 1000 (just like Google) + max_matches = 1000 + + # seamless rotate, prevents rotate stalls if precaching huge datasets + # optional, default is 1 + seamless_rotate = 1 + + # whether to forcibly preopen all indexes on startup + # optional, default is 1 (preopen everything) + preopen_indexes = 1 + + # whether to unlink .old index copies on succesful rotation. 
+ # optional, default is 1 (do unlink) + unlink_old = 1 + + # attribute updates periodic flush timeout, seconds + # updates will be automatically dumped to disk this frequently + # optional, default is 0 (disable periodic flush) + # + # attr_flush_period = 900 - # searchd TCP port number - # mandatory, default is 3312 - listen = 3312 + # instance-wide ondisk_dict defaults (per-index value take precedence) + # optional, default is 0 (precache all dictionaries in RAM) + # + # ondisk_dict_default = 1 - # log file, searchd run info is logged here - # optional, default is 'searchd.log' - log = /dbs/sphinxlog/searchd.log - # query log file, all search queries are logged here - # optional, default is empty (do not log queries) - query_log = /dbs/sphinxlog/query.log + # MVA updates pool size + # shared between all instances of searchd, disables attr flushes! + # optional, default size is 1M + mva_updates_pool = 1M - # client read timeout, seconds - # optional, default is 5 - read_timeout = 5 + # max allowed network packet size + # limits both query packets from clients, and responses from agents + # optional, default size is 8M + max_packet_size = 8M - # maximum amount of children to fork (concurrent searches to run) - # optional, default is 0 (unlimited) - max_children = 30 + # crash log path + # searchd will (try to) log crashed query to 'crash_log_path.PID' file + # optional, default is empty (do not create crash logs) + # + # crash_log_path = /var/log/sphinxsearch/crash - # PID file, searchd process ID file name - # mandatory - pid_file = /var/log/searchd.pid - # max amount of matches the daemon ever keeps in RAM, per-index - # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL - # default is 1000 (just like Google) - max_matches = 1000 + # max allowed per-query filter count + # optional, default is 256 + max_filters = 256 - # seamless rotate, prevents rotate stalls if precaching huge datasets - # optional, default is 1 - seamless_rotate = 1 + # max 
allowed per-filter values count + # optional, default is 4096 + max_filter_values = 4096 - # whether to forcibly preopen all indexes on startup - # optional, default is 0 (do not preopen) - preopen_indexes = 1 - # whether to unlink .old index copies on succesful rotation. - # optional, default is 1 (do unlink) - unlink_old = 1 - - compat_sphinxql_magics=0 -} + # socket listen queue length + # optional, default is 5 + # + # listen_backlog = 5 + + + # per-keyword read buffer size + # optional, default is 256K + # + # read_buffer = 256K + + + # unhinted read size (currently used when reading hits) + # optional, default is 32K + # + # read_unhinted = 32K + + + # max allowed per-batch query count (aka multi-query count) + # optional, default is 32 + max_batch_queries = 32 + + + # max common subtree document cache size, per-query + # optional, default is 0 (disable subtree optimization) + # + # subtree_docs_cache = 4M + + + # max common subtree hit cache size, per-query + # optional, default is 0 (disable subtree optimization) + # + # subtree_hits_cache = 8M + + + # multi-processing mode (MPM) + # known values are none, fork, prefork, and threads + # threads is required for RT backend to work + # optional, default is fork + workers = prefork + + + # max threads to create for searching local parts of a distributed index + # optional, default is 0, which means disable multi-threaded searching + # should work with all MPMs (ie. 
does NOT require workers=threads) + # + dist_threads = 2 + + + # binlog files path; use empty string to disable binlog + # optional, default is build-time configured data directory + # + # binlog_path = # disable logging + # binlog_path = /var/lib/sphinxsearch/data # binlog.001 etc will be created there + + + # binlog flush/sync mode + # 0 means flush and sync every second + # 1 means flush and sync every transaction + # 2 means flush every transaction, sync every second + # optional, default is 2 + # + # binlog_flush = 2 + + + # binlog per-file size limit + # optional, default is 128M, 0 means no limit + # + # binlog_max_log_size = 256M + + + # per-thread stack size, only affects workers=threads mode + # optional, default is 64K + # + # thread_stack = 128K + + + # per-keyword expansion limit (for dict=keywords prefix searches) + # optional, default is 0 (no limit) + # + # expansion_limit = 1000 + + + # RT RAM chunks flush period + # optional, default is 0 (no periodic flush) + # + # rt_flush_period = 900 + + + # query log file format + # optional, known values are plain and sphinxql, default is plain + # + # query_log_format = sphinxql + + + # version string returned to MySQL network protocol clients + # optional, default is empty (use Sphinx version) + # + # mysql_version_string = 5.0.37 + + + # trusted plugin directory + # optional, default is empty (disable UDFs) + # + # plugin_dir = /usr/local/sphinx/lib + + + # default server-wide collation + # optional, default is libc_ci + # + # collation_server = utf8_general_ci + + + # server-wide locale for libc based collations + # optional, default is C + # + # collation_libc_locale = ru_RU.UTF-8 + + + # threaded server watchdog (only used in workers=threads mode) + # optional, values are 0 and 1, default is 1 (watchdog on) + # + # watchdog = 1 + + + # SphinxQL compatibility mode (legacy columns and their names) + # optional, default is 1 (old-style) + # + # compat_sphinxql_magics = 1 + + + # costs for 
max_predicted_time model, in (imaginary) nanoseconds + # optional, default is "doc=64, hit=48, skip=2048, match=64" + # + # predicted_time_costs = doc=64, hit=48, skip=2048, match=64 + + + # current SphinxQL state (uservars etc) serialization path + # optional, default is none (do not serialize SphinxQL state) + # + # sphinxql_state = sphinxvars.sql + + + # maximum RT merge thread IO calls per second, and per-call IO size + # useful for throttling (the background) OPTIMIZE INDEX impact + # optional, default is 0 (unlimited) + # + # rt_merge_iops = 40 + # rt_merge_maxiosize = 1M + + + # interval between agent mirror pings, in milliseconds + # 0 means disable pings + # optional, default is 1000 + # + # ha_ping_interval = 0 + + + # agent mirror statistics window size, in seconds + # stats older than the window size (karma) are retired + # that is, they will not affect master choice of agents in any way + # optional, default is 60 seconds + # + # ha_period_karma = 60 + + + # delay between preforked children restarts on rotation, in milliseconds + # optional, default is 0 (no delay) + # + # prefork_rotation_throttle = 100 + + + # a prefix to prepend to the local file names when creating snippets + # with load_files and/or load_files_scatter options + # optional, default is empty + # + # snippets_file_prefix = /mnt/common/server1/ +} \ No newline at end of file diff --git a/scripts/build/config-prod/sphinx.conf b/scripts/build/config-prod/sphinx.conf index eb7f627..b41e165 100644 --- a/scripts/build/config-prod/sphinx.conf +++ b/scripts/build/config-prod/sphinx.conf @@ -5,20 +5,82 @@ indexer { - # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) - # optional, default is 32M, max is 2047M, recommended is 256M to 1024M - mem_limit = 256M + # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) + # optional, default is 32M, max is 2047M, recommended is 256M to 1024M + mem_limit = 256M - # maximum IO calls per second (for I/O throttling) - # optional, 
default is 0 (unlimited) - # - # max_iops = 40 + # maximum IO calls per second (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iops = 40 - # maximum IO call size, bytes (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iosize = 1048576 + # maximum IO call size, bytes (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iosize = 1048576 + + + # maximum xmlpipe2 field length, bytes + # optional, default is 2M + # + # max_xmlpipe2_field = 4M + + + # write buffer size, bytes + # several (currently up to 4) buffers will be allocated + # write buffers are allocated in addition to mem_limit + # optional, default is 1M + # + # write_buffer = 1M + + + # maximum file field adaptive buffer size + # optional, default is 8M, minimum is 1M + # + # max_file_field_buffer = 32M + + + # how to handle IO errors in file fields + # known values are 'ignore_field', 'skip_document', and 'fail_index' + # optional, default is 'ignore_field' + # + # on_file_field_error = skip_document + + + # how to handle syntax errors in JSON attributes + # known values are 'ignore_attr' and 'fail_index' + # optional, default is 'ignore_attr' + # + # on_json_attr_error = fail_index + + + # whether to auto-convert numeric values from strings in JSON attributes + # with auto-conversion, string value with actually numeric data + # (as in {"key":"12345"}) gets stored as a number, rather than string + # optional, allowed values are 0 and 1, default is 0 (do not convert) + # + # json_autoconv_numbers = 1 + + + # whether and how to auto-convert key names in JSON attributes + # known value is 'lowercase' + # optional, default is unspecified (do nothing) + # + # json_autoconv_keynames = lowercase + + + # lemmatizer dictionaries base path + # optional, defaut is /usr/local/share (see ./configure --datadir) + # + # lemmatizer_base = /usr/local/share/sphinx/dicts + + + # lemmatizer cache size + # improves the indexing time when the lemmatization is 
enabled + # optional, default is 256K + # + # lemmatizer_cache = 512M } ############################################################################# @@ -27,53 +89,278 @@ indexer searchd { - # IP address to bind on - # optional, default is 0.0.0.0 (ie. listen on all interfaces) - # - # address = 127.0.0.1 - # address = 192.168.0.1 + # [hostname:]port[:protocol], or /unix/socket/path to listen on + # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL) + # + # multi-value, multiple listen points are allowed + # optional, defaults are 9312:sphinx and 9306:mysql41, as below + # + # listen = 127.0.0.1 + # listen = 192.168.0.1:9312 + # listen = 9312 + # listen = /var/run/searchd.sock + listen = 3312 + + # log file, searchd run info is logged here + # optional, default is 'searchd.log' + log = /dbs/sphinxlog/searchd.log + + # query log file, all search queries are logged here + # optional, default is empty (do not log queries) + query_log = /dbs/sphinxlog/query.log + + # client read timeout, seconds + # optional, default is 5 + read_timeout = 5 + + # request timeout, seconds + # optional, default is 5 minutes + client_timeout = 300 + + # maximum amount of children to fork (concurrent searches to run) + # optional, default is 0 (unlimited) + max_children = 30 + + # maximum amount of persistent connections from this master to each agent host + # optional, but necessary if you use agent_persistent. It is reasonable to set the value + # as max_children, or less on the agent's hosts. 
+ persistent_connections_limit = 30 + + # PID file, searchd process ID file name + # mandatory + pid_file = /var/run/sphinxsearch/searchd.pid + + # max amount of matches the daemon ever keeps in RAM, per-index + # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL + # default is 1000 (just like Google) + max_matches = 1000 + + # seamless rotate, prevents rotate stalls if precaching huge datasets + # optional, default is 1 + seamless_rotate = 1 + + # whether to forcibly preopen all indexes on startup + # optional, default is 1 (preopen everything) + preopen_indexes = 1 + + # whether to unlink .old index copies on succesful rotation. + # optional, default is 1 (do unlink) + unlink_old = 1 + + # attribute updates periodic flush timeout, seconds + # updates will be automatically dumped to disk this frequently + # optional, default is 0 (disable periodic flush) + # + # attr_flush_period = 900 - # searchd TCP port number - # mandatory, default is 3312 - listen = 3312 + # instance-wide ondisk_dict defaults (per-index value take precedence) + # optional, default is 0 (precache all dictionaries in RAM) + # + # ondisk_dict_default = 1 - # log file, searchd run info is logged here - # optional, default is 'searchd.log' - log = /dbs/sphinxlog/searchd.log - # query log file, all search queries are logged here - # optional, default is empty (do not log queries) - query_log = /dbs/sphinxlog/query.log + # MVA updates pool size + # shared between all instances of searchd, disables attr flushes! 
+ # optional, default size is 1M + mva_updates_pool = 1M - # client read timeout, seconds - # optional, default is 5 - read_timeout = 5 + # max allowed network packet size + # limits both query packets from clients, and responses from agents + # optional, default size is 8M + max_packet_size = 8M - # maximum amount of children to fork (concurrent searches to run) - # optional, default is 0 (unlimited) - max_children = 30 + # crash log path + # searchd will (try to) log crashed query to 'crash_log_path.PID' file + # optional, default is empty (do not create crash logs) + # + # crash_log_path = /var/log/sphinxsearch/crash - # PID file, searchd process ID file name - # mandatory - pid_file = /var/log/searchd.pid - # max amount of matches the daemon ever keeps in RAM, per-index - # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL - # default is 1000 (just like Google) - max_matches = 1000 + # max allowed per-query filter count + # optional, default is 256 + max_filters = 256 - # seamless rotate, prevents rotate stalls if precaching huge datasets - # optional, default is 1 - seamless_rotate = 1 + # max allowed per-filter values count + # optional, default is 4096 + max_filter_values = 4096 - # whether to forcibly preopen all indexes on startup - # optional, default is 0 (do not preopen) - preopen_indexes = 1 - # whether to unlink .old index copies on succesful rotation. 
- # optional, default is 1 (do unlink) - unlink_old = 1 - - compat_sphinxql_magics=0 -} + # socket listen queue length + # optional, default is 5 + # + # listen_backlog = 5 + + + # per-keyword read buffer size + # optional, default is 256K + # + # read_buffer = 256K + + + # unhinted read size (currently used when reading hits) + # optional, default is 32K + # + # read_unhinted = 32K + + + # max allowed per-batch query count (aka multi-query count) + # optional, default is 32 + max_batch_queries = 32 + + + # max common subtree document cache size, per-query + # optional, default is 0 (disable subtree optimization) + # + # subtree_docs_cache = 4M + + + # max common subtree hit cache size, per-query + # optional, default is 0 (disable subtree optimization) + # + # subtree_hits_cache = 8M + + + # multi-processing mode (MPM) + # known values are none, fork, prefork, and threads + # threads is required for RT backend to work + # optional, default is fork + workers = prefork + + + # max threads to create for searching local parts of a distributed index + # optional, default is 0, which means disable multi-threaded searching + # should work with all MPMs (ie. 
does NOT require workers=threads) + # + dist_threads = 2 + + + # binlog files path; use empty string to disable binlog + # optional, default is build-time configured data directory + # + # binlog_path = # disable logging + # binlog_path = /var/lib/sphinxsearch/data # binlog.001 etc will be created there + + + # binlog flush/sync mode + # 0 means flush and sync every second + # 1 means flush and sync every transaction + # 2 means flush every transaction, sync every second + # optional, default is 2 + # + # binlog_flush = 2 + + + # binlog per-file size limit + # optional, default is 128M, 0 means no limit + # + # binlog_max_log_size = 256M + + + # per-thread stack size, only affects workers=threads mode + # optional, default is 64K + # + # thread_stack = 128K + + + # per-keyword expansion limit (for dict=keywords prefix searches) + # optional, default is 0 (no limit) + # + # expansion_limit = 1000 + + + # RT RAM chunks flush period + # optional, default is 0 (no periodic flush) + # + # rt_flush_period = 900 + + + # query log file format + # optional, known values are plain and sphinxql, default is plain + # + # query_log_format = sphinxql + + + # version string returned to MySQL network protocol clients + # optional, default is empty (use Sphinx version) + # + # mysql_version_string = 5.0.37 + + + # trusted plugin directory + # optional, default is empty (disable UDFs) + # + # plugin_dir = /usr/local/sphinx/lib + + + # default server-wide collation + # optional, default is libc_ci + # + # collation_server = utf8_general_ci + + + # server-wide locale for libc based collations + # optional, default is C + # + # collation_libc_locale = ru_RU.UTF-8 + + + # threaded server watchdog (only used in workers=threads mode) + # optional, values are 0 and 1, default is 1 (watchdog on) + # + # watchdog = 1 + + + # SphinxQL compatibility mode (legacy columns and their names) + # optional, default is 1 (old-style) + # + # compat_sphinxql_magics = 1 + + + # costs for 
max_predicted_time model, in (imaginary) nanoseconds + # optional, default is "doc=64, hit=48, skip=2048, match=64" + # + # predicted_time_costs = doc=64, hit=48, skip=2048, match=64 + + + # current SphinxQL state (uservars etc) serialization path + # optional, default is none (do not serialize SphinxQL state) + # + # sphinxql_state = sphinxvars.sql + + + # maximum RT merge thread IO calls per second, and per-call IO size + # useful for throttling (the background) OPTIMIZE INDEX impact + # optional, default is 0 (unlimited) + # + # rt_merge_iops = 40 + # rt_merge_maxiosize = 1M + + + # interval between agent mirror pings, in milliseconds + # 0 means disable pings + # optional, default is 1000 + # + # ha_ping_interval = 0 + + + # agent mirror statistics window size, in seconds + # stats older than the window size (karma) are retired + # that is, they will not affect master choice of agents in any way + # optional, default is 60 seconds + # + # ha_period_karma = 60 + + + # delay between preforked children restarts on rotation, in milliseconds + # optional, default is 0 (no delay) + # + # prefork_rotation_throttle = 100 + + + # a prefix to prepend to the local file names when creating snippets + # with load_files and/or load_files_scatter options + # optional, default is empty + # + # snippets_file_prefix = /mnt/common/server1/ +} \ No newline at end of file From aa524fa52a9a40322e514d7d25b5e416eb941a47 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 13 Aug 2014 15:28:28 +0000 Subject: [PATCH 12/40] New configuration --- scripts/build/README | 3 +- scripts/build/config-dev/ent.conf | 49 +++-- scripts/build/config-dev/enttmp.conf | 34 +-- scripts/build/config-dev/sphinx.conf | 2 +- scripts/build/config-dev/stopwords-ent.txt | 67 ++++++ scripts/build/config-dev/wordforms-ent.txt | 1 - scripts/build/config-prod/dir.conf | 2 +- scripts/build/config-prod/dirtmp.conf | 2 +- scripts/build/config-prod/ent.conf | 84 ++++--- scripts/build/config-prod/enttmp.conf | 52 
+++-- scripts/build/config-prod/stopwords-ent.txt | 67 ++++++ scripts/build/config-prod/wordforms-ent.txt | 1 - .../config/Odea/MySqlServer/ciblage.conf | 157 ------------- scripts/build/config/Odea/sphinx.conf | 79 ------- scripts/build/config/Sphinx/DOC | 90 -------- scripts/build/config/Sphinx/Dev/act.conf | 42 ---- scripts/build/config/Sphinx/Dev/ciblage.conf | 157 ------------- scripts/build/config/Sphinx/Dev/ciblage.txt | 110 ---------- scripts/build/config/Sphinx/Dev/dir.conf | 64 ------ scripts/build/config/Sphinx/Dev/dirtmp.conf | 63 ------ scripts/build/config/Sphinx/Dev/ent.conf | 74 ------- scripts/build/config/Sphinx/Dev/enttmp.conf | 74 ------- scripts/build/config/Sphinx/Dev/histo.conf | 24 -- .../build/config/Sphinx/MysqlServer/ent.conf | 73 ------ .../config/Sphinx/MysqlServer/enttmp.conf | 73 ------ scripts/build/config/Sphinx/sphinx.conf | 79 ------- scripts/build/config/Sphinx/wordforms-ent.txt | 207 ------------------ .../config/SphinxHisto/MysqlServer/act.conf | 41 ---- .../SphinxHisto/MysqlServer/dirtmp.conf | 44 ---- .../config/SphinxHisto/MysqlServer/histo.conf | 23 -- scripts/build/config/SphinxHisto/sphinx.conf | 77 ------- scripts/build/config/srvws02/sphinx.conf | 77 ------- scripts/build/config/srvws02/srvws02/dir.conf | 37 ---- .../ubuntu-14.04/etc/logrotate.d/indexer | 8 + .../ubuntu-14.04/etc/logrotate.d/searchd | 12 + 35 files changed, 269 insertions(+), 1780 deletions(-) create mode 100644 scripts/build/config-prod/stopwords-ent.txt delete mode 100644 scripts/build/config/Odea/MySqlServer/ciblage.conf delete mode 100644 scripts/build/config/Odea/sphinx.conf delete mode 100644 scripts/build/config/Sphinx/DOC delete mode 100644 scripts/build/config/Sphinx/Dev/act.conf delete mode 100644 scripts/build/config/Sphinx/Dev/ciblage.conf delete mode 100644 scripts/build/config/Sphinx/Dev/ciblage.txt delete mode 100644 scripts/build/config/Sphinx/Dev/dir.conf delete mode 100644 scripts/build/config/Sphinx/Dev/dirtmp.conf delete mode 100644 
scripts/build/config/Sphinx/Dev/ent.conf delete mode 100644 scripts/build/config/Sphinx/Dev/enttmp.conf delete mode 100644 scripts/build/config/Sphinx/Dev/histo.conf delete mode 100644 scripts/build/config/Sphinx/MysqlServer/ent.conf delete mode 100644 scripts/build/config/Sphinx/MysqlServer/enttmp.conf delete mode 100644 scripts/build/config/Sphinx/sphinx.conf delete mode 100644 scripts/build/config/Sphinx/wordforms-ent.txt delete mode 100644 scripts/build/config/SphinxHisto/MysqlServer/act.conf delete mode 100644 scripts/build/config/SphinxHisto/MysqlServer/dirtmp.conf delete mode 100644 scripts/build/config/SphinxHisto/MysqlServer/histo.conf delete mode 100644 scripts/build/config/SphinxHisto/sphinx.conf delete mode 100644 scripts/build/config/srvws02/sphinx.conf delete mode 100644 scripts/build/config/srvws02/srvws02/dir.conf create mode 100644 scripts/build/ubuntu-14.04/etc/logrotate.d/indexer create mode 100644 scripts/build/ubuntu-14.04/etc/logrotate.d/searchd diff --git a/scripts/build/README b/scripts/build/README index 9ee4c35..5dd51f4 100644 --- a/scripts/build/README +++ b/scripts/build/README @@ -34,7 +34,8 @@ Utilisation des stopwords ========================= un, une, le, la, les, de, des, du, dans, l', d', @ - +Créer un fichier de stopwords +sudo /usr/local/sphinx/bin/indexer --config /etc/sphinxsearch/sphinx.conf --buildstops stopwords-ent.txt 100 ent diff --git a/scripts/build/config-dev/ent.conf b/scripts/build/config-dev/ent.conf index 7627279..42e8a84 100644 --- a/scripts/build/config-dev/ent.conf +++ b/scripts/build/config-dev/ent.conf @@ -21,34 +21,15 @@ source ent sql_attr_uint = adr_cp sql_attr_uint = adr_dep sql_attr_uint = cj - sql_attr_uint = sirenValide + sql_attr_bool = sirenValide sql_attr_uint = rang } index ent -{ +{ source = ent path = /dbs/sphinx/ent - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, 
U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - wordforms = /etc/sphinxsearch/wordforms-ent.txt - enable_star = 1 -} - -index ent_phx -{ - source = ent - path = /dbs/sphinx/ent_phx + mlock = 1 docinfo = extern charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ @@ -61,8 +42,30 @@ index ent_phx U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + stopwords = /etc/sphinxsearch/stopwords-ent.txt + wordforms = /etc/sphinxsearch/wordforms-ent.txt +} + +index ent_phx +{ + source = ent + path = /dbs/sphinx/ent_phx + mlock = 1 + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + stopwords = /etc/sphinxsearch/stopwords-ent.txt + stopwords_unstemmed = 1 wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - enable_star = 1 
morphology = libstemmer_fr min_stemming_len = 4 } diff --git a/scripts/build/config-dev/enttmp.conf b/scripts/build/config-dev/enttmp.conf index 2389645..4ea7acd 100644 --- a/scripts/build/config-dev/enttmp.conf +++ b/scripts/build/config-dev/enttmp.conf @@ -26,13 +26,14 @@ source ent } index ent -{ - source = ent - path = /dbs/sphinx/ent - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ +{ + source = ent + path = /dbs/sphinx/ent + mlock = 1 + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ @@ -41,19 +42,19 @@ index ent U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - + stopwords = /etc/sphinxsearch/stopwords-ent.txt wordforms = /etc/sphinxsearch/wordforms-ent.txt - enable_star = 1 } index ent_phx { - source = ent - path = /dbs/sphinx/ent_phx - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ + source = ent + path = /dbs/sphinx/ent_phx + mlock = 1 + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ @@ -62,8 +63,9 @@ index ent_phx U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ U+FD->y, U+FF->y, 
U+0152->U+0153, U+0153 + stopwords = /etc/sphinxsearch/stopwords-ent.txt + stopwords_unstemmed = 1 wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - enable_star = 1 morphology = libstemmer_fr min_stemming_len = 4 } diff --git a/scripts/build/config-dev/sphinx.conf b/scripts/build/config-dev/sphinx.conf index b41e165..10c27ee 100644 --- a/scripts/build/config-dev/sphinx.conf +++ b/scripts/build/config-dev/sphinx.conf @@ -232,7 +232,7 @@ searchd # optional, default is 0, which means disable multi-threaded searching # should work with all MPMs (ie. does NOT require workers=threads) # - dist_threads = 2 + dist_threads = 1 # binlog files path; use empty string to disable binlog diff --git a/scripts/build/config-dev/stopwords-ent.txt b/scripts/build/config-dev/stopwords-ent.txt index e69de29..e319798 100644 --- a/scripts/build/config-dev/stopwords-ent.txt +++ b/scripts/build/config-dev/stopwords-ent.txt @@ -0,0 +1,67 @@ +de +la +du +r +des +le +av +d +les +l +jean +sci +sur +et +sarl +a +bd +pierre +societe +marie +pl +en +rte +france +c +s +m +michel +association +paul +louis +andre +claude +civile +francois +jacques +georges +ld +b +immobiliere +p +e +i +all +philippe +services +henri +bernard +charles +martin +chez +denis +alain +gaulle +mer +republique +ecole +seine +rene +joseph +robert +laurent +bat +f +marcel +au +grande diff --git a/scripts/build/config-dev/wordforms-ent.txt b/scripts/build/config-dev/wordforms-ent.txt index 929912d..e095e49 100644 --- a/scripts/build/config-dev/wordforms-ent.txt +++ b/scripts/build/config-dev/wordforms-ent.txt @@ -47,7 +47,6 @@ bastide > bstd baston > bast beguinage > begi berge > ber -bois > bois boite postal > bp boucle > bcle boulevard > bd diff --git a/scripts/build/config-prod/dir.conf b/scripts/build/config-prod/dir.conf index c390aa5..140b1b1 100644 --- a/scripts/build/config-prod/dir.conf +++ b/scripts/build/config-prod/dir.conf @@ -2,7 +2,7 @@ source dir { type = mysql - sql_host = 192.168.78.230 + sql_host = 
192.168.3.30 sql_user = sphinx sql_pass = indexer sql_db = jo diff --git a/scripts/build/config-prod/dirtmp.conf b/scripts/build/config-prod/dirtmp.conf index 87c3399..64dc9bf 100644 --- a/scripts/build/config-prod/dirtmp.conf +++ b/scripts/build/config-prod/dirtmp.conf @@ -1,7 +1,7 @@ source dir { type = mysql - sql_host = 192.168.78.230 + sql_host = 192.168.3.30 sql_user = sphinx sql_pass = indexer sql_db = jo diff --git a/scripts/build/config-prod/ent.conf b/scripts/build/config-prod/ent.conf index fe84d10..505eac8 100644 --- a/scripts/build/config-prod/ent.conf +++ b/scripts/build/config-prod/ent.conf @@ -1,42 +1,39 @@ + source ent { - type = mysql - sql_host = 192.168.3.30 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, siren, nic, siege, \ - CONCAT_WS(" ", \ - raisonSociale, \ - enseigne, \ - sigle, \ - identite_pre\ - ) AS nom, REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ - adr_ville AS ville, cj, ape_etab, (siren>200) AS sirenValide, rang \ - FROM etablissements; - - sql_attr_uint = siren - sql_attr_uint = nic - sql_attr_uint = siege - sql_attr_uint = actif - sql_attr_uint = adr_num - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = cj - sql_attr_uint = sirenValide - sql_attr_uint = rang + type = mysql + sql_host = 192.168.3.30 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, siren, nic, siege, \ + CONCAT_WS(" ", raisonSociale, enseigne, sigle, identite_pre) AS nom, \ + REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ + actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ + adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ + FROM etablissements; + sql_attr_uint = siren + sql_attr_uint = nic + sql_attr_uint = siege + 
sql_attr_uint = actif + sql_attr_uint = adr_num + sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_uint = cj + sql_attr_bool = sirenValide + sql_attr_uint = rang } index ent -{ - source = ent - path = /dbs/sphinx/ent - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ +{ + source = ent + path = /dbs/sphinx/ent + mlock = 1 + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ @@ -45,19 +42,19 @@ index ent U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - + stopwords = /etc/sphinxsearch/stopwords-ent.txt wordforms = /etc/sphinxsearch/wordforms-ent.txt - enable_star = 1 } index ent_phx { - source = ent - path = /dbs/sphinx/ent_phx - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ + source = ent + path = /dbs/sphinx/ent_phx + mlock = 1 + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ @@ -66,8 +63,9 @@ index ent_phx U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + stopwords = /etc/sphinxsearch/stopwords-ent.txt + stopwords_unstemmed = 1 wordforms = 
/usr/local/sphinx/etc/wordforms-ent.txt - enable_star = 1 morphology = libstemmer_fr min_stemming_len = 4 } diff --git a/scripts/build/config-prod/enttmp.conf b/scripts/build/config-prod/enttmp.conf index 103d122..2d73e9c 100644 --- a/scripts/build/config-prod/enttmp.conf +++ b/scripts/build/config-prod/enttmp.conf @@ -1,3 +1,4 @@ + source ent { type = mysql @@ -7,17 +8,12 @@ source ent sql_db = jo sql_query_pre = SET NAMES utf8 sql_query = SELECT id, siren, nic, siege, \ - CONCAT_WS(" ", \ - raisonSociale, \ - enseigne, \ - sigle, \ - identite_pre\ - ) AS nom, REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ - adr_ville AS ville, cj, ape_etab, (siren>200) AS sirenValide, rang \ - FROM etablissements_tmp; - - sql_attr_uint = siren + CONCAT_WS(" ", raisonSociale, enseigne, sigle, identite_pre) AS nom, \ + REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ + actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ + adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ + FROM etablissements_tmp; + sql_attr_uint = siren sql_attr_uint = nic sql_attr_uint = siege sql_attr_uint = actif @@ -30,13 +26,14 @@ source ent } index ent -{ - source = ent - path = /dbs/sphinx/ent - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ +{ + source = ent + path = /dbs/sphinx/ent + mlock = 1 + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ @@ -45,19 +42,19 @@ index ent U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ 
U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - + stopwords = /etc/sphinxsearch/stopwords-ent.txt wordforms = /etc/sphinxsearch/wordforms-ent.txt - enable_star = 1 } index ent_phx { - source = ent - path = /dbs/sphinx/ent_phx - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ + source = ent + path = /dbs/sphinx/ent_phx + mlock = 1 + docinfo = extern + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ @@ -66,8 +63,9 @@ index ent_phx U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + stopwords = /etc/sphinxsearch/stopwords-ent.txt + stopwords_unstemmed = 1 wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - enable_star = 1 morphology = libstemmer_fr min_stemming_len = 4 } diff --git a/scripts/build/config-prod/stopwords-ent.txt b/scripts/build/config-prod/stopwords-ent.txt new file mode 100644 index 0000000..e319798 --- /dev/null +++ b/scripts/build/config-prod/stopwords-ent.txt @@ -0,0 +1,67 @@ +de +la +du +r +des +le +av +d +les +l +jean +sci +sur +et +sarl +a +bd +pierre +societe +marie +pl +en +rte +france +c +s +m +michel +association +paul +louis +andre +claude +civile +francois +jacques +georges +ld +b +immobiliere +p +e +i +all +philippe +services +henri +bernard +charles +martin +chez +denis +alain +gaulle +mer +republique +ecole +seine +rene +joseph +robert +laurent +bat +f +marcel +au +grande diff --git a/scripts/build/config-prod/wordforms-ent.txt b/scripts/build/config-prod/wordforms-ent.txt index 
929912d..e095e49 100644 --- a/scripts/build/config-prod/wordforms-ent.txt +++ b/scripts/build/config-prod/wordforms-ent.txt @@ -47,7 +47,6 @@ bastide > bstd baston > bast beguinage > begi berge > ber -bois > bois boite postal > bp boucle > bcle boulevard > bd diff --git a/scripts/build/config/Odea/MySqlServer/ciblage.conf b/scripts/build/config/Odea/MySqlServer/ciblage.conf deleted file mode 100644 index 4867f7e..0000000 --- a/scripts/build/config/Odea/MySqlServer/ciblage.conf +++ /dev/null @@ -1,157 +0,0 @@ - -source ciblage -{ - type = mysql - sql_host = 192.168.3.30 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ - SELECT id, LPAD(siren, 9, '000000000') AS siren, LPAD(nic, 5, '00000') AS nic, \ - siege, \ - adr_cp, \ - REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - IF(tel>0,1,0) AS tel, \ - IF(fax>0,1,0) AS fax, \ - cj, \ - capital, \ - CONCAT('EX ', ape_etab) AS ape_etab, \ - CONCAT('EX ', ape_entrep) AS ape_entrep, \ - age_entrep, \ - age_etab, \ - tca, \ - tcaexp, \ - IF(teff_entrep IS NULL,99,teff_entrep) AS teff_entrep, \ - IF(teff_etab IS NULL,99,teff_etab) AS teff_etab, \ - IF(web='',0,1) AS web, \ - IF(mail='',0,1) AS mail, \ - IF(adrDom>0,1,0) AS adrDom, \ - actifEco, \ - presentRcs, \ - procolHisto, \ - tvaIntraValide, \ - dateCrea_etab,\ - dateCrea_ent, \ - dateImmat, \ - eff_entrep, \ - eff_etab, \ - IF(dirNom='',0,1) AS dirNom, \ - nbEtab, \ - IF(nbMPubli>0,1,0) AS nbMPubli, \ - IF(CAST(sirenGrp AS UNSIGNED)>100,1,0) AS sirenGrp, \ - nbActio, \ - IF(nbActio>0,1,0) AS actio, \ - nbPart, \ - IF(nbPart>0,1,0) AS part, \ - CASE bilType WHEN 'I' THEN 1 WHEN 'R' THEN 2 WHEN 'E' THEN 3 ELSE 0 END as bilType, \ - bilAnnee, \ - bilCloture, \ - bilDuree, \ - bilTca, \ - bilEE, \ - bilFL, \ - bilFK, \ - bilFR, \ - bilGF, \ - bilGP, \ - bilGU, \ - bilGW, \ - bilHD, \ - bilHH, \ - bilHL, \ - bilHM, \ - bilHN, \ - bilYP, \ - CAST(codeCommune AS UNSIGNED) AS codeCommune, \ - CASE zus WHEN 
'' THEN 0 WHEN 'HORSZONE' THEN 0 WHEN 'NSP' THEN 0 WHEN 'X' THEN 0 ELSE 1 END as zus, \ - CASE zfu WHEN '' THEN 0 WHEN 'HORSZONE' THEN 0 WHEN 'NSP' THEN 0 WHEN 'X' THEN 0 ELSE 1 END as zfu, \ - CASE zru WHEN '' THEN 0 WHEN 'NSP' THEN 0 ELSE 1 END as zru, \ - CASE cucs WHEN '' THEN 0 WHEN 'NSP' THEN 0 ELSE 1 END as cucs, \ - IF(zrr=1,1,0) as zrr, \ - IF(zafr=1,1,0) as zafr, \ - CASE avisCs WHEN 0 THEN 1 WHEN 10 THEN 2 WHEN 15 THEN 3 WHEN 23 THEN 4 WHEN 29 THEN 4 WHEN 39 THEN 4 WHEN 43 THEN 4 WHEN 21 THEN 5 WHEN 26 THEN 5 WHEN 28 THEN 5 WHEN 31 THEN 6 WHEN 50 THEN 6 WHEN 24 THEN 7 ELSE 0 END as avisCs \ - FROM etablissements_act WHERE siren>100; - - sql_field_string = siren - sql_field_string = nic - sql_attr_uint = siege - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = tel - sql_attr_uint = fax - sql_attr_uint = cj - sql_attr_uint = capital - sql_attr_uint = age_entrep - sql_attr_uint = age_etab - sql_attr_uint = tca - sql_attr_uint = tcaexp - sql_attr_uint = teff_entrep - sql_attr_uint = teff_etab - sql_attr_uint = web - sql_attr_uint = mail - sql_attr_uint = adrDom - sql_attr_uint = actifEco - sql_attr_uint = presentRcs - sql_attr_uint = procolHisto - sql_attr_uint = tvaIntraValide - sql_attr_uint = dateCrea_etab - sql_attr_uint = dateCrea_ent - sql_attr_uint = dateImmat - sql_attr_uint = eff_entrep - sql_attr_uint = eff_etab - sql_attr_uint = dirNom - sql_attr_uint = nbEtab - sql_attr_uint = nbMPubli - sql_attr_uint = sirenGrp - sql_attr_uint = nbActio - sql_attr_uint = actio - sql_attr_uint = nbPart - sql_attr_uint = part - sql_attr_uint = bilType - sql_attr_uint = bilAnnee - sql_attr_uint = bilCloture - sql_attr_uint = bilDuree - sql_attr_uint = bilTca - sql_attr_uint = bilEE - sql_attr_uint = bilFL - sql_attr_uint = bilFK - sql_attr_uint = bilFR - sql_attr_uint = bilGF - sql_attr_uint = bilGP - sql_attr_uint = bilGU - sql_attr_uint = bilGW - sql_attr_uint = bilHD - sql_attr_uint = bilHH - sql_attr_uint = bilHL - sql_attr_uint = bilHM - 
sql_attr_uint = bilHN - sql_attr_uint = bilYP - sql_attr_uint = codeCommune - sql_attr_uint = zus - sql_attr_uint = zru - sql_attr_uint = zfu - sql_attr_uint = cucs - sql_attr_uint = zrr - sql_attr_uint = zafr - sql_attr_uint = avisCs -} - -index ciblage -{ - source = ciblage - path = /dbs/sphinx/ciblage - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} diff --git a/scripts/build/config/Odea/sphinx.conf b/scripts/build/config/Odea/sphinx.conf deleted file mode 100644 index eb7f627..0000000 --- a/scripts/build/config/Odea/sphinx.conf +++ /dev/null @@ -1,79 +0,0 @@ - -############################################################################# -## indexer settings -############################################################################# - -indexer -{ - # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) - # optional, default is 32M, max is 2047M, recommended is 256M to 1024M - mem_limit = 256M - - # maximum IO calls per second (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iops = 40 - - - # maximum IO call size, bytes (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iosize = 1048576 -} - -############################################################################# -## searchd settings 
-############################################################################# - -searchd -{ - # IP address to bind on - # optional, default is 0.0.0.0 (ie. listen on all interfaces) - # - # address = 127.0.0.1 - # address = 192.168.0.1 - - - # searchd TCP port number - # mandatory, default is 3312 - listen = 3312 - - # log file, searchd run info is logged here - # optional, default is 'searchd.log' - log = /dbs/sphinxlog/searchd.log - - # query log file, all search queries are logged here - # optional, default is empty (do not log queries) - query_log = /dbs/sphinxlog/query.log - - # client read timeout, seconds - # optional, default is 5 - read_timeout = 5 - - # maximum amount of children to fork (concurrent searches to run) - # optional, default is 0 (unlimited) - max_children = 30 - - # PID file, searchd process ID file name - # mandatory - pid_file = /var/log/searchd.pid - - # max amount of matches the daemon ever keeps in RAM, per-index - # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL - # default is 1000 (just like Google) - max_matches = 1000 - - # seamless rotate, prevents rotate stalls if precaching huge datasets - # optional, default is 1 - seamless_rotate = 1 - - # whether to forcibly preopen all indexes on startup - # optional, default is 0 (do not preopen) - preopen_indexes = 1 - - # whether to unlink .old index copies on succesful rotation. 
- # optional, default is 1 (do unlink) - unlink_old = 1 - - compat_sphinxql_magics=0 -} diff --git a/scripts/build/config/Sphinx/DOC b/scripts/build/config/Sphinx/DOC deleted file mode 100644 index 9ee4c35..0000000 --- a/scripts/build/config/Sphinx/DOC +++ /dev/null @@ -1,90 +0,0 @@ -La base de données doit communiqué en UTF-8 - -sql_query_pre = SET NAMES utf8 -charset_type = utf-8 - -# 'utf-8' defaults for English and Russian -charset_table = 0..9, A..Z->a..z, _, a..z, \ - U+410..U+42F->U+430..U+44F, U+430..U+44F - -# For french -charset_table = 0..9, A..Z->a..z, a..z, \ - U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, \ - U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, \ - U+0152->U+0153, U+0153 \ - -Morphology -========== -# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru', -# 'soundex', and 'metaphone'; additional preprocessors available from -# libstemmer are 'libstemmer_XXX', where XXX is algorithm code -# (see libstemmer_c/libstemmer/modules.txt) - -morphology = libstemmer_french - -Taille minimum des mots ou l'on applique la morphology -min_stemming_len = 4 - - -According to libstimmer.c/libstimmer/modules.txt, the french module can be refered to by either - french, fr, fre, fra... 
french UTF_8,ISO_8859_1 french,fr,fre,fra - - -Utilisation des stopwords -========================= -un, une, le, la, les, de, des, du, dans, l', d', @ - - - - - -Utilisation des wordforms -========================= -Appliquer après les règles de charset_table -Stemming n'est pas appliquer à ces mots , ils deviennent des exceptions - -Les mots définis sont utilisé pour normalisé les mots durant l'indexation et la recherche, -il est donc nécessaire de réindexé et redémarrer la recherche pour appliquer les changements - -Divers ------- - -& > ET - -Les chiffres ------------- - -un > 1 -deux > 2 -trois > 3 -quatre > 4 -cinq > 5 -six > 6 -sept > 7 -huit > 8 -neuf > 9 -dix > 10 -onze > 11 -douze > 12 -treize > 13 -quatorze > 14 -quinze > 15 -seize > 16 -vingt > 20 -vingts > 20 -trente > 30 -quarante > 40 -cinquante > 50 -soixante > 60 -quatrevingt > 80 -cent > 100 -cents > 100 -mille > 1000 - -Les chiffres romain -------------------- - - -Les éléments de voies et abbréviations --------------------------------------- -Voir le wordforms diff --git a/scripts/build/config/Sphinx/Dev/act.conf b/scripts/build/config/Sphinx/Dev/act.conf deleted file mode 100644 index 0ba5e99..0000000 --- a/scripts/build/config/Sphinx/Dev/act.conf +++ /dev/null @@ -1,42 +0,0 @@ - -source act -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - - sql_query = \ - SELECT l.id, l.idPar, l.idAct, r.siren, r.actif, r.PpPm, r.RS, r.adresse_cp, r.adresse_ville, p.libPays AS pays, l.PDetention \ - FROM liens2 l, liensRef r, tabPays p \ - WHERE l.dateSuppr = '0000-00-00 00:00:00' \ - AND r.id = l.idAct \ - AND ( r.siren>1000 OR (r.siren=0 AND r.adresse_pays!='FRA') OR (r.siren=0 AND r.PpPm='PP') ) \ - AND p.codPays3 = r.adresse_pays; - - sql_attr_uint = idAct - sql_attr_uint = actif - sql_attr_float= PDetention - sql_attr_string = pays -} - -index act -{ - source = act - path = /dbs/sphinx/act - docinfo = extern - 
morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} diff --git a/scripts/build/config/Sphinx/Dev/ciblage.conf b/scripts/build/config/Sphinx/Dev/ciblage.conf deleted file mode 100644 index 088bbd8..0000000 --- a/scripts/build/config/Sphinx/Dev/ciblage.conf +++ /dev/null @@ -1,157 +0,0 @@ - -source ciblage -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ - SELECT id, LPAD(siren, 9, '000000000') AS siren, LPAD(nic, 5, '00000') AS nic, \ - siege, \ - adr_cp, \ - REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - IF(tel>0,1,0) AS tel, \ - IF(fax>0,1,0) AS fax, \ - cj, \ - capital, \ - CONCAT('EX ', ape_etab) AS ape_etab, \ - CONCAT('EX ', ape_entrep) AS ape_entrep, \ - age_entrep, \ - age_etab, \ - tca, \ - tcaexp, \ - IF(teff_entrep IS NULL,99,teff_entrep) AS teff_entrep, \ - IF(teff_etab IS NULL,99,teff_etab) AS teff_etab, \ - IF(web='',0,1) AS web, \ - IF(mail='',0,1) AS mail, \ - IF(adrDom>0,1,0) AS adrDom, \ - actifEco, \ - presentRcs, \ - procolHisto, \ - tvaIntraValide, \ - dateCrea_etab,\ - dateCrea_ent, \ - dateImmat, \ - eff_entrep, \ - eff_etab, \ - IF(dirNom='',0,1) AS dirNom, \ - nbEtab, \ - IF(nbMPubli>0,1,0) AS nbMPubli, \ - IF(CAST(sirenGrp AS UNSIGNED)>100,1,0) AS sirenGrp, \ - nbActio, \ - 
IF(nbActio>0,1,0) AS actio, \ - nbPart, \ - IF(nbPart>0,1,0) AS part, \ - CASE bilType WHEN 'I' THEN 1 WHEN 'R' THEN 2 WHEN 'E' THEN 3 ELSE 0 END as bilType, \ - bilAnnee, \ - bilCloture, \ - bilDuree, \ - bilTca, \ - bilEE, \ - bilFL, \ - bilFK, \ - bilFR, \ - bilGF, \ - bilGP, \ - bilGU, \ - bilGW, \ - bilHD, \ - bilHH, \ - bilHL, \ - bilHM, \ - bilHN, \ - bilYP, \ - CAST(codeCommune AS UNSIGNED) AS codeCommune, \ - CASE zus WHEN '' THEN 0 WHEN 'HORSZONE' THEN 0 WHEN 'NSP' THEN 0 WHEN 'X' THEN 0 ELSE 1 END as zus, \ - CASE zfu WHEN '' THEN 0 WHEN 'HORSZONE' THEN 0 WHEN 'NSP' THEN 0 WHEN 'X' THEN 0 ELSE 1 END as zfu, \ - CASE zru WHEN '' THEN 0 WHEN 'NSP' THEN 0 ELSE 1 END as zru, \ - CASE cucs WHEN '' THEN 0 WHEN 'NSP' THEN 0 ELSE 1 END as cucs, \ - IF(zrr=1,1,0) as zrr, \ - IF(zafr=1,1,0) as zafr, \ - CASE avisCs WHEN 0 THEN 1 WHEN 10 THEN 2 WHEN 15 THEN 3 WHEN 23 THEN 4 WHEN 29 THEN 4 WHEN 39 THEN 4 WHEN 43 THEN 4 WHEN 21 THEN 5 WHEN 26 THEN 5 WHEN 28 THEN 5 WHEN 31 THEN 6 WHEN 50 THEN 6 WHEN 24 THEN 7 ELSE 0 END as avisCs \ - FROM etablissements_act WHERE siren>100; - - sql_field_string = siren - sql_field_string = nic - sql_attr_uint = siege - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = tel - sql_attr_uint = fax - sql_attr_uint = cj - sql_attr_uint = capital - sql_attr_uint = age_entrep - sql_attr_uint = age_etab - sql_attr_uint = tca - sql_attr_uint = tcaexp - sql_attr_uint = teff_entrep - sql_attr_uint = teff_etab - sql_attr_uint = web - sql_attr_uint = mail - sql_attr_uint = adrDom - sql_attr_uint = actifEco - sql_attr_uint = presentRcs - sql_attr_uint = procolHisto - sql_attr_uint = tvaIntraValide - sql_attr_uint = dateCrea_etab - sql_attr_uint = dateCrea_ent - sql_attr_uint = dateImmat - sql_attr_uint = eff_entrep - sql_attr_uint = eff_etab - sql_attr_uint = dirNom - sql_attr_uint = nbEtab - sql_attr_uint = nbMPubli - sql_attr_uint = sirenGrp - sql_attr_uint = nbActio - sql_attr_uint = actio - sql_attr_uint = nbPart - sql_attr_uint 
= part - sql_attr_uint = bilType - sql_attr_uint = bilAnnee - sql_attr_uint = bilCloture - sql_attr_uint = bilDuree - sql_attr_uint = bilTca - sql_attr_uint = bilEE - sql_attr_uint = bilFL - sql_attr_uint = bilFK - sql_attr_uint = bilFR - sql_attr_uint = bilGF - sql_attr_uint = bilGP - sql_attr_uint = bilGU - sql_attr_uint = bilGW - sql_attr_uint = bilHD - sql_attr_uint = bilHH - sql_attr_uint = bilHL - sql_attr_uint = bilHM - sql_attr_uint = bilHN - sql_attr_uint = bilYP - sql_attr_uint = codeCommune - sql_attr_uint = zus - sql_attr_uint = zru - sql_attr_uint = zfu - sql_attr_uint = cucs - sql_attr_uint = zrr - sql_attr_uint = zafr - sql_attr_uint = avisCs -} - -index ciblage -{ - source = ciblage - path = /dbs/sphinx/ciblage - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} diff --git a/scripts/build/config/Sphinx/Dev/ciblage.txt b/scripts/build/config/Sphinx/Dev/ciblage.txt deleted file mode 100644 index 5cede41..0000000 --- a/scripts/build/config/Sphinx/Dev/ciblage.txt +++ /dev/null @@ -1,110 +0,0 @@ -############################################################################# -# Documentation -# ============= -# -# Présence d'éléments -# IF(element=='',0,1) AS pElement -# -# Liste des champs dans la BDD -# ----------------------------- -# id -# source -# source_id -# triCode -# autre_id -# siren => 
LPAD(siren, 9, '000000000') -# nic => LPAD(nic, 5, '00000') -# actif => 0 ou 1 -# siege => 0 ou 1 -# raisonSociale -# enseigne -# sigle -# identite_pre -# marques => IF(marques=='',0,1) AS pMarques -# adr_num -# adr_btq -# adr_typeVoie -# adr_libVoie -# adr_comp -# adr_cp => Code postal -# adr_ville => Ville -# adr_dep => Departement (Corse 2A,2B => 201, 202) -# adr_com -# tel => IF(tel>0,1,0) AS pTel -# fax => IF(fax>0,1,0) AS pFax -# cj => Texte -# capital => Float -# capitalDev -# capitalSrc -# ape_etab => Texte => Code APE de l'etablissement -# ape_entrep => Texte => Code APE de l'entreprise -# age_entrep => Entier => Age de l'entreprise -# age_etab => Entier => Age de l'etablissement -# tca => Entier => Tranche de chiffre d'affaire -# tcaexp => Entier => Tranche de chiffre d'affaire à l'export -# teff_entrep => Tranche Effectif de l'entreprise -# teff_etab => Tranche Effectif de l'etablissement -# rang -# web => IF(web=='',0,1) AS pWeb -# mail => IF(mail=='',0,1) AS pMail -# adrDom => 0,1,2 -# lieuAct -# actifEco => 0,1 -# presentRcs => 0,1 -# procolHisto => -# tvaIntraCle -# tvaIntraValide => 0,1 -# ape4_etab -# ape4_entrep -# NaceEtab -# NaceEntrep -# dateCrea_etab => Date -# dateCrea_ent => Date -# dateImmat => Date -# eff_entrep => Entier => Effectif de l'entreprise -# eff_etab => Entier => Effectir de l'etablissement -# distSP -# achPost -# rivoli -# dirCiv -# dirNom => IF(pDirNom=='',0,1) AS pDirNom -# dirPrenom -# dirDateNaiss -# dirFct -# nbEtab => Nombre d'établissement -# nbMPubli -# sirenGrp => IF(pSirenGrp>0,1,0) AS pSirenGrp -# nbActio => Entier => Nombre d'actionnaires (actio,bool) -# nbPart => Entier => Nombre de participations (part, bool) -# bilType => -# bilAnnee => -# bilCloture => -# bilDuree => -# bilTca => -# bilEE => -# bilFL => -# bilFK => -# bilFR => -# bilGF => -# bilGP => -# bilGU => -# bilGW => -# bilHD => -# bilHH => -# bilHL => -# bilHM => -# bilHN => -# bilYP => -# avisCs -# codeCommune => -# l93_x -# l93_y -# alt -# precis -# 
zus => -# zru => -# zfu => -# cucs => -# zrr => -# zafr => -############################################################################# \ No newline at end of file diff --git a/scripts/build/config/Sphinx/Dev/dir.conf b/scripts/build/config/Sphinx/Dev/dir.conf deleted file mode 100644 index c390aa5..0000000 --- a/scripts/build/config/Sphinx/Dev/dir.conf +++ /dev/null @@ -1,64 +0,0 @@ - -source dir -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ - SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ - YEAR(naissance_date) AS naiss_annee, \ - MONTH(naissance_date) AS naiss_mois, \ - DAY(naissance_date) AS naiss_jour, \ - naissance_lieu, fonction_code, actif \ - FROM dirigeants; - sql_attr_string = civilite - sql_attr_string = typeDir - sql_attr_string = fonction_code - sql_attr_uint = dirSiren - sql_attr_uint = naiss_annee - sql_attr_uint = naiss_mois - sql_attr_uint = naiss_jour - sql_attr_uint = actif - sql_attr_uint = adr_dep -} - -index dir -{ - source = dir - path = /dbs/sphinx/dir - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} - -index dir_phx -{ - source = dir - path = /dbs/sphinx/dir_phx - docinfo = extern - charset_type = utf-8 - 
charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - min_stemming_len = 4 -} \ No newline at end of file diff --git a/scripts/build/config/Sphinx/Dev/dirtmp.conf b/scripts/build/config/Sphinx/Dev/dirtmp.conf deleted file mode 100644 index 87c3399..0000000 --- a/scripts/build/config/Sphinx/Dev/dirtmp.conf +++ /dev/null @@ -1,63 +0,0 @@ -source dir -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = - sql_query = \ - SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ - YEAR(naissance_date) AS naiss_annee, \ - MONTH(naissance_date) AS naiss_mois, \ - DAY(naissance_date) AS naiss_jour, \ - naissance_lieu, fonction_code, actif \ - FROM dirigeants_tmp; - sql_attr_string = civilite - sql_attr_string = typeDir - sql_attr_string = fonction_code - sql_attr_uint = dirSiren - sql_attr_uint = naiss_annee - sql_attr_uint = naiss_mois - sql_attr_uint = naiss_jour - sql_attr_uint = actif - sql_attr_uint = adr_dep -} - -index dir -{ - source = dir - path = /dbs/sphinx/dir - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, 
U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} - -index dir_phx -{ - source = dir - path = /dbs/sphinx/dir_phx - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - min_stemming_len = 4 -} \ No newline at end of file diff --git a/scripts/build/config/Sphinx/Dev/ent.conf b/scripts/build/config/Sphinx/Dev/ent.conf deleted file mode 100644 index 58b4b9b..0000000 --- a/scripts/build/config/Sphinx/Dev/ent.conf +++ /dev/null @@ -1,74 +0,0 @@ - -source ent -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, siren, nic, siege, \ - CONCAT_WS(" ", \ - raisonSociale, \ - enseigne, \ - sigle, \ - identite_pre\ - ) AS nom, REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ - adr_ville AS ville, cj, ape_etab, (siren>200) AS sirenValide, rang \ - FROM 
etablissements; - - sql_attr_uint = siren - sql_attr_uint = nic - sql_attr_uint = siege - sql_attr_uint = actif - sql_attr_uint = adr_num - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = cj - sql_attr_uint = sirenValide - sql_attr_uint = rang -} - -index ent -{ - source = ent - path = /dbs/sphinx/ent - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - - wordforms = /etc/sphinxsearch/wordforms-ent.txt - enable_star = 1 -} - -index ent_phx -{ - source = ent - path = /dbs/sphinx/ent_phx - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - enable_star = 1 - morphology = libstemmer_fr - min_stemming_len = 4 -} diff --git 
a/scripts/build/config/Sphinx/Dev/enttmp.conf b/scripts/build/config/Sphinx/Dev/enttmp.conf deleted file mode 100644 index 6f7ed4a..0000000 --- a/scripts/build/config/Sphinx/Dev/enttmp.conf +++ /dev/null @@ -1,74 +0,0 @@ - -source ent -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, siren, nic, siege, \ - CONCAT_WS(" ", \ - raisonSociale, \ - enseigne, \ - sigle, \ - identite_pre\ - ) AS nom, REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ - adr_ville AS ville, cj, ape_etab, (siren>200) AS sirenValide, rang \ - FROM etablissements_tmp; - - sql_attr_uint = siren - sql_attr_uint = nic - sql_attr_uint = siege - sql_attr_uint = actif - sql_attr_uint = adr_num - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = cj - sql_attr_uint = sirenValide - sql_attr_uint = rang -} - -index ent -{ - source = ent - path = /dbs/sphinx/ent - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - - wordforms = /etc/sphinxsearch/wordforms-ent.txt - enable_star = 1 -} - -index ent_phx -{ - source = ent - path = /dbs/sphinx/ent_phx - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, 
U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - enable_star = 1 - morphology = libstemmer_fr - min_stemming_len = 4 -} diff --git a/scripts/build/config/Sphinx/Dev/histo.conf b/scripts/build/config/Sphinx/Dev/histo.conf deleted file mode 100644 index 35bdff2..0000000 --- a/scripts/build/config/Sphinx/Dev/histo.conf +++ /dev/null @@ -1,24 +0,0 @@ - -source histo -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = histobodacc - sql_port = 3306 # optional, default is 3306 - sql_query = SELECT id, nomFichier, annee1, dateBod, texte FROM bodacc_ocr; - sql_attr_uint = annee1 - sql_query_info = SELECT * FROM bodacc_ocr WHERE id=$id -} - -index histo -{ - source = histo - path = /dbs/sphinx/histo - docinfo = extern - morphology = none - min_word_len = 2 - charset_type = sbcs - html_strip = 1 -} \ No newline at end of file diff --git a/scripts/build/config/Sphinx/MysqlServer/ent.conf b/scripts/build/config/Sphinx/MysqlServer/ent.conf deleted file mode 100644 index fe84d10..0000000 --- a/scripts/build/config/Sphinx/MysqlServer/ent.conf +++ /dev/null @@ -1,73 +0,0 @@ -source ent -{ - type = mysql - sql_host = 192.168.3.30 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, siren, nic, siege, \ - CONCAT_WS(" ", \ - raisonSociale, \ - enseigne, \ - sigle, \ - 
identite_pre\ - ) AS nom, REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ - adr_ville AS ville, cj, ape_etab, (siren>200) AS sirenValide, rang \ - FROM etablissements; - - sql_attr_uint = siren - sql_attr_uint = nic - sql_attr_uint = siege - sql_attr_uint = actif - sql_attr_uint = adr_num - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = cj - sql_attr_uint = sirenValide - sql_attr_uint = rang -} - -index ent -{ - source = ent - path = /dbs/sphinx/ent - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - - wordforms = /etc/sphinxsearch/wordforms-ent.txt - enable_star = 1 -} - -index ent_phx -{ - source = ent - path = /dbs/sphinx/ent_phx - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, 
U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - enable_star = 1 - morphology = libstemmer_fr - min_stemming_len = 4 -} diff --git a/scripts/build/config/Sphinx/MysqlServer/enttmp.conf b/scripts/build/config/Sphinx/MysqlServer/enttmp.conf deleted file mode 100644 index 103d122..0000000 --- a/scripts/build/config/Sphinx/MysqlServer/enttmp.conf +++ /dev/null @@ -1,73 +0,0 @@ -source ent -{ - type = mysql - sql_host = 192.168.3.30 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, siren, nic, siege, \ - CONCAT_WS(" ", \ - raisonSociale, \ - enseigne, \ - sigle, \ - identite_pre\ - ) AS nom, REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ - adr_ville AS ville, cj, ape_etab, (siren>200) AS sirenValide, rang \ - FROM etablissements_tmp; - - sql_attr_uint = siren - sql_attr_uint = nic - sql_attr_uint = siege - sql_attr_uint = actif - sql_attr_uint = adr_num - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = cj - sql_attr_uint = sirenValide - sql_attr_uint = rang -} - -index ent -{ - source = ent - path = /dbs/sphinx/ent - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, 
U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - - wordforms = /etc/sphinxsearch/wordforms-ent.txt - enable_star = 1 -} - -index ent_phx -{ - source = ent - path = /dbs/sphinx/ent_phx - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - enable_star = 1 - morphology = libstemmer_fr - min_stemming_len = 4 -} diff --git a/scripts/build/config/Sphinx/sphinx.conf b/scripts/build/config/Sphinx/sphinx.conf deleted file mode 100644 index eb7f627..0000000 --- a/scripts/build/config/Sphinx/sphinx.conf +++ /dev/null @@ -1,79 +0,0 @@ - -############################################################################# -## indexer settings -############################################################################# - -indexer -{ - # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) - # optional, default is 32M, max is 2047M, recommended is 256M to 1024M - mem_limit = 256M - - # maximum IO calls per second (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iops = 40 - - - # maximum IO call size, bytes (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iosize = 1048576 -} - -############################################################################# -## searchd settings 
-############################################################################# - -searchd -{ - # IP address to bind on - # optional, default is 0.0.0.0 (ie. listen on all interfaces) - # - # address = 127.0.0.1 - # address = 192.168.0.1 - - - # searchd TCP port number - # mandatory, default is 3312 - listen = 3312 - - # log file, searchd run info is logged here - # optional, default is 'searchd.log' - log = /dbs/sphinxlog/searchd.log - - # query log file, all search queries are logged here - # optional, default is empty (do not log queries) - query_log = /dbs/sphinxlog/query.log - - # client read timeout, seconds - # optional, default is 5 - read_timeout = 5 - - # maximum amount of children to fork (concurrent searches to run) - # optional, default is 0 (unlimited) - max_children = 30 - - # PID file, searchd process ID file name - # mandatory - pid_file = /var/log/searchd.pid - - # max amount of matches the daemon ever keeps in RAM, per-index - # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL - # default is 1000 (just like Google) - max_matches = 1000 - - # seamless rotate, prevents rotate stalls if precaching huge datasets - # optional, default is 1 - seamless_rotate = 1 - - # whether to forcibly preopen all indexes on startup - # optional, default is 0 (do not preopen) - preopen_indexes = 1 - - # whether to unlink .old index copies on succesful rotation. 
- # optional, default is 1 (do unlink) - unlink_old = 1 - - compat_sphinxql_magics=0 -} diff --git a/scripts/build/config/Sphinx/wordforms-ent.txt b/scripts/build/config/Sphinx/wordforms-ent.txt deleted file mode 100644 index 929912d..0000000 --- a/scripts/build/config/Sphinx/wordforms-ent.txt +++ /dev/null @@ -1,207 +0,0 @@ -& > et -un > 1 -deux > 2 -trois > 3 -quatre > 4 -cinq > 5 -six > 6 -sept > 7 -huit > 8 -neuf > 9 -dix > 10 -onze > 11 -douze > 12 -treize > 13 -quatorze > 14 -quinze > 15 -seize > 16 -vingt > 20 -vingts > 20 -trente > 30 -quarante > 40 -cinquante > 50 -soixante > 60 -quatrevingt > 80 -cent > 100 -cents > 100 -mille > 1000 -zac > zone -zad > zone -za > zone -zi > zone -zup > zone -general > gal -abbaye > abe -agglomeration > agl -aglo > agl -allee > all -ancien > ach -ancienne > art -anse > anse -arcade > arc -autoroute > aut -avenue > av -barriere > bre -bas > bch -bastide > bstd -baston > bast -beguinage > begi -berge > ber -bois > bois -boite postal > bp -boucle > bcle -boulevard > bd -bourg > brg -butte > but -campagne > cgne -camping > cpg -carre > carr -carreau > cau -carrefour > car -carriere > care -castel > cst -cavee > cav -central > ctre -centre > ctre -chalet > chl -chapelle > chp -charmille > chi -chateau > cht -chaussee > chs -che > chemin -chv > chemin -cheminement > chem -cloitre > cloi -colline > coli -contour > ctr -corniche > cor -cottage > cott -cours > crs -darse > dars -degre > deg -dsg > descente -dsc > descente -digue > dig -domaine > dom -docteur > dr -ecart > eca -ecluse > ecl -eglise > egl -enceinte > en -enclave > env -enclos > enc -escalier > esc -espace > espa -esplanade > esp -etang > eting -faubourg > fg -ferme > frm -fontaine > fon -fort > fort -forum > form -fosse > fos -foyer > foyr -galerie > gal -garenne > garn -grand > gbd -gden > grande -gr > grande -grille > gri -grimpette > grim -groupe > gpe -groupement > gpt -halle > hle -hameau > ham -haut > hch -hippodrome > hip -immeuble > imm -impasse > imp -jardin 
> jard -jetee > jte -levee > leve -lieu > ld -lieudit > ld -lotissement > lot -maison > mf -manoir > man -marche > mar -metro > met -montee > mte -moulin > mln -musee > mus -nouvelle > nte -palais > pal -parc > parc -parking > pkg -parvis > prv -pas > passage -pn > passage -passe > pass -passerelle > ple -patio > pat -pavillon > pav -peripherique > peri -peristyle > psty -petites > pta -pae > petite -pim > petite -prt > petite -ptr > petite -place > pl -placis > plci -plage > plag -plaine > pln -plan > plan -plateau > plt -pointe > pnt -porche > pch -porte > pte -portique > porq -poterne > pot -pourtour > pour -presqu ile > prq -promenade > prom -quai > qu -quartier > qua -raccourci > rac -raidillon > raid -rampe > rpe -rempart > rem -residence > res -rocade > roc -rond > rpt -roquet > roqt -rotonde > rtd -route > rte -rue > r -ruelle > rle -sente > sen -sentier > sen -square > sq -st > saint -ste > saint -sainte > saint -stade > stde -station > sta -terre > tpl -terrain > trn -terrasse > tsse -tertre > trt -traverse > tra -vallon > val -vallee > val -venelle > ven -vieille > vte -villa > vla -vge > village -vlge > village -voie > voi -centre cial > ccal -centre com > ccal -centre comm > ccal -centre commercial > ccal -ville > mairie -commune > mairie -conseil regional > region -conseil general > departement -companie > cie diff --git a/scripts/build/config/SphinxHisto/MysqlServer/act.conf b/scripts/build/config/SphinxHisto/MysqlServer/act.conf deleted file mode 100644 index 6613a28..0000000 --- a/scripts/build/config/SphinxHisto/MysqlServer/act.conf +++ /dev/null @@ -1,41 +0,0 @@ - -source act -{ - type = mysql - sql_host = 192.168.3.30 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ - SELECT l.id, l.idPar, l.idAct, r.siren, r.actif, r.PpPm, r.RS, r.adresse_cp, r.adresse_ville, p.libPays AS pays, l.PDetention \ - FROM liens2 l, liensRef r, tabPays p \ - WHERE l.dateSuppr = '0000-00-00 00:00:00' \ - AND r.id = 
l.idAct \ - AND ( r.siren>1000 OR (r.siren=0 AND r.adresse_pays!='FRA') OR (r.siren=0 AND r.PpPm='PP') ) \ - AND p.codPays3 = r.adresse_pays; - - sql_attr_uint = idAct - sql_attr_uint = actif - sql_attr_float= PDetention - sql_attr_string = pays -} - -index act -{ - source = act - path = /dbs/sphinx/act - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} diff --git a/scripts/build/config/SphinxHisto/MysqlServer/dirtmp.conf b/scripts/build/config/SphinxHisto/MysqlServer/dirtmp.conf deleted file mode 100644 index cf94333..0000000 --- a/scripts/build/config/SphinxHisto/MysqlServer/dirtmp.conf +++ /dev/null @@ -1,44 +0,0 @@ -source dir -{ - type = mysql - sql_host = 192.168.3.30 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ - SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ - YEAR(naissance_date) AS naiss_annee, \ - MONTH(naissance_date) AS naiss_mois, \ - DAY(naissance_date) AS naiss_jour, \ - naissance_lieu, fonction_code, actif \ - FROM dirigeants_tmp; - sql_attr_string = civilite - sql_attr_string = typeDir - sql_attr_string = fonction_code - sql_attr_uint = dirSiren - sql_attr_uint = naiss_annee - sql_attr_uint = naiss_mois - sql_attr_uint = naiss_jour - sql_attr_uint = actif - 
sql_attr_uint = adr_dep -} - -index dir -{ - source = dir - path = /dbs/sphinx/dir - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} \ No newline at end of file diff --git a/scripts/build/config/SphinxHisto/MysqlServer/histo.conf b/scripts/build/config/SphinxHisto/MysqlServer/histo.conf deleted file mode 100644 index 407bafe..0000000 --- a/scripts/build/config/SphinxHisto/MysqlServer/histo.conf +++ /dev/null @@ -1,23 +0,0 @@ -source histo -{ - type = mysql - sql_host = 192.168.3.24 - sql_user = sphinx_histo - sql_pass = sphinx - sql_db = histobodacc - sql_port = 3306 # optional, default is 3306 - sql_query = SELECT id, nomFichier, annee1, dateBod, texte FROM bodacc_ocr; - sql_attr_uint = annee1 - sql_query_info = SELECT * FROM bodacc_ocr WHERE id=$id -} - -index histo -{ - source = histo - path = /home/sphinx/histo - docinfo = extern - morphology = none - min_word_len = 1 - charset_type = sbcs - html_strip = 1 -} diff --git a/scripts/build/config/SphinxHisto/sphinx.conf b/scripts/build/config/SphinxHisto/sphinx.conf deleted file mode 100644 index 36fd0cf..0000000 --- a/scripts/build/config/SphinxHisto/sphinx.conf +++ /dev/null @@ -1,77 +0,0 @@ - -############################################################################# -## indexer settings 
-############################################################################# - -indexer -{ - # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) - # optional, default is 32M, max is 2047M, recommended is 256M to 1024M - mem_limit = 2047M - - # maximum IO calls per second (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iops = 40 - - - # maximum IO call size, bytes (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iosize = 1048576 -} - -############################################################################# -## searchd settings -############################################################################# - -searchd -{ - # IP address to bind on - # optional, default is 0.0.0.0 (ie. listen on all interfaces) - # - # address = 127.0.0.1 - # address = 192.168.0.1 - - - # searchd TCP port number - # mandatory, default is 3312 - listen = 3312 - - # log file, searchd run info is logged here - # optional, default is 'searchd.log' - log = /dbs/sphinxlog/searchd.log - - # query log file, all search queries are logged here - # optional, default is empty (do not log queries) - query_log = /dbs/sphinxlog/query.log - - # client read timeout, seconds - # optional, default is 5 - read_timeout = 5 - - # maximum amount of children to fork (concurrent searches to run) - # optional, default is 0 (unlimited) - max_children = 30 - - # PID file, searchd process ID file name - # mandatory - pid_file = /var/log/searchd.pid - - # max amount of matches the daemon ever keeps in RAM, per-index - # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL - # default is 1000 (just like Google) - max_matches = 1000 - - # seamless rotate, prevents rotate stalls if precaching huge datasets - # optional, default is 1 - seamless_rotate = 1 - - # whether to forcibly preopen all indexes on startup - # optional, default is 0 (do not preopen) - preopen_indexes = 0 - - # whether to unlink .old index copies on succesful rotation. 
- # optional, default is 1 (do unlink) - unlink_old = 1 -} diff --git a/scripts/build/config/srvws02/sphinx.conf b/scripts/build/config/srvws02/sphinx.conf deleted file mode 100644 index b518273..0000000 --- a/scripts/build/config/srvws02/sphinx.conf +++ /dev/null @@ -1,77 +0,0 @@ - -############################################################################# -## indexer settings -############################################################################# - -indexer -{ - # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) - # optional, default is 32M, max is 2047M, recommended is 256M to 1024M - mem_limit = 1024M - - # maximum IO calls per second (for I/O throttling) - # optional, default is 0 (unlimited) - # - max_iops = 40 - - - # maximum IO call size, bytes (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iosize = 1048576 -} - -############################################################################# -## searchd settings -############################################################################# - -searchd -{ - # IP address to bind on - # optional, default is 0.0.0.0 (ie. 
listen on all interfaces) - # - # address = 127.0.0.1 - # address = 192.168.0.1 - - - # searchd TCP port number - # mandatory, default is 3312 - listen = 9312 - - # log file, searchd run info is logged here - # optional, default is 'searchd.log' - log = /dbs/sphinxlog/searchd.log - - # query log file, all search queries are logged here - # optional, default is empty (do not log queries) - query_log = /dbs/sphinxlog/query.log - - # client read timeout, seconds - # optional, default is 5 - read_timeout = 5 - - # maximum amount of children to fork (concurrent searches to run) - # optional, default is 0 (unlimited) - max_children = 30 - - # PID file, searchd process ID file name - # mandatory - pid_file = /var/log/searchd.pid - - # max amount of matches the daemon ever keeps in RAM, per-index - # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL - # default is 1000 (just like Google) - max_matches = 1000 - - # seamless rotate, prevents rotate stalls if precaching huge datasets - # optional, default is 1 - seamless_rotate = 1 - - # whether to forcibly preopen all indexes on startup - # optional, default is 0 (do not preopen) - preopen_indexes = 0 - - # whether to unlink .old index copies on succesful rotation. 
- # optional, default is 1 (do unlink) - unlink_old = 1 -} diff --git a/scripts/build/config/srvws02/srvws02/dir.conf b/scripts/build/config/srvws02/srvws02/dir.conf deleted file mode 100644 index 129a5aa..0000000 --- a/scripts/build/config/srvws02/srvws02/dir.conf +++ /dev/null @@ -1,37 +0,0 @@ -source dir -{ - type = mysql - sql_host = 192.168.3.30 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = - sql_query = \ - SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ - YEAR(naissance_date) AS naiss_annee, \ - MONTH(naissance_date) AS naiss_mois, \ - DAY(naissance_date) AS naiss_jour, \ - naissance_lieu, fonction_code, actif \ - FROM dirigeants; - sql_attr_str2ordinal = civilite - sql_attr_str2ordinal = typeDir - sql_attr_str2ordinal = fonction_code - sql_attr_uint = dirSiren - sql_attr_uint = naiss_annee - sql_attr_uint = naiss_mois - sql_attr_uint = naiss_jour - sql_attr_uint = actif - sql_attr_uint = adr_dep -} - -index dir -{ - source = source_dir - path = /dbs/sphinx/dir - docinfo = extern - morphology = none - charset_type = sbcs - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0..U+DE->U+E0..U+FE, U+DF, U+E0..U+FF -} diff --git a/scripts/build/ubuntu-14.04/etc/logrotate.d/indexer b/scripts/build/ubuntu-14.04/etc/logrotate.d/indexer new file mode 100644 index 0000000..222a5dc --- /dev/null +++ b/scripts/build/ubuntu-14.04/etc/logrotate.d/indexer @@ -0,0 +1,8 @@ +/dbs/sphinxlog/indexer.log { + missingok + notifempty + weekly + rotate 5 + compress + delaycompress +} \ No newline at end of file diff --git a/scripts/build/ubuntu-14.04/etc/logrotate.d/searchd b/scripts/build/ubuntu-14.04/etc/logrotate.d/searchd new file mode 100644 index 0000000..bbe7a31 --- /dev/null +++ b/scripts/build/ubuntu-14.04/etc/logrotate.d/searchd @@ -0,0 +1,12 @@ +/dbs/sphinxlog/query.log /dbs/sphinxlog/searchd.log { + missingok + notifempty + weekly + rotate 5 
+ compress + delaycompress + create 640 root root + postrotate + killall -SIGUSR1 searchd + endscript +} \ No newline at end of file From c6a433fa1f7d6edaca476f64d2d1f975377ce6f5 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 13 Aug 2014 15:32:41 +0000 Subject: [PATCH 13/40] Move definition config --- scripts/build/config-dev/dir.conf | 2 +- scripts/build/config-dev/dirtmp.conf | 4 ++-- scripts/build/config-dev/ent.conf | 18 +++++++-------- scripts/build/config-dev/enttmp.conf | 18 +++++++-------- scripts/build/config-dev/histo.conf | 32 +++++++++++++-------------- scripts/build/config-prod/ent.conf | 18 +++++++-------- scripts/build/config-prod/enttmp.conf | 18 +++++++-------- 7 files changed, 55 insertions(+), 55 deletions(-) diff --git a/scripts/build/config-dev/dir.conf b/scripts/build/config-dev/dir.conf index c390aa5..60fde69 100644 --- a/scripts/build/config-dev/dir.conf +++ b/scripts/build/config-dev/dir.conf @@ -49,6 +49,7 @@ index dir_phx source = dir path = /dbs/sphinx/dir_phx docinfo = extern + min_stemming_len = 4 charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ @@ -60,5 +61,4 @@ index dir_phx U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - min_stemming_len = 4 } \ No newline at end of file diff --git a/scripts/build/config-dev/dirtmp.conf b/scripts/build/config-dev/dirtmp.conf index 87c3399..1efecc4 100644 --- a/scripts/build/config-dev/dirtmp.conf +++ b/scripts/build/config-dev/dirtmp.conf @@ -48,6 +48,7 @@ index dir_phx source = dir path = /dbs/sphinx/dir_phx docinfo = extern + min_stemming_len = 4 charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ @@ -58,6 +59,5 @@ index dir_phx U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, 
U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - min_stemming_len = 4 + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } \ No newline at end of file diff --git a/scripts/build/config-dev/ent.conf b/scripts/build/config-dev/ent.conf index 42e8a84..072aaa8 100644 --- a/scripts/build/config-dev/ent.conf +++ b/scripts/build/config-dev/ent.conf @@ -31,6 +31,8 @@ index ent path = /dbs/sphinx/ent mlock = 1 docinfo = extern + stopwords = /etc/sphinxsearch/stopwords-ent.txt + wordforms = /etc/sphinxsearch/wordforms-ent.txt charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ @@ -41,9 +43,7 @@ index ent U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - stopwords = /etc/sphinxsearch/stopwords-ent.txt - wordforms = /etc/sphinxsearch/wordforms-ent.txt + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } index ent_phx @@ -52,6 +52,11 @@ index ent_phx path = /dbs/sphinx/ent_phx mlock = 1 docinfo = extern + stopwords = /etc/sphinxsearch/stopwords-ent.txt + stopwords_unstemmed = 1 + wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + morphology = libstemmer_fr + min_stemming_len = 4 charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ @@ -62,10 +67,5 @@ index ent_phx U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - stopwords = /etc/sphinxsearch/stopwords-ent.txt - stopwords_unstemmed = 1 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - morphology = libstemmer_fr - 
min_stemming_len = 4 + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } diff --git a/scripts/build/config-dev/enttmp.conf b/scripts/build/config-dev/enttmp.conf index 4ea7acd..f933ea7 100644 --- a/scripts/build/config-dev/enttmp.conf +++ b/scripts/build/config-dev/enttmp.conf @@ -31,6 +31,8 @@ index ent path = /dbs/sphinx/ent mlock = 1 docinfo = extern + stopwords = /etc/sphinxsearch/stopwords-ent.txt + wordforms = /etc/sphinxsearch/wordforms-ent.txt charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ @@ -41,9 +43,7 @@ index ent U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - stopwords = /etc/sphinxsearch/stopwords-ent.txt - wordforms = /etc/sphinxsearch/wordforms-ent.txt + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } index ent_phx @@ -52,6 +52,11 @@ index ent_phx path = /dbs/sphinx/ent_phx mlock = 1 docinfo = extern + stopwords = /etc/sphinxsearch/stopwords-ent.txt + stopwords_unstemmed = 1 + wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + morphology = libstemmer_fr + min_stemming_len = 4 charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ @@ -62,10 +67,5 @@ index ent_phx U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - stopwords = /etc/sphinxsearch/stopwords-ent.txt - stopwords_unstemmed = 1 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - morphology = libstemmer_fr - min_stemming_len = 4 + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } diff --git a/scripts/build/config-dev/histo.conf b/scripts/build/config-dev/histo.conf index 
35bdff2..4744333 100644 --- a/scripts/build/config-dev/histo.conf +++ b/scripts/build/config-dev/histo.conf @@ -1,24 +1,24 @@ source histo { - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = histobodacc - sql_port = 3306 # optional, default is 3306 - sql_query = SELECT id, nomFichier, annee1, dateBod, texte FROM bodacc_ocr; - sql_attr_uint = annee1 - sql_query_info = SELECT * FROM bodacc_ocr WHERE id=$id + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = histobodacc + sql_port = 3306 # optional, default is 3306 + sql_query = SELECT id, nomFichier, annee1, dateBod, texte FROM bodacc_ocr; + sql_attr_uint = annee1 + sql_query_info = SELECT * FROM bodacc_ocr WHERE id=$id } index histo { - source = histo - path = /dbs/sphinx/histo - docinfo = extern - morphology = none - min_word_len = 2 - charset_type = sbcs - html_strip = 1 + source = histo + path = /dbs/sphinx/histo + docinfo = extern + morphology = none + min_word_len = 2 + charset_type = sbcs + html_strip = 1 } \ No newline at end of file diff --git a/scripts/build/config-prod/ent.conf b/scripts/build/config-prod/ent.conf index 505eac8..c01093c 100644 --- a/scripts/build/config-prod/ent.conf +++ b/scripts/build/config-prod/ent.conf @@ -31,6 +31,8 @@ index ent path = /dbs/sphinx/ent mlock = 1 docinfo = extern + stopwords = /etc/sphinxsearch/stopwords-ent.txt + wordforms = /etc/sphinxsearch/wordforms-ent.txt charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ @@ -41,9 +43,7 @@ index ent U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - stopwords = /etc/sphinxsearch/stopwords-ent.txt - wordforms = /etc/sphinxsearch/wordforms-ent.txt + U+FD->y, U+FF->y, U+0152->U+0153, 
U+0153 } index ent_phx @@ -52,6 +52,11 @@ index ent_phx path = /dbs/sphinx/ent_phx mlock = 1 docinfo = extern + stopwords = /etc/sphinxsearch/stopwords-ent.txt + stopwords_unstemmed = 1 + wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + morphology = libstemmer_fr + min_stemming_len = 4 charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ @@ -62,10 +67,5 @@ index ent_phx U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - stopwords = /etc/sphinxsearch/stopwords-ent.txt - stopwords_unstemmed = 1 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - morphology = libstemmer_fr - min_stemming_len = 4 + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } diff --git a/scripts/build/config-prod/enttmp.conf b/scripts/build/config-prod/enttmp.conf index 2d73e9c..0603f00 100644 --- a/scripts/build/config-prod/enttmp.conf +++ b/scripts/build/config-prod/enttmp.conf @@ -31,6 +31,8 @@ index ent path = /dbs/sphinx/ent mlock = 1 docinfo = extern + stopwords = /etc/sphinxsearch/stopwords-ent.txt + wordforms = /etc/sphinxsearch/wordforms-ent.txt charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ @@ -41,9 +43,7 @@ index ent U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - stopwords = /etc/sphinxsearch/stopwords-ent.txt - wordforms = /etc/sphinxsearch/wordforms-ent.txt + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } index ent_phx @@ -52,6 +52,11 @@ index ent_phx path = /dbs/sphinx/ent_phx mlock = 1 docinfo = extern + stopwords = /etc/sphinxsearch/stopwords-ent.txt + 
stopwords_unstemmed = 1 + wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + morphology = libstemmer_fr + min_stemming_len = 4 charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ @@ -62,10 +67,5 @@ index ent_phx U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 - stopwords = /etc/sphinxsearch/stopwords-ent.txt - stopwords_unstemmed = 1 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt - morphology = libstemmer_fr - min_stemming_len = 4 + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } From 058bd998440cb98608eb237632f8438e9845b989 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Thu, 14 Aug 2014 07:30:58 +0000 Subject: [PATCH 14/40] Wordforms path --- scripts/build/config-dev/ent.conf | 2 +- scripts/build/config-dev/enttmp.conf | 2 +- scripts/build/config-prod/ent.conf | 4 ++-- scripts/build/config-prod/enttmp.conf | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/build/config-dev/ent.conf b/scripts/build/config-dev/ent.conf index 072aaa8..01afb44 100644 --- a/scripts/build/config-dev/ent.conf +++ b/scripts/build/config-dev/ent.conf @@ -54,7 +54,7 @@ index ent_phx docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt stopwords_unstemmed = 1 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + wordforms = /etc/sphinxsearch/wordforms-ent.txt morphology = libstemmer_fr min_stemming_len = 4 charset_type = utf-8 diff --git a/scripts/build/config-dev/enttmp.conf b/scripts/build/config-dev/enttmp.conf index f933ea7..a3c757e 100644 --- a/scripts/build/config-dev/enttmp.conf +++ b/scripts/build/config-dev/enttmp.conf @@ -54,7 +54,7 @@ index ent_phx docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt stopwords_unstemmed = 1 - wordforms = 
/usr/local/sphinx/etc/wordforms-ent.txt + wordforms = /etc/sphinxsearch/wordforms-ent.txt morphology = libstemmer_fr min_stemming_len = 4 charset_type = utf-8 diff --git a/scripts/build/config-prod/ent.conf b/scripts/build/config-prod/ent.conf index c01093c..cd946cc 100644 --- a/scripts/build/config-prod/ent.conf +++ b/scripts/build/config-prod/ent.conf @@ -31,7 +31,7 @@ index ent path = /dbs/sphinx/ent mlock = 1 docinfo = extern - stopwords = /etc/sphinxsearch/stopwords-ent.txt + stopwords = /etc/sphinxsearch/stopwords-ent.txt wordforms = /etc/sphinxsearch/wordforms-ent.txt charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ @@ -54,7 +54,7 @@ index ent_phx docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt stopwords_unstemmed = 1 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + wordforms = /etc/sphinxsearch/wordforms-ent.txt morphology = libstemmer_fr min_stemming_len = 4 charset_type = utf-8 diff --git a/scripts/build/config-prod/enttmp.conf b/scripts/build/config-prod/enttmp.conf index 0603f00..dbdf5c2 100644 --- a/scripts/build/config-prod/enttmp.conf +++ b/scripts/build/config-prod/enttmp.conf @@ -54,7 +54,7 @@ index ent_phx docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt stopwords_unstemmed = 1 - wordforms = /usr/local/sphinx/etc/wordforms-ent.txt + wordforms = /etc/sphinxsearch/wordforms-ent.txt morphology = libstemmer_fr min_stemming_len = 4 charset_type = utf-8 From 903e3cb3940559865f25b4a3e276349c1abcf72c Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Tue, 19 Aug 2014 15:01:48 +0000 Subject: [PATCH 15/40] Boolean on some attributes --- scripts/build/config-prod/ent.conf | 3 ++- scripts/build/config-prod/enttmp.conf | 34 +++++++++++++-------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/scripts/build/config-prod/ent.conf b/scripts/build/config-prod/ent.conf index cd946cc..6de5ebd 100644 --- a/scripts/build/config-prod/ent.conf +++ b/scripts/build/config-prod/ent.conf @@ -16,7 
+16,7 @@ source ent sql_attr_uint = siren sql_attr_uint = nic sql_attr_uint = siege - sql_attr_uint = actif + sql_attr_bool = actif sql_attr_uint = adr_num sql_attr_uint = adr_cp sql_attr_uint = adr_dep @@ -25,6 +25,7 @@ source ent sql_attr_uint = rang } + index ent { source = ent diff --git a/scripts/build/config-prod/enttmp.conf b/scripts/build/config-prod/enttmp.conf index dbdf5c2..4843e56 100644 --- a/scripts/build/config-prod/enttmp.conf +++ b/scripts/build/config-prod/enttmp.conf @@ -1,28 +1,28 @@ source ent { - type = mysql - sql_host = 192.168.3.30 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, siren, nic, siege, \ + type = mysql + sql_host = 192.168.3.30 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, siren, nic, siege, \ CONCAT_WS(" ", raisonSociale, enseigne, sigle, identite_pre) AS nom, \ REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ FROM etablissements_tmp; - sql_attr_uint = siren - sql_attr_uint = nic - sql_attr_uint = siege - sql_attr_uint = actif - sql_attr_uint = adr_num - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = cj - sql_attr_uint = sirenValide - sql_attr_uint = rang + sql_attr_uint = siren + sql_attr_uint = nic + sql_attr_uint = siege + sql_attr_bool = actif + sql_attr_uint = adr_num + sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_uint = cj:4 + sql_attr_bool = sirenValide + sql_attr_uint = rang } index ent From 2662bbf3525346f73a37d5cb36c3c034b3ec9f0e Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Tue, 19 Aug 2014 15:18:28 +0000 Subject: [PATCH 16/40] Boolean on some attributes --- scripts/build/config-dev/ent.conf | 10 +++---- scripts/build/config-dev/enttmp.conf | 40 
++++++++++++++-------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/scripts/build/config-dev/ent.conf b/scripts/build/config-dev/ent.conf index 01afb44..c79d218 100644 --- a/scripts/build/config-dev/ent.conf +++ b/scripts/build/config-dev/ent.conf @@ -10,13 +10,13 @@ source ent sql_query = SELECT id, siren, nic, siege, \ CONCAT_WS(" ", raisonSociale, enseigne, sigle, identite_pre) AS nom, \ REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ - adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ - FROM etablissements; - sql_attr_uint = siren + actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ + adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ + FROM etablissements; + sql_attr_uint = siren sql_attr_uint = nic sql_attr_uint = siege - sql_attr_uint = actif + sql_attr_bool = actif sql_attr_uint = adr_num sql_attr_uint = adr_cp sql_attr_uint = adr_dep diff --git a/scripts/build/config-dev/enttmp.conf b/scripts/build/config-dev/enttmp.conf index a3c757e..256af8c 100644 --- a/scripts/build/config-dev/enttmp.conf +++ b/scripts/build/config-dev/enttmp.conf @@ -1,28 +1,28 @@ source ent { - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, siren, nic, siege, \ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, siren, nic, siege, \ CONCAT_WS(" ", raisonSociale, enseigne, sigle, identite_pre) AS nom, \ REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ - adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ - FROM 
etablissements_tmp; - sql_attr_uint = siren - sql_attr_uint = nic - sql_attr_uint = siege - sql_attr_uint = actif - sql_attr_uint = adr_num - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = cj - sql_attr_uint = sirenValide - sql_attr_uint = rang + actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ + adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ + FROM etablissements_tmp; + sql_attr_uint = siren + sql_attr_uint = nic + sql_attr_uint = siege + sql_attr_bool = actif + sql_attr_uint = adr_num + sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_uint = cj:4 + sql_attr_bool = sirenValide + sql_attr_uint = rang } index ent From 918a55aa70affe36289a14d3a54f53474b209ebe Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Fri, 10 Oct 2014 08:38:07 +0000 Subject: [PATCH 17/40] Update config --- scripts/build/config-dev/act.conf | 44 +++---- scripts/build/config-dev/ciblage.conf | 152 +++++++++++----------- scripts/build/config-dev/dir.conf | 61 ++++----- scripts/build/config-dev/dirtmp.conf | 92 +++++++------ scripts/build/config-dev/histo.conf | 40 +++--- scripts/build/config-prod/act.conf | 62 +++++---- scripts/build/config-prod/ciblage.conf | 170 ++++++++++++------------- scripts/build/config-prod/dir.conf | 87 ++++++------- scripts/build/config-prod/dirtmp.conf | 88 ++++++------- scripts/build/config-prod/histo.conf | 32 +++++ 10 files changed, 421 insertions(+), 407 deletions(-) create mode 100644 scripts/build/config-prod/histo.conf diff --git a/scripts/build/config-dev/act.conf b/scripts/build/config-dev/act.conf index 0ba5e99..cc1bc72 100644 --- a/scripts/build/config-dev/act.conf +++ b/scripts/build/config-dev/act.conf @@ -1,35 +1,31 @@ source act { - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo sql_query_pre = SET NAMES utf8 
- - sql_query = \ - SELECT l.id, l.idPar, l.idAct, r.siren, r.actif, r.PpPm, r.RS, r.adresse_cp, r.adresse_ville, p.libPays AS pays, l.PDetention \ - FROM liens2 l, liensRef r, tabPays p \ - WHERE l.dateSuppr = '0000-00-00 00:00:00' \ - AND r.id = l.idAct \ - AND ( r.siren>1000 OR (r.siren=0 AND r.adresse_pays!='FRA') OR (r.siren=0 AND r.PpPm='PP') ) \ - AND p.codPays3 = r.adresse_pays; - - sql_attr_uint = idAct - sql_attr_uint = actif - sql_attr_float= PDetention - sql_attr_string = pays + sql_query = SELECT l.id, l.idPar, l.idAct, r.siren, r.actif, r.PpPm, r.RS, r.adresse_cp, r.adresse_ville, \ + p.libPays AS pays, l.PDetention FROM liens2 l, liensRef r, tabPays p \ + WHERE l.dateSuppr = '0000-00-00 00:00:00' AND r.id = l.idAct \ + AND ( r.siren>1000 OR (r.siren=0 AND r.adresse_pays!='FRA') OR (r.siren=0 AND r.PpPm='PP') ) \ + AND p.codPays3 = r.adresse_pays; + sql_attr_uint = idAct + sql_attr_bool = actif + sql_attr_float = PDetention + sql_attr_string = pays } index act { - source = act - path = /dbs/sphinx/act - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ + source = act + path = /dbs/sphinx/act + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ diff --git a/scripts/build/config-dev/ciblage.conf b/scripts/build/config-dev/ciblage.conf index 088bbd8..8d820e7 100644 --- a/scripts/build/config-dev/ciblage.conf +++ b/scripts/build/config-dev/ciblage.conf @@ -1,14 +1,14 @@ source ciblage { - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ - SELECT id, LPAD(siren, 9, '000000000') AS siren, LPAD(nic, 5, '00000') AS nic, \ + type = mysql + sql_host = 192.168.78.230 + sql_user = 
sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = \ + SELECT id, LPAD(siren, 9, '0') AS siren, LPAD(nic, 5, '0') AS nic, \ siege, \ adr_cp, \ REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ @@ -73,78 +73,78 @@ source ciblage CASE avisCs WHEN 0 THEN 1 WHEN 10 THEN 2 WHEN 15 THEN 3 WHEN 23 THEN 4 WHEN 29 THEN 4 WHEN 39 THEN 4 WHEN 43 THEN 4 WHEN 21 THEN 5 WHEN 26 THEN 5 WHEN 28 THEN 5 WHEN 31 THEN 6 WHEN 50 THEN 6 WHEN 24 THEN 7 ELSE 0 END as avisCs \ FROM etablissements_act WHERE siren>100; - sql_field_string = siren - sql_field_string = nic - sql_attr_uint = siege - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = tel - sql_attr_uint = fax - sql_attr_uint = cj - sql_attr_uint = capital - sql_attr_uint = age_entrep - sql_attr_uint = age_etab - sql_attr_uint = tca - sql_attr_uint = tcaexp - sql_attr_uint = teff_entrep - sql_attr_uint = teff_etab - sql_attr_uint = web - sql_attr_uint = mail - sql_attr_uint = adrDom - sql_attr_uint = actifEco - sql_attr_uint = presentRcs - sql_attr_uint = procolHisto - sql_attr_uint = tvaIntraValide - sql_attr_uint = dateCrea_etab - sql_attr_uint = dateCrea_ent - sql_attr_uint = dateImmat - sql_attr_uint = eff_entrep - sql_attr_uint = eff_etab - sql_attr_uint = dirNom - sql_attr_uint = nbEtab - sql_attr_uint = nbMPubli - sql_attr_uint = sirenGrp - sql_attr_uint = nbActio - sql_attr_uint = actio - sql_attr_uint = nbPart - sql_attr_uint = part - sql_attr_uint = bilType - sql_attr_uint = bilAnnee - sql_attr_uint = bilCloture - sql_attr_uint = bilDuree - sql_attr_uint = bilTca - sql_attr_uint = bilEE - sql_attr_uint = bilFL - sql_attr_uint = bilFK - sql_attr_uint = bilFR - sql_attr_uint = bilGF - sql_attr_uint = bilGP - sql_attr_uint = bilGU - sql_attr_uint = bilGW - sql_attr_uint = bilHD - sql_attr_uint = bilHH - sql_attr_uint = bilHL - sql_attr_uint = bilHM - sql_attr_uint = bilHN - sql_attr_uint = bilYP - sql_attr_uint = codeCommune - sql_attr_uint = zus - 
sql_attr_uint = zru - sql_attr_uint = zfu - sql_attr_uint = cucs - sql_attr_uint = zrr - sql_attr_uint = zafr - sql_attr_uint = avisCs + sql_field_string = siren + sql_field_string = nic + sql_attr_bool = siege + sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_bool = tel + sql_attr_bool = fax + sql_attr_uint = cj + sql_attr_uint = capital + sql_attr_uint = age_entrep + sql_attr_uint = age_etab + sql_attr_uint = tca + sql_attr_uint = tcaexp + sql_attr_uint = teff_entrep + sql_attr_uint = teff_etab + sql_attr_bool = web + sql_attr_bool = mail + sql_attr_bool = adrDom + sql_attr_uint = actifEco + sql_attr_uint = presentRcs + sql_attr_uint = procolHisto + sql_attr_uint = tvaIntraValide + sql_attr_uint = dateCrea_etab + sql_attr_uint = dateCrea_ent + sql_attr_uint = dateImmat + sql_attr_uint = eff_entrep + sql_attr_uint = eff_etab + sql_attr_bool = dirNom + sql_attr_uint = nbEtab + sql_attr_uint = nbMPubli + sql_attr_bool = sirenGrp + sql_attr_uint = nbActio + sql_attr_bool = actio + sql_attr_uint = nbPart + sql_attr_bool = part + sql_attr_uint = bilType + sql_attr_uint = bilAnnee + sql_attr_uint = bilCloture + sql_attr_uint = bilDuree + sql_attr_uint = bilTca + sql_attr_uint = bilEE + sql_attr_uint = bilFL + sql_attr_uint = bilFK + sql_attr_uint = bilFR + sql_attr_uint = bilGF + sql_attr_uint = bilGP + sql_attr_uint = bilGU + sql_attr_uint = bilGW + sql_attr_uint = bilHD + sql_attr_uint = bilHH + sql_attr_uint = bilHL + sql_attr_uint = bilHM + sql_attr_uint = bilHN + sql_attr_uint = bilYP + sql_attr_uint = codeCommune + sql_attr_bool = zus + sql_attr_bool = zru + sql_attr_bool = zfu + sql_attr_bool = cucs + sql_attr_bool = zrr + sql_attr_bool = zafr + sql_attr_uint = avisCs } index ciblage { - source = ciblage - path = /dbs/sphinx/ciblage - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ + source = ciblage + path = /dbs/sphinx/ciblage + docinfo = extern + morphology = none + charset_type = utf-8 + 
charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ diff --git a/scripts/build/config-dev/dir.conf b/scripts/build/config-dev/dir.conf index 60fde69..13e5c1b 100644 --- a/scripts/build/config-dev/dir.conf +++ b/scripts/build/config-dev/dir.conf @@ -1,38 +1,33 @@ source dir { - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ - SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ - YEAR(naissance_date) AS naiss_annee, \ - MONTH(naissance_date) AS naiss_mois, \ - DAY(naissance_date) AS naiss_jour, \ - naissance_lieu, fonction_code, actif \ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = \ + SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, \ + prenom, YEAR(naissance_date) AS naiss_annee, MONTH(naissance_date) AS naiss_mois, \ + DAY(naissance_date) AS naiss_jour, naissance_lieu, fonction_code, actif \ FROM dirigeants; - sql_attr_string = civilite - sql_attr_string = typeDir - sql_attr_string = fonction_code - sql_attr_uint = dirSiren - sql_attr_uint = naiss_annee - sql_attr_uint = naiss_mois - sql_attr_uint = naiss_jour - sql_attr_uint = actif - sql_attr_uint = adr_dep + sql_attr_uint = dirSiren + sql_attr_uint = naiss_annee + sql_attr_uint = naiss_mois + sql_attr_uint = naiss_jour + sql_attr_uint = actif + sql_attr_uint = adr_dep } index dir { - source = dir - path = /dbs/sphinx/dir - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ + source = dir + path = /dbs/sphinx/dir + docinfo = extern + morphology = none + charset_type = utf-8 + 
charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ @@ -46,13 +41,13 @@ index dir index dir_phx { - source = dir - path = /dbs/sphinx/dir_phx - docinfo = extern + source = dir + path = /dbs/sphinx/dir_phx + docinfo = extern min_stemming_len = 4 - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ diff --git a/scripts/build/config-dev/dirtmp.conf b/scripts/build/config-dev/dirtmp.conf index 1efecc4..c642a40 100644 --- a/scripts/build/config-dev/dirtmp.conf +++ b/scripts/build/config-dev/dirtmp.conf @@ -1,57 +1,34 @@ + source dir { - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = - sql_query = \ - SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ - YEAR(naissance_date) AS naiss_annee, \ - MONTH(naissance_date) AS naiss_mois, \ - DAY(naissance_date) AS naiss_jour, \ - naissance_lieu, fonction_code, actif \ + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = \ + SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, \ + prenom, YEAR(naissance_date) AS naiss_annee, MONTH(naissance_date) AS naiss_mois, \ + DAY(naissance_date) AS naiss_jour, naissance_lieu, fonction_code, actif \ FROM dirigeants_tmp; - sql_attr_string = civilite - sql_attr_string = 
typeDir - sql_attr_string = fonction_code - sql_attr_uint = dirSiren - sql_attr_uint = naiss_annee - sql_attr_uint = naiss_mois - sql_attr_uint = naiss_jour - sql_attr_uint = actif - sql_attr_uint = adr_dep + sql_attr_uint = dirSiren + sql_attr_uint = naiss_annee + sql_attr_uint = naiss_mois + sql_attr_uint = naiss_jour + sql_attr_uint = actif + sql_attr_uint = adr_dep } index dir { - source = dir - path = /dbs/sphinx/dir - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} - -index dir_phx -{ - source = dir - path = /dbs/sphinx/dir_phx - docinfo = extern - min_stemming_len = 4 - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ + source = dir + path = /dbs/sphinx/dir + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ @@ -59,5 +36,24 @@ index dir_phx U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, 
U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} + +index dir_phx +{ + source = dir + path = /dbs/sphinx/dir_phx + docinfo = extern + min_stemming_len = 4 + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } \ No newline at end of file diff --git a/scripts/build/config-dev/histo.conf b/scripts/build/config-dev/histo.conf index 4744333..13ab19f 100644 --- a/scripts/build/config-dev/histo.conf +++ b/scripts/build/config-dev/histo.conf @@ -1,24 +1,32 @@ source histo { - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = histobodacc - sql_port = 3306 # optional, default is 3306 - sql_query = SELECT id, nomFichier, annee1, dateBod, texte FROM bodacc_ocr; - sql_attr_uint = annee1 - sql_query_info = SELECT * FROM bodacc_ocr WHERE id=$id + type = mysql + sql_host = 192.168.78.230 + sql_user = sphinx + sql_pass = indexer + sql_db = histobodacc + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, nomFichier, annee1, dateBod, texte FROM bodacc_ocr; + sql_query_info = SELECT * FROM bodacc_ocr WHERE id=$id + sql_attr_uint = annee1 } index histo { - source = histo - path = /dbs/sphinx/histo - docinfo = extern - morphology = none - min_word_len = 2 - charset_type = sbcs - html_strip = 1 + source = histo + path = /dbs/sphinx/histo + min_word_len = 3 
+ html_strip = 1 + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } \ No newline at end of file diff --git a/scripts/build/config-prod/act.conf b/scripts/build/config-prod/act.conf index 0ba5e99..301e81c 100644 --- a/scripts/build/config-prod/act.conf +++ b/scripts/build/config-prod/act.conf @@ -1,42 +1,38 @@ source act { - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo + type = mysql + sql_host = 192.168.3.30 + sql_user = sphinx + sql_pass = indexer + sql_db = jo sql_query_pre = SET NAMES utf8 - - sql_query = \ - SELECT l.id, l.idPar, l.idAct, r.siren, r.actif, r.PpPm, r.RS, r.adresse_cp, r.adresse_ville, p.libPays AS pays, l.PDetention \ - FROM liens2 l, liensRef r, tabPays p \ - WHERE l.dateSuppr = '0000-00-00 00:00:00' \ - AND r.id = l.idAct \ - AND ( r.siren>1000 OR (r.siren=0 AND r.adresse_pays!='FRA') OR (r.siren=0 AND r.PpPm='PP') ) \ - AND p.codPays3 = r.adresse_pays; - - sql_attr_uint = idAct - sql_attr_uint = actif - sql_attr_float= PDetention - sql_attr_string = pays + sql_query = SELECT l.id, l.idPar, l.idAct, r.siren, r.actif, r.PpPm, r.RS, r.adresse_cp, r.adresse_ville, \ + p.libPays AS pays, l.PDetention FROM liens2 l, liensRef r, tabPays p \ + WHERE l.dateSuppr = '0000-00-00 00:00:00' AND r.id = l.idAct \ + AND ( r.siren>1000 OR (r.siren=0 AND r.adresse_pays!='FRA') OR (r.siren=0 
AND r.PpPm='PP') ) \ + AND p.codPays3 = r.adresse_pays; + sql_attr_uint = idAct + sql_attr_bool = actif + sql_attr_float = PDetention + sql_attr_string = pays } index act { - source = act - path = /dbs/sphinx/act - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + source = act + path = /dbs/sphinx/act + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } diff --git a/scripts/build/config-prod/ciblage.conf b/scripts/build/config-prod/ciblage.conf index 4867f7e..c8cedc7 100644 --- a/scripts/build/config-prod/ciblage.conf +++ b/scripts/build/config-prod/ciblage.conf @@ -1,14 +1,14 @@ source ciblage { - type = mysql - sql_host = 192.168.3.30 - sql_user = 
sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ - SELECT id, LPAD(siren, 9, '000000000') AS siren, LPAD(nic, 5, '00000') AS nic, \ + type = mysql + sql_host = 192.168.3.30 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = \ + SELECT id, LPAD(siren, 9, '0') AS siren, LPAD(nic, 5, '0') AS nic, \ siege, \ adr_cp, \ REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ @@ -73,85 +73,85 @@ source ciblage CASE avisCs WHEN 0 THEN 1 WHEN 10 THEN 2 WHEN 15 THEN 3 WHEN 23 THEN 4 WHEN 29 THEN 4 WHEN 39 THEN 4 WHEN 43 THEN 4 WHEN 21 THEN 5 WHEN 26 THEN 5 WHEN 28 THEN 5 WHEN 31 THEN 6 WHEN 50 THEN 6 WHEN 24 THEN 7 ELSE 0 END as avisCs \ FROM etablissements_act WHERE siren>100; - sql_field_string = siren - sql_field_string = nic - sql_attr_uint = siege - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = tel - sql_attr_uint = fax - sql_attr_uint = cj - sql_attr_uint = capital - sql_attr_uint = age_entrep - sql_attr_uint = age_etab - sql_attr_uint = tca - sql_attr_uint = tcaexp - sql_attr_uint = teff_entrep - sql_attr_uint = teff_etab - sql_attr_uint = web - sql_attr_uint = mail - sql_attr_uint = adrDom - sql_attr_uint = actifEco - sql_attr_uint = presentRcs - sql_attr_uint = procolHisto - sql_attr_uint = tvaIntraValide - sql_attr_uint = dateCrea_etab - sql_attr_uint = dateCrea_ent - sql_attr_uint = dateImmat - sql_attr_uint = eff_entrep - sql_attr_uint = eff_etab - sql_attr_uint = dirNom - sql_attr_uint = nbEtab - sql_attr_uint = nbMPubli - sql_attr_uint = sirenGrp - sql_attr_uint = nbActio - sql_attr_uint = actio - sql_attr_uint = nbPart - sql_attr_uint = part - sql_attr_uint = bilType - sql_attr_uint = bilAnnee - sql_attr_uint = bilCloture - sql_attr_uint = bilDuree - sql_attr_uint = bilTca - sql_attr_uint = bilEE - sql_attr_uint = bilFL - sql_attr_uint = bilFK - sql_attr_uint = bilFR - sql_attr_uint = bilGF - sql_attr_uint = bilGP - sql_attr_uint = bilGU 
- sql_attr_uint = bilGW - sql_attr_uint = bilHD - sql_attr_uint = bilHH - sql_attr_uint = bilHL - sql_attr_uint = bilHM - sql_attr_uint = bilHN - sql_attr_uint = bilYP - sql_attr_uint = codeCommune - sql_attr_uint = zus - sql_attr_uint = zru - sql_attr_uint = zfu - sql_attr_uint = cucs - sql_attr_uint = zrr - sql_attr_uint = zafr - sql_attr_uint = avisCs + sql_field_string = siren + sql_field_string = nic + sql_attr_bool = siege + sql_attr_uint = adr_cp + sql_attr_uint = adr_dep + sql_attr_bool = tel + sql_attr_bool = fax + sql_attr_uint = cj + sql_attr_uint = capital + sql_attr_uint = age_entrep + sql_attr_uint = age_etab + sql_attr_uint = tca + sql_attr_uint = tcaexp + sql_attr_uint = teff_entrep + sql_attr_uint = teff_etab + sql_attr_bool = web + sql_attr_bool = mail + sql_attr_bool = adrDom + sql_attr_uint = actifEco + sql_attr_uint = presentRcs + sql_attr_uint = procolHisto + sql_attr_uint = tvaIntraValide + sql_attr_uint = dateCrea_etab + sql_attr_uint = dateCrea_ent + sql_attr_uint = dateImmat + sql_attr_uint = eff_entrep + sql_attr_uint = eff_etab + sql_attr_bool = dirNom + sql_attr_uint = nbEtab + sql_attr_uint = nbMPubli + sql_attr_bool = sirenGrp + sql_attr_uint = nbActio + sql_attr_bool = actio + sql_attr_uint = nbPart + sql_attr_bool = part + sql_attr_uint = bilType + sql_attr_uint = bilAnnee + sql_attr_uint = bilCloture + sql_attr_uint = bilDuree + sql_attr_uint = bilTca + sql_attr_uint = bilEE + sql_attr_uint = bilFL + sql_attr_uint = bilFK + sql_attr_uint = bilFR + sql_attr_uint = bilGF + sql_attr_uint = bilGP + sql_attr_uint = bilGU + sql_attr_uint = bilGW + sql_attr_uint = bilHD + sql_attr_uint = bilHH + sql_attr_uint = bilHL + sql_attr_uint = bilHM + sql_attr_uint = bilHN + sql_attr_uint = bilYP + sql_attr_uint = codeCommune + sql_attr_bool = zus + sql_attr_bool = zru + sql_attr_bool = zfu + sql_attr_bool = cucs + sql_attr_bool = zrr + sql_attr_bool = zafr + sql_attr_uint = avisCs } index ciblage { - source = ciblage - path = /dbs/sphinx/ciblage 
- docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 + source = ciblage + path = /dbs/sphinx/ciblage + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } diff --git a/scripts/build/config-prod/dir.conf b/scripts/build/config-prod/dir.conf index 140b1b1..781e567 100644 --- a/scripts/build/config-prod/dir.conf +++ b/scripts/build/config-prod/dir.conf @@ -1,57 +1,34 @@ source dir { - type = mysql - sql_host = 192.168.3.30 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ - SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ - 
YEAR(naissance_date) AS naiss_annee, \ - MONTH(naissance_date) AS naiss_mois, \ - DAY(naissance_date) AS naiss_jour, \ - naissance_lieu, fonction_code, actif \ + type = mysql + sql_host = 192.168.3.30 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = \ + SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, \ + prenom, YEAR(naissance_date) AS naiss_annee, MONTH(naissance_date) AS naiss_mois, \ + DAY(naissance_date) AS naiss_jour, naissance_lieu, fonction_code, actif \ FROM dirigeants; - sql_attr_string = civilite - sql_attr_string = typeDir - sql_attr_string = fonction_code - sql_attr_uint = dirSiren - sql_attr_uint = naiss_annee - sql_attr_uint = naiss_mois - sql_attr_uint = naiss_jour - sql_attr_uint = actif - sql_attr_uint = adr_dep + sql_attr_uint = dirSiren + sql_attr_uint = naiss_annee + sql_attr_uint = naiss_mois + sql_attr_uint = naiss_jour + sql_attr_uint = actif + sql_attr_uint = adr_dep } index dir { - source = dir - path = /dbs/sphinx/dir - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} - -index dir_phx -{ - source = dir - path = /dbs/sphinx/dir_phx - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ + source = dir + 
path = /dbs/sphinx/dir + docinfo = extern + morphology = none + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ @@ -60,5 +37,23 @@ index dir_phx U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} + +index dir_phx +{ + source = dir + path = /dbs/sphinx/dir_phx + docinfo = extern min_stemming_len = 4 + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } \ No newline at end of file diff --git a/scripts/build/config-prod/dirtmp.conf b/scripts/build/config-prod/dirtmp.conf index 64dc9bf..b906e96 100644 --- a/scripts/build/config-prod/dirtmp.conf +++ b/scripts/build/config-prod/dirtmp.conf @@ -1,56 +1,34 @@ + source dir { - type = mysql - sql_host = 192.168.3.30 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = - sql_query = \ - SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, prenom, \ - YEAR(naissance_date) AS naiss_annee, \ - MONTH(naissance_date) AS naiss_mois, 
\ - DAY(naissance_date) AS naiss_jour, \ - naissance_lieu, fonction_code, actif \ + type = mysql + sql_host = 192.168.3.30 + sql_user = sphinx + sql_pass = indexer + sql_db = jo + sql_query_pre = SET NAMES utf8 + sql_query = \ + SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, \ + prenom, YEAR(naissance_date) AS naiss_annee, MONTH(naissance_date) AS naiss_mois, \ + DAY(naissance_date) AS naiss_jour, naissance_lieu, fonction_code, actif \ FROM dirigeants_tmp; - sql_attr_string = civilite - sql_attr_string = typeDir - sql_attr_string = fonction_code - sql_attr_uint = dirSiren - sql_attr_uint = naiss_annee - sql_attr_uint = naiss_mois - sql_attr_uint = naiss_jour - sql_attr_uint = actif - sql_attr_uint = adr_dep + sql_attr_uint = dirSiren + sql_attr_uint = naiss_annee + sql_attr_uint = naiss_mois + sql_attr_uint = naiss_jour + sql_attr_uint = actif + sql_attr_uint = adr_dep } index dir { - source = dir - path = /dbs/sphinx/dir - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} - -index dir_phx -{ - source = dir - path = /dbs/sphinx/dir_phx - docinfo = extern - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ + source = dir + path = /dbs/sphinx/dir + docinfo = extern + morphology = none + 
charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ @@ -59,5 +37,23 @@ index dir_phx U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} + +index dir_phx +{ + source = dir + path = /dbs/sphinx/dir_phx + docinfo = extern min_stemming_len = 4 + charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 } \ No newline at end of file diff --git a/scripts/build/config-prod/histo.conf b/scripts/build/config-prod/histo.conf new file mode 100644 index 0000000..ce3165a --- /dev/null +++ b/scripts/build/config-prod/histo.conf @@ -0,0 +1,32 @@ + +source histo +{ + type = mysql + sql_host = 192.168.3.30 + sql_user = sphinx + sql_pass = indexer + sql_db = histobodacc + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, nomFichier, annee1, dateBod, texte FROM bodacc_ocr; + sql_query_info = SELECT * FROM bodacc_ocr WHERE id=$id + sql_attr_uint = annee1 +} + +index histo +{ + source = histo + path = /dbs/sphinx/histo + min_word_len = 3 + html_strip = 1 + 
charset_type = utf-8 + charset_table = 0..9, A..Z->a..z, a..z, \ + U+23, U+25, U+26, U+2B, U+3D, U+40, \ + U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ + U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ + U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ + U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ + U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ + U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ + U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ + U+FD->y, U+FF->y, U+0152->U+0153, U+0153 +} \ No newline at end of file From c2bc1b87eecc2b0c49e22a4a4f227a25d2e96a29 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 20 Oct 2014 07:19:16 +0000 Subject: [PATCH 18/40] Remove stopwords --- scripts/build/config-dev/stopwords-ent.txt | 24 ++-------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/scripts/build/config-dev/stopwords-ent.txt b/scripts/build/config-dev/stopwords-ent.txt index e319798..e657876 100644 --- a/scripts/build/config-dev/stopwords-ent.txt +++ b/scripts/build/config-dev/stopwords-ent.txt @@ -8,16 +8,13 @@ av d les l -jean sci sur et sarl a bd -pierre societe -marie pl en rte @@ -25,16 +22,8 @@ france c s m -michel association -paul -louis -andre -claude civile -francois -jacques -georges ld b immobiliere @@ -42,26 +31,17 @@ p e i all -philippe services -henri -bernard -charles -martin chez -denis -alain gaulle mer republique ecole seine rene -joseph -robert -laurent bat f -marcel au grande +madame +monsieur From 9ec419af698e086c0edf9587856338132cde9c85 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 17 Nov 2014 08:30:37 +0000 Subject: [PATCH 19/40] Keep pid file in readable directory --- scripts/build/config-dev/sphinx.conf | 2 +- scripts/build/config-prod/sphinx.conf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff 
--git a/scripts/build/config-dev/sphinx.conf b/scripts/build/config-dev/sphinx.conf index 10c27ee..77506ae 100644 --- a/scripts/build/config-dev/sphinx.conf +++ b/scripts/build/config-dev/sphinx.conf @@ -128,7 +128,7 @@ searchd # PID file, searchd process ID file name # mandatory - pid_file = /var/run/sphinxsearch/searchd.pid + pid_file = /var/run/searchd.pid # max amount of matches the daemon ever keeps in RAM, per-index # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL diff --git a/scripts/build/config-prod/sphinx.conf b/scripts/build/config-prod/sphinx.conf index b41e165..8992bd0 100644 --- a/scripts/build/config-prod/sphinx.conf +++ b/scripts/build/config-prod/sphinx.conf @@ -128,7 +128,7 @@ searchd # PID file, searchd process ID file name # mandatory - pid_file = /var/run/sphinxsearch/searchd.pid + pid_file = /var/run/searchd.pid # max amount of matches the daemon ever keeps in RAM, per-index # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL From acad0ec96ea7e3ca7e8b05e953081001a7344ada Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Fri, 13 Feb 2015 16:12:02 +0000 Subject: [PATCH 20/40] Change path --- scripts/build/config-prod/act.conf | 3 +- scripts/build/config-prod/ciblage.conf | 3 +- scripts/build/config-prod/dir.conf | 5 +- scripts/build/config-prod/dirtmp.conf | 3 +- scripts/build/config-prod/ent.conf | 5 +- scripts/build/config-prod/enttmp.conf | 5 +- scripts/build/config-prod/histo.conf | 3 +- scripts/build/config-prod/sphinx.conf | 6 +-- scripts/build/config-prod/stopwords-ent.txt | 53 +------------------ .../ubuntu-14.04/etc/logrotate.d/indexer | 2 +- .../ubuntu-14.04/etc/logrotate.d/searchd | 2 +- 11 files changed, 24 insertions(+), 66 deletions(-) diff --git a/scripts/build/config-prod/act.conf b/scripts/build/config-prod/act.conf index 301e81c..459c6ed 100644 --- a/scripts/build/config-prod/act.conf +++ b/scripts/build/config-prod/act.conf @@ -3,6 +3,7 @@ source act { type = mysql sql_host = 192.168.3.30 + 
sql_port = 3306 sql_user = sphinx sql_pass = indexer sql_db = jo @@ -21,7 +22,7 @@ source act index act { source = act - path = /dbs/sphinx/act + path = /dbs/idx/act docinfo = extern morphology = none charset_type = utf-8 diff --git a/scripts/build/config-prod/ciblage.conf b/scripts/build/config-prod/ciblage.conf index c8cedc7..fc4272a 100644 --- a/scripts/build/config-prod/ciblage.conf +++ b/scripts/build/config-prod/ciblage.conf @@ -3,6 +3,7 @@ source ciblage { type = mysql sql_host = 192.168.3.30 + sql_port = 3306 sql_user = sphinx sql_pass = indexer sql_db = jo @@ -140,7 +141,7 @@ source ciblage index ciblage { source = ciblage - path = /dbs/sphinx/ciblage + path = /dbs/idx/ciblage docinfo = extern morphology = none charset_type = utf-8 diff --git a/scripts/build/config-prod/dir.conf b/scripts/build/config-prod/dir.conf index 781e567..e0ac5d0 100644 --- a/scripts/build/config-prod/dir.conf +++ b/scripts/build/config-prod/dir.conf @@ -3,6 +3,7 @@ source dir { type = mysql sql_host = 192.168.3.30 + sql_port = 3306 sql_user = sphinx sql_pass = indexer sql_db = jo @@ -23,7 +24,7 @@ source dir index dir { source = dir - path = /dbs/sphinx/dir + path = /dbs/idx/dir docinfo = extern morphology = none charset_type = utf-8 @@ -42,7 +43,7 @@ index dir index dir_phx { source = dir - path = /dbs/sphinx/dir_phx + path = /dbs/idx/dir_phx docinfo = extern min_stemming_len = 4 charset_type = utf-8 diff --git a/scripts/build/config-prod/dirtmp.conf b/scripts/build/config-prod/dirtmp.conf index b906e96..b4455b8 100644 --- a/scripts/build/config-prod/dirtmp.conf +++ b/scripts/build/config-prod/dirtmp.conf @@ -3,6 +3,7 @@ source dir { type = mysql sql_host = 192.168.3.30 + sql_port = 3306 sql_user = sphinx sql_pass = indexer sql_db = jo @@ -23,7 +24,7 @@ source dir index dir { source = dir - path = /dbs/sphinx/dir + path = /dbs/idx/dir docinfo = extern morphology = none charset_type = utf-8 diff --git a/scripts/build/config-prod/ent.conf b/scripts/build/config-prod/ent.conf index 
6de5ebd..963e77c 100644 --- a/scripts/build/config-prod/ent.conf +++ b/scripts/build/config-prod/ent.conf @@ -3,6 +3,7 @@ source ent { type = mysql sql_host = 192.168.3.30 + sql_port = 3306 sql_user = sphinx sql_pass = indexer sql_db = jo @@ -29,7 +30,7 @@ source ent index ent { source = ent - path = /dbs/sphinx/ent + path = /dbs/idx/ent mlock = 1 docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt @@ -50,7 +51,7 @@ index ent index ent_phx { source = ent - path = /dbs/sphinx/ent_phx + path = /dbs/idx/ent_phx mlock = 1 docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt diff --git a/scripts/build/config-prod/enttmp.conf b/scripts/build/config-prod/enttmp.conf index 4843e56..a3ff957 100644 --- a/scripts/build/config-prod/enttmp.conf +++ b/scripts/build/config-prod/enttmp.conf @@ -3,6 +3,7 @@ source ent { type = mysql sql_host = 192.168.3.30 + sql_port = 3306 sql_user = sphinx sql_pass = indexer sql_db = jo @@ -28,7 +29,7 @@ source ent index ent { source = ent - path = /dbs/sphinx/ent + path = /dbs/idx/ent mlock = 1 docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt @@ -49,7 +50,7 @@ index ent index ent_phx { source = ent - path = /dbs/sphinx/ent_phx + path = /dbs/idx/ent_phx mlock = 1 docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt diff --git a/scripts/build/config-prod/histo.conf b/scripts/build/config-prod/histo.conf index ce3165a..28804f8 100644 --- a/scripts/build/config-prod/histo.conf +++ b/scripts/build/config-prod/histo.conf @@ -3,6 +3,7 @@ source histo { type = mysql sql_host = 192.168.3.30 + sql_port = 3306 sql_user = sphinx sql_pass = indexer sql_db = histobodacc @@ -15,7 +16,7 @@ source histo index histo { source = histo - path = /dbs/sphinx/histo + path = /dbs/idx/histo min_word_len = 3 html_strip = 1 charset_type = utf-8 diff --git a/scripts/build/config-prod/sphinx.conf b/scripts/build/config-prod/sphinx.conf index 8992bd0..0c018fb 100644 --- a/scripts/build/config-prod/sphinx.conf +++ 
b/scripts/build/config-prod/sphinx.conf @@ -99,15 +99,15 @@ searchd # listen = 192.168.0.1:9312 # listen = 9312 # listen = /var/run/searchd.sock - listen = 3312 + listen = 9312 # log file, searchd run info is logged here # optional, default is 'searchd.log' - log = /dbs/sphinxlog/searchd.log + log = /dbs/log/searchd.log # query log file, all search queries are logged here # optional, default is empty (do not log queries) - query_log = /dbs/sphinxlog/query.log + query_log = /dbs/log/query.log # client read timeout, seconds # optional, default is 5 diff --git a/scripts/build/config-prod/stopwords-ent.txt b/scripts/build/config-prod/stopwords-ent.txt index e319798..0f855e3 100644 --- a/scripts/build/config-prod/stopwords-ent.txt +++ b/scripts/build/config-prod/stopwords-ent.txt @@ -1,67 +1,18 @@ -de -la -du -r -des -le -av -d -les -l -jean sci -sur -et sarl -a -bd -pierre societe -marie -pl -en -rte france -c -s -m -michel association -paul -louis -andre -claude civile -francois -jacques -georges -ld -b immobiliere -p -e -i -all -philippe services -henri -bernard -charles -martin chez -denis -alain gaulle mer republique ecole seine rene -joseph -robert -laurent bat -f -marcel -au -grande +madame +monsieur \ No newline at end of file diff --git a/scripts/build/ubuntu-14.04/etc/logrotate.d/indexer b/scripts/build/ubuntu-14.04/etc/logrotate.d/indexer index 222a5dc..54738eb 100644 --- a/scripts/build/ubuntu-14.04/etc/logrotate.d/indexer +++ b/scripts/build/ubuntu-14.04/etc/logrotate.d/indexer @@ -1,4 +1,4 @@ -/dbs/sphinxlog/indexer.log { +/dbs/log/indexer.log { missingok notifempty weekly diff --git a/scripts/build/ubuntu-14.04/etc/logrotate.d/searchd b/scripts/build/ubuntu-14.04/etc/logrotate.d/searchd index bbe7a31..17124ba 100644 --- a/scripts/build/ubuntu-14.04/etc/logrotate.d/searchd +++ b/scripts/build/ubuntu-14.04/etc/logrotate.d/searchd @@ -1,4 +1,4 @@ -/dbs/sphinxlog/query.log /dbs/sphinxlog/searchd.log { +/dbs/log/query.log /dbs/log/searchd.log { missingok 
notifempty weekly From 6859bfa18ce58ebd5c3330f93cb7098a098e30c0 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 25 May 2015 09:11:56 +0000 Subject: [PATCH 21/40] Add example to compile --- README | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README b/README index e69de29..c32ee9a 100644 --- a/README +++ b/README @@ -0,0 +1,15 @@ +htop +bwm-ng +mysql-client-5.5 +gcc g++ make libmysqld-dev +wget -nv -O - http://snowball.tartarus.org/dist/libstemmer_c.tgz | tar zx +wget -nv -O - https://re2.googlecode.com/files/re2-20140304.tgz | tar zx +wget http://sphinxsearch.com/files/sphinx-2.2.7-release.tar.gz +tar xzvf sphinx-2.2.7-release.tar.gz +cp -R libstemmer_c/* sphinx-2.2.7-release/libstemmer_c/ +sed -i -e 's/stem_ISO_8859_1_hungarian/stem_ISO_8859_2_hungarian/g' sphinx-2.2.7-release/libstemmer_c/Makefile.in +cp -R re2/* sphinx-2.2.7-release/libre2/ +cd sphinx-2.2.7-release +./configure --with-libstemmer --with-re2 --prefix=/usr/local/sphinx +make +make install \ No newline at end of file From 39c05b2ab13a0db3b7638e92374642f1e4899d76 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 17 Jun 2015 14:30:30 +0000 Subject: [PATCH 22/40] Changement du chemin des logs --- indexer/indexer-dir.sh | 2 +- indexer/indexer-ent.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh index e8a5ed1..9603aa3 100755 --- a/indexer/indexer-dir.sh +++ b/indexer/indexer-dir.sh @@ -1,6 +1,6 @@ #!/bin/bash PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/dbs/sphinxlog +PATH_LOG=/dbs/log PATH_SQL=/home/scripts/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx diff --git a/indexer/indexer-ent.sh b/indexer/indexer-ent.sh index 1e45e44..da59790 100755 --- a/indexer/indexer-ent.sh +++ b/indexer/indexer-ent.sh @@ -1,6 +1,6 @@ #!/bin/bash PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/dbs/sphinxlog +PATH_LOG=/dbs/log PATH_SQL=/home/scripts/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx From 
06a2861fc5ea4d450562a81f3ee3ccacc308ef52 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 7 Sep 2015 06:08:12 +0000 Subject: [PATCH 23/40] Sphinx version 2.2.9 From dd36d5939aeb26f3fef7e14527a628fc5767cd40 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 7 Sep 2015 06:20:32 +0000 Subject: [PATCH 24/40] Simplificiation --- scripts/build/README => NOTE | 0 README | 35 +- .../build/config-prod => config}/act.conf | 2 +- .../build/config-prod => config}/ciblage.conf | 4 +- .../ciblage.conf => config/ciblagetmp.conf | 9 +- .../build/config-prod => config}/dir.conf | 2 +- .../build/config-prod => config}/dirtmp.conf | 2 +- .../build/config-prod => config}/ent.conf | 4 +- .../build/config-prod => config}/enttmp.conf | 2 +- .../build/config-prod => config}/histo.conf | 2 +- .../build/config-prod => config}/sphinx.conf | 4 +- .../config-prod => config}/stopwords-ent.txt | 0 .../config-dev => config}/wordforms-ent.txt | 0 indexer/dev-indexer-all.sh | 3 + indexer/indexer-act.sh | 14 +- indexer/indexer-ciblage.sh | 11 +- indexer/indexer-dir.sh | 4 +- indexer/indexer-ent.sh | 6 +- indexer/manual-indexer-ciblage.sh | 40 ++ indexer/sql/manual-consolidate-ciblage.sql | 6 + indexer/sql/rotate-ciblage.sql | 3 + scripts/build/config-dev/act.conf | 38 -- scripts/build/config-dev/ciblage.txt | 110 ------ scripts/build/config-dev/dir.conf | 59 --- scripts/build/config-dev/dirtmp.conf | 59 --- scripts/build/config-dev/ent.conf | 71 ---- scripts/build/config-dev/enttmp.conf | 71 ---- scripts/build/config-dev/histo.conf | 32 -- scripts/build/config-dev/sphinx.conf | 366 ------------------ scripts/build/config-dev/stopwords-ent.txt | 47 --- scripts/build/config-prod/wordforms-ent.txt | 206 ---------- scripts/build/configure.php | 97 ----- .../ubuntu-12.04/etc/default/sphinxsearch | 10 - .../ubuntu-12.04/etc/init.d/sphinxsearch | 1 - .../ubuntu-12.04/etc/init/sphinxsearch.conf | 34 -- .../ubuntu-12.04/etc/logrotate.d/indexer | 8 - .../ubuntu-12.04/etc/logrotate.d/searchd | 12 
- .../ubuntu-14.04/etc/default/sphinxsearch | 10 - .../ubuntu-14.04/etc/init/sphinxsearch.conf | 34 -- .../ubuntu-14.04/etc/logrotate.d/indexer | 8 - .../ubuntu-14.04/etc/logrotate.d/searchd | 12 - 41 files changed, 122 insertions(+), 1316 deletions(-) rename scripts/build/README => NOTE (100%) rename {scripts/build/config-prod => config}/act.conf (97%) rename {scripts/build/config-prod => config}/ciblage.conf (95%) rename scripts/build/config-dev/ciblage.conf => config/ciblagetmp.conf (93%) rename {scripts/build/config-prod => config}/dir.conf (98%) rename {scripts/build/config-prod => config}/dirtmp.conf (98%) rename {scripts/build/config-prod => config}/ent.conf (97%) rename {scripts/build/config-prod => config}/enttmp.conf (98%) rename {scripts/build/config-prod => config}/histo.conf (96%) rename {scripts/build/config-prod => config}/sphinx.conf (98%) rename {scripts/build/config-prod => config}/stopwords-ent.txt (100%) rename {scripts/build/config-dev => config}/wordforms-ent.txt (100%) create mode 100644 indexer/dev-indexer-all.sh create mode 100644 indexer/manual-indexer-ciblage.sh create mode 100644 indexer/sql/manual-consolidate-ciblage.sql create mode 100644 indexer/sql/rotate-ciblage.sql delete mode 100644 scripts/build/config-dev/act.conf delete mode 100644 scripts/build/config-dev/ciblage.txt delete mode 100644 scripts/build/config-dev/dir.conf delete mode 100644 scripts/build/config-dev/dirtmp.conf delete mode 100644 scripts/build/config-dev/ent.conf delete mode 100644 scripts/build/config-dev/enttmp.conf delete mode 100644 scripts/build/config-dev/histo.conf delete mode 100644 scripts/build/config-dev/sphinx.conf delete mode 100644 scripts/build/config-dev/stopwords-ent.txt delete mode 100644 scripts/build/config-prod/wordforms-ent.txt delete mode 100644 scripts/build/configure.php delete mode 100644 scripts/build/ubuntu-12.04/etc/default/sphinxsearch delete mode 100644 scripts/build/ubuntu-12.04/etc/init.d/sphinxsearch delete mode 100644 
scripts/build/ubuntu-12.04/etc/init/sphinxsearch.conf delete mode 100644 scripts/build/ubuntu-12.04/etc/logrotate.d/indexer delete mode 100644 scripts/build/ubuntu-12.04/etc/logrotate.d/searchd delete mode 100644 scripts/build/ubuntu-14.04/etc/default/sphinxsearch delete mode 100644 scripts/build/ubuntu-14.04/etc/init/sphinxsearch.conf delete mode 100644 scripts/build/ubuntu-14.04/etc/logrotate.d/indexer delete mode 100644 scripts/build/ubuntu-14.04/etc/logrotate.d/searchd diff --git a/scripts/build/README b/NOTE similarity index 100% rename from scripts/build/README rename to NOTE diff --git a/README b/README index c32ee9a..0afb7ce 100644 --- a/README +++ b/README @@ -1,3 +1,34 @@ +How to Use +========== + +Sphinx Engine configuration +--------------------------- +Create a /etc/sphinxsearch/sphinx.conf + +See in scripts/build/config-* + sphinx.conf : Sphinx Engine config + *.conf : one file per index + +Concat only needed file + +Indexing +-------- + indexer-* + slave-* + reprise-* + manual-* + dev-* + +Ubuntu PPA repository +===================== + +sudo apt-get install software-properties-common +sudo add-apt-repository ppa:builds/sphinxsearch-rel22 +sudo apt-get install libstemmer0d +sudo apt-get update && apt-get install sphinxsearch + +Manual installation +=================== htop bwm-ng mysql-client-5.5 @@ -12,4 +43,6 @@ cp -R re2/* sphinx-2.2.7-release/libre2/ cd sphinx-2.2.7-release ./configure --with-libstemmer --with-re2 --prefix=/usr/local/sphinx make -make install \ No newline at end of file +make install + + diff --git a/scripts/build/config-prod/act.conf b/config/act.conf similarity index 97% rename from scripts/build/config-prod/act.conf rename to config/act.conf index 459c6ed..a1afa14 100644 --- a/scripts/build/config-prod/act.conf +++ b/config/act.conf @@ -22,7 +22,7 @@ source act index act { source = act - path = /dbs/idx/act + path = /var/lib/sphinx/idx/act docinfo = extern morphology = none charset_type = utf-8 diff --git 
a/scripts/build/config-prod/ciblage.conf b/config/ciblage.conf similarity index 95% rename from scripts/build/config-prod/ciblage.conf rename to config/ciblage.conf index fc4272a..afd16fb 100644 --- a/scripts/build/config-prod/ciblage.conf +++ b/config/ciblage.conf @@ -21,7 +21,7 @@ source ciblage CONCAT('EX ', ape_entrep) AS ape_entrep, \ age_entrep, \ age_etab, \ - tca, \ + IF(tca IS NULL,99,tca) AS tca, \ tcaexp, \ IF(teff_entrep IS NULL,99,teff_entrep) AS teff_entrep, \ IF(teff_etab IS NULL,99,teff_etab) AS teff_etab, \ @@ -141,7 +141,7 @@ source ciblage index ciblage { source = ciblage - path = /dbs/idx/ciblage + path = /var/lib/sphinx/idx/ciblage docinfo = extern morphology = none charset_type = utf-8 diff --git a/scripts/build/config-dev/ciblage.conf b/config/ciblagetmp.conf similarity index 93% rename from scripts/build/config-dev/ciblage.conf rename to config/ciblagetmp.conf index 8d820e7..d69b75a 100644 --- a/scripts/build/config-dev/ciblage.conf +++ b/config/ciblagetmp.conf @@ -2,7 +2,8 @@ source ciblage { type = mysql - sql_host = 192.168.78.230 + sql_host = 192.168.3.30 + sql_port = 3306 sql_user = sphinx sql_pass = indexer sql_db = jo @@ -20,7 +21,7 @@ source ciblage CONCAT('EX ', ape_entrep) AS ape_entrep, \ age_entrep, \ age_etab, \ - tca, \ + IF(tca IS NULL,99,tca) AS tca, \ tcaexp, \ IF(teff_entrep IS NULL,99,teff_entrep) AS teff_entrep, \ IF(teff_etab IS NULL,99,teff_etab) AS teff_etab, \ @@ -71,7 +72,7 @@ source ciblage IF(zrr=1,1,0) as zrr, \ IF(zafr=1,1,0) as zafr, \ CASE avisCs WHEN 0 THEN 1 WHEN 10 THEN 2 WHEN 15 THEN 3 WHEN 23 THEN 4 WHEN 29 THEN 4 WHEN 39 THEN 4 WHEN 43 THEN 4 WHEN 21 THEN 5 WHEN 26 THEN 5 WHEN 28 THEN 5 WHEN 31 THEN 6 WHEN 50 THEN 6 WHEN 24 THEN 7 ELSE 0 END as avisCs \ - FROM etablissements_act WHERE siren>100; + FROM etablissements_act_tmp WHERE siren>100; sql_field_string = siren sql_field_string = nic @@ -140,7 +141,7 @@ source ciblage index ciblage { source = ciblage - path = /dbs/sphinx/ciblage + path = 
/var/lib/sphinx/idx/ciblage docinfo = extern morphology = none charset_type = utf-8 diff --git a/scripts/build/config-prod/dir.conf b/config/dir.conf similarity index 98% rename from scripts/build/config-prod/dir.conf rename to config/dir.conf index e0ac5d0..11cfb58 100644 --- a/scripts/build/config-prod/dir.conf +++ b/config/dir.conf @@ -24,7 +24,7 @@ source dir index dir { source = dir - path = /dbs/idx/dir + path = /var/lib/sphinx/idx/dir docinfo = extern morphology = none charset_type = utf-8 diff --git a/scripts/build/config-prod/dirtmp.conf b/config/dirtmp.conf similarity index 98% rename from scripts/build/config-prod/dirtmp.conf rename to config/dirtmp.conf index b4455b8..8a289fc 100644 --- a/scripts/build/config-prod/dirtmp.conf +++ b/config/dirtmp.conf @@ -24,7 +24,7 @@ source dir index dir { source = dir - path = /dbs/idx/dir + path = /var/lib/sphinx/idx/dir docinfo = extern morphology = none charset_type = utf-8 diff --git a/scripts/build/config-prod/ent.conf b/config/ent.conf similarity index 97% rename from scripts/build/config-prod/ent.conf rename to config/ent.conf index 963e77c..e1fbf46 100644 --- a/scripts/build/config-prod/ent.conf +++ b/config/ent.conf @@ -30,7 +30,7 @@ source ent index ent { source = ent - path = /dbs/idx/ent + path = /var/lib/sphinx/idx/ent mlock = 1 docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt @@ -51,7 +51,7 @@ index ent index ent_phx { source = ent - path = /dbs/idx/ent_phx + path = /var/lib/sphinx/idx/ent_phx mlock = 1 docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt diff --git a/scripts/build/config-prod/enttmp.conf b/config/enttmp.conf similarity index 98% rename from scripts/build/config-prod/enttmp.conf rename to config/enttmp.conf index a3ff957..f27cc2f 100644 --- a/scripts/build/config-prod/enttmp.conf +++ b/config/enttmp.conf @@ -29,7 +29,7 @@ source ent index ent { source = ent - path = /dbs/idx/ent + path = /var/lib/sphinx/idx/ent mlock = 1 docinfo = extern stopwords = 
/etc/sphinxsearch/stopwords-ent.txt diff --git a/scripts/build/config-prod/histo.conf b/config/histo.conf similarity index 96% rename from scripts/build/config-prod/histo.conf rename to config/histo.conf index 28804f8..9597a08 100644 --- a/scripts/build/config-prod/histo.conf +++ b/config/histo.conf @@ -16,7 +16,7 @@ source histo index histo { source = histo - path = /dbs/idx/histo + path = /var/lib/sphinx/idx/histo min_word_len = 3 html_strip = 1 charset_type = utf-8 diff --git a/scripts/build/config-prod/sphinx.conf b/config/sphinx.conf similarity index 98% rename from scripts/build/config-prod/sphinx.conf rename to config/sphinx.conf index 0c018fb..e235c7b 100644 --- a/scripts/build/config-prod/sphinx.conf +++ b/config/sphinx.conf @@ -103,11 +103,11 @@ searchd # log file, searchd run info is logged here # optional, default is 'searchd.log' - log = /dbs/log/searchd.log + log = /var/lib/sphinx/log/searchd.log # query log file, all search queries are logged here # optional, default is empty (do not log queries) - query_log = /dbs/log/query.log + query_log = /var/lib/sphinx/log/query.log # client read timeout, seconds # optional, default is 5 diff --git a/scripts/build/config-prod/stopwords-ent.txt b/config/stopwords-ent.txt similarity index 100% rename from scripts/build/config-prod/stopwords-ent.txt rename to config/stopwords-ent.txt diff --git a/scripts/build/config-dev/wordforms-ent.txt b/config/wordforms-ent.txt similarity index 100% rename from scripts/build/config-dev/wordforms-ent.txt rename to config/wordforms-ent.txt diff --git a/indexer/dev-indexer-all.sh b/indexer/dev-indexer-all.sh new file mode 100644 index 0000000..140d37f --- /dev/null +++ b/indexer/dev-indexer-all.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +/usr/bin/indexer act dir dir_phx ent ent_phx ciblage diff --git a/indexer/indexer-act.sh b/indexer/indexer-act.sh index 8495708..7a56cfe 100644 --- a/indexer/indexer-act.sh +++ b/indexer/indexer-act.sh @@ -1,7 +1,7 @@ -#!/bin/bash 
-PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/dbs/sphinxlog - -echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ACT" >> $PATH_LOG/indexer.log -$PATH_BIN/indexer --rotate act >> $PATH_LOG/indexer.log -echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ACT" >> $PATH_LOG/indexer.log +#!/bin/bash +PATH_BIN=/usr/bin +PATH_LOG=/var/lib/sphinx/log + +echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ACT" >> $PATH_LOG/indexer.log +$PATH_BIN/indexer --rotate act >> $PATH_LOG/indexer.log +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ACT" >> $PATH_LOG/indexer.log diff --git a/indexer/indexer-ciblage.sh b/indexer/indexer-ciblage.sh index d415ad1..395f68a 100644 --- a/indexer/indexer-ciblage.sh +++ b/indexer/indexer-ciblage.sh @@ -1,7 +1,7 @@ #!/bin/bash -PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/dbs/sphinxlog -PATH_SQL=/home/scripts/indexer/sql +PATH_BIN=/usr/bin +PATH_LOG=/var/lib/sphinx/log +PATH_SQL=/home/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer @@ -29,6 +29,11 @@ if [ -n "$idx" ]; then $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ciblage >> $PATH_LOG/indexer.log echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + # Rotation des tables MySQL - @todo + #echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Debut" >> $PATH_LOG/indexer.log + #mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-ciblage.sql >> $PATH_LOG/indexer.log + #echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log + # Enregistrement Fin Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh index 9603aa3..2929987 100755 --- a/indexer/indexer-dir.sh +++ b/indexer/indexer-dir.sh @@ -1,7 +1,7 @@ #!/bin/bash PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/dbs/log -PATH_SQL=/home/scripts/indexer/sql +PATH_LOG=/var/lib/sphinx/log 
+PATH_SQL=/home/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer diff --git a/indexer/indexer-ent.sh b/indexer/indexer-ent.sh index da59790..bb19dc1 100755 --- a/indexer/indexer-ent.sh +++ b/indexer/indexer-ent.sh @@ -1,7 +1,7 @@ #!/bin/bash -PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/dbs/log -PATH_SQL=/home/scripts/indexer/sql +PATH_BIN=/usr/bin +PATH_LOG=/var/lib/sphinx/log +PATH_SQL=/home/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer diff --git a/indexer/manual-indexer-ciblage.sh b/indexer/manual-indexer-ciblage.sh new file mode 100644 index 0000000..36bdb3f --- /dev/null +++ b/indexer/manual-indexer-ciblage.sh @@ -0,0 +1,40 @@ +#!/bin/bash +PATH_BIN=/usr/local/sphinx/bin +PATH_LOG=/dbs/log +PATH_SQL=/home/scripts/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +# Is consolidated ? +output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-ciblage.sql) +idx=''; +for line in "$output"; do + idx="$line"; +done +# Suppression fin de ligne +idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") + +# Lancement de l'indexation si la consolidation a eu lieu +if [ -n "$idx" ]; then + if [[ "$idx" > 0 ]]; then + + echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log + + # Enregistrement Debut Indexation + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log + + # Sphinx rotate + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log + $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ciblage >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + + # Enregistrement Fin Indexation + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log + + echo "$(date '+%Y-%m-%d %H:%M:%S') === 
FIN INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log + + fi +fi + + diff --git a/indexer/sql/manual-consolidate-ciblage.sql b/indexer/sql/manual-consolidate-ciblage.sql new file mode 100644 index 0000000..dd3e3db --- /dev/null +++ b/indexer/sql/manual-consolidate-ciblage.sql @@ -0,0 +1,6 @@ +SELECT id FROM sphinx_idx +WHERE createEnd BETWEEN (NOW() - INTERVAL 7 DAY) AND NOW() +AND nom = 'jo.etablissements_act' +AND indexingBegin IS NULL +AND indexingEnd IS NULL +ORDER BY createEnd DESC LIMIT 1; \ No newline at end of file diff --git a/indexer/sql/rotate-ciblage.sql b/indexer/sql/rotate-ciblage.sql new file mode 100644 index 0000000..943a757 --- /dev/null +++ b/indexer/sql/rotate-ciblage.sql @@ -0,0 +1,3 @@ +DROP TABLE IF EXISTS jo.etablissements_act_old; +RENAME TABLE jo.etablissements_act TO jo.etablissements_act_old; +RENAME TABLE jo.etablissements_act_tmp TO jo.etablissements_act; diff --git a/scripts/build/config-dev/act.conf b/scripts/build/config-dev/act.conf deleted file mode 100644 index cc1bc72..0000000 --- a/scripts/build/config-dev/act.conf +++ /dev/null @@ -1,38 +0,0 @@ - -source act -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = SELECT l.id, l.idPar, l.idAct, r.siren, r.actif, r.PpPm, r.RS, r.adresse_cp, r.adresse_ville, \ - p.libPays AS pays, l.PDetention FROM liens2 l, liensRef r, tabPays p \ - WHERE l.dateSuppr = '0000-00-00 00:00:00' AND r.id = l.idAct \ - AND ( r.siren>1000 OR (r.siren=0 AND r.adresse_pays!='FRA') OR (r.siren=0 AND r.PpPm='PP') ) \ - AND p.codPays3 = r.adresse_pays; - sql_attr_uint = idAct - sql_attr_bool = actif - sql_attr_float = PDetention - sql_attr_string = pays -} - -index act -{ - source = act - path = /dbs/sphinx/act - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, 
U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} diff --git a/scripts/build/config-dev/ciblage.txt b/scripts/build/config-dev/ciblage.txt deleted file mode 100644 index 5cede41..0000000 --- a/scripts/build/config-dev/ciblage.txt +++ /dev/null @@ -1,110 +0,0 @@ -############################################################################# -# Documentation -# ============= -# -# Présence d'éléments -# IF(element=='',0,1) AS pElement -# -# Liste des champs dans la BDD -# ----------------------------- -# id -# source -# source_id -# triCode -# autre_id -# siren => LPAD(siren, 9, '000000000') -# nic => LPAD(nic, 5, '00000') -# actif => 0 ou 1 -# siege => 0 ou 1 -# raisonSociale -# enseigne -# sigle -# identite_pre -# marques => IF(marques=='',0,1) AS pMarques -# adr_num -# adr_btq -# adr_typeVoie -# adr_libVoie -# adr_comp -# adr_cp => Code postal -# adr_ville => Ville -# adr_dep => Departement (Corse 2A,2B => 201, 202) -# adr_com -# tel => IF(tel>0,1,0) AS pTel -# fax => IF(fax>0,1,0) AS pFax -# cj => Texte -# capital => Float -# capitalDev -# capitalSrc -# ape_etab => Texte => Code APE de l'etablissement -# ape_entrep => Texte => Code APE de l'entreprise -# age_entrep => Entier => Age de l'entreprise -# age_etab => Entier => Age de l'etablissement -# tca => Entier => Tranche de chiffre d'affaire -# tcaexp => Entier => Tranche de chiffre d'affaire à l'export -# teff_entrep => Tranche Effectif de l'entreprise -# teff_etab => Tranche Effectif de l'etablissement -# rang -# web => IF(web=='',0,1) AS pWeb -# mail => 
IF(mail=='',0,1) AS pMail -# adrDom => 0,1,2 -# lieuAct -# actifEco => 0,1 -# presentRcs => 0,1 -# procolHisto => -# tvaIntraCle -# tvaIntraValide => 0,1 -# ape4_etab -# ape4_entrep -# NaceEtab -# NaceEntrep -# dateCrea_etab => Date -# dateCrea_ent => Date -# dateImmat => Date -# eff_entrep => Entier => Effectif de l'entreprise -# eff_etab => Entier => Effectir de l'etablissement -# distSP -# achPost -# rivoli -# dirCiv -# dirNom => IF(pDirNom=='',0,1) AS pDirNom -# dirPrenom -# dirDateNaiss -# dirFct -# nbEtab => Nombre d'établissement -# nbMPubli -# sirenGrp => IF(pSirenGrp>0,1,0) AS pSirenGrp -# nbActio => Entier => Nombre d'actionnaires (actio,bool) -# nbPart => Entier => Nombre de participations (part, bool) -# bilType => -# bilAnnee => -# bilCloture => -# bilDuree => -# bilTca => -# bilEE => -# bilFL => -# bilFK => -# bilFR => -# bilGF => -# bilGP => -# bilGU => -# bilGW => -# bilHD => -# bilHH => -# bilHL => -# bilHM => -# bilHN => -# bilYP => -# avisCs -# codeCommune => -# l93_x -# l93_y -# alt -# precis -# zus => -# zru => -# zfu => -# cucs => -# zrr => -# zafr => -############################################################################# \ No newline at end of file diff --git a/scripts/build/config-dev/dir.conf b/scripts/build/config-dev/dir.conf deleted file mode 100644 index 13e5c1b..0000000 --- a/scripts/build/config-dev/dir.conf +++ /dev/null @@ -1,59 +0,0 @@ - -source dir -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ - SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, \ - prenom, YEAR(naissance_date) AS naiss_annee, MONTH(naissance_date) AS naiss_mois, \ - DAY(naissance_date) AS naiss_jour, naissance_lieu, fonction_code, actif \ - FROM dirigeants; - sql_attr_uint = dirSiren - sql_attr_uint = naiss_annee - sql_attr_uint = naiss_mois - sql_attr_uint = naiss_jour - sql_attr_uint = actif - 
sql_attr_uint = adr_dep -} - -index dir -{ - source = dir - path = /dbs/sphinx/dir - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} - -index dir_phx -{ - source = dir - path = /dbs/sphinx/dir_phx - docinfo = extern - min_stemming_len = 4 - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} \ No newline at end of file diff --git a/scripts/build/config-dev/dirtmp.conf b/scripts/build/config-dev/dirtmp.conf deleted file mode 100644 index c642a40..0000000 --- a/scripts/build/config-dev/dirtmp.conf +++ /dev/null @@ -1,59 +0,0 @@ - -source dir -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = \ 
- SELECT id, siren, adr_dep, typeDir, dirSiren, civilite, CONCAT(nom,' ',naissance_nom, ' ', dirRS) AS nom, \ - prenom, YEAR(naissance_date) AS naiss_annee, MONTH(naissance_date) AS naiss_mois, \ - DAY(naissance_date) AS naiss_jour, naissance_lieu, fonction_code, actif \ - FROM dirigeants_tmp; - sql_attr_uint = dirSiren - sql_attr_uint = naiss_annee - sql_attr_uint = naiss_mois - sql_attr_uint = naiss_jour - sql_attr_uint = actif - sql_attr_uint = adr_dep -} - -index dir -{ - source = dir - path = /dbs/sphinx/dir - docinfo = extern - morphology = none - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} - -index dir_phx -{ - source = dir - path = /dbs/sphinx/dir_phx - docinfo = extern - min_stemming_len = 4 - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, 
U+FF->y, U+0152->U+0153, U+0153 -} \ No newline at end of file diff --git a/scripts/build/config-dev/ent.conf b/scripts/build/config-dev/ent.conf deleted file mode 100644 index c79d218..0000000 --- a/scripts/build/config-dev/ent.conf +++ /dev/null @@ -1,71 +0,0 @@ - -source ent -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, siren, nic, siege, \ - CONCAT_WS(" ", raisonSociale, enseigne, sigle, identite_pre) AS nom, \ - REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ - adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ - FROM etablissements; - sql_attr_uint = siren - sql_attr_uint = nic - sql_attr_uint = siege - sql_attr_bool = actif - sql_attr_uint = adr_num - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = cj - sql_attr_bool = sirenValide - sql_attr_uint = rang -} - -index ent -{ - source = ent - path = /dbs/sphinx/ent - mlock = 1 - docinfo = extern - stopwords = /etc/sphinxsearch/stopwords-ent.txt - wordforms = /etc/sphinxsearch/wordforms-ent.txt - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} - -index ent_phx -{ - source = ent - path = /dbs/sphinx/ent_phx - mlock = 1 - docinfo = 
extern - stopwords = /etc/sphinxsearch/stopwords-ent.txt - stopwords_unstemmed = 1 - wordforms = /etc/sphinxsearch/wordforms-ent.txt - morphology = libstemmer_fr - min_stemming_len = 4 - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} diff --git a/scripts/build/config-dev/enttmp.conf b/scripts/build/config-dev/enttmp.conf deleted file mode 100644 index 256af8c..0000000 --- a/scripts/build/config-dev/enttmp.conf +++ /dev/null @@ -1,71 +0,0 @@ - -source ent -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer - sql_db = jo - sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, siren, nic, siege, \ - CONCAT_WS(" ", raisonSociale, enseigne, sigle, identite_pre) AS nom, \ - REPLACE(REPLACE(adr_dep, '2B', '202'), '2A', '201') AS adr_dep, \ - actif, adr_num, CONCAT_WS(" ", adr_typeVoie, adr_libVoie ,adr_comp) AS adresse, adr_cp, \ - adr_ville AS ville, cj, ape_etab, IF(siren>200,1,0) AS sirenValide, rang \ - FROM etablissements_tmp; - sql_attr_uint = siren - sql_attr_uint = nic - sql_attr_uint = siege - sql_attr_bool = actif - sql_attr_uint = adr_num - sql_attr_uint = adr_cp - sql_attr_uint = adr_dep - sql_attr_uint = cj:4 - sql_attr_bool = sirenValide - sql_attr_uint = rang -} - -index ent -{ - source = ent - path = /dbs/sphinx/ent - mlock = 1 - docinfo = extern - stopwords = /etc/sphinxsearch/stopwords-ent.txt - 
wordforms = /etc/sphinxsearch/wordforms-ent.txt - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} - -index ent_phx -{ - source = ent - path = /dbs/sphinx/ent_phx - mlock = 1 - docinfo = extern - stopwords = /etc/sphinxsearch/stopwords-ent.txt - stopwords_unstemmed = 1 - wordforms = /etc/sphinxsearch/wordforms-ent.txt - morphology = libstemmer_fr - min_stemming_len = 4 - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} diff --git a/scripts/build/config-dev/histo.conf b/scripts/build/config-dev/histo.conf deleted file mode 100644 index 13ab19f..0000000 --- a/scripts/build/config-dev/histo.conf +++ /dev/null @@ -1,32 +0,0 @@ - -source histo -{ - type = mysql - sql_host = 192.168.78.230 - sql_user = sphinx - sql_pass = indexer 
- sql_db = histobodacc - sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, nomFichier, annee1, dateBod, texte FROM bodacc_ocr; - sql_query_info = SELECT * FROM bodacc_ocr WHERE id=$id - sql_attr_uint = annee1 -} - -index histo -{ - source = histo - path = /dbs/sphinx/histo - min_word_len = 3 - html_strip = 1 - charset_type = utf-8 - charset_table = 0..9, A..Z->a..z, a..z, \ - U+23, U+25, U+26, U+2B, U+3D, U+40, \ - U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ - U+C8->e, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, U+CD->i, U+CE->i, \ - U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, U+D4->o, U+D5->o, U+D6->o, \ - U+D8->o, U+D9->u, U+DA->u, U+DB->u, U+DC->u, U+DD->y, U+E0->a, U+E1->a, \ - U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E7->c, U+E8->e, U+E9->e, U+EA->e, \ - U+EB->e, U+EC->i, U+ED->i, U+EE->i, U+EF->i, U+F1->n, U+F2->o, U+F3->o, \ - U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u,U+FB->u, U+FC->u, \ - U+FD->y, U+FF->y, U+0152->U+0153, U+0153 -} \ No newline at end of file diff --git a/scripts/build/config-dev/sphinx.conf b/scripts/build/config-dev/sphinx.conf deleted file mode 100644 index 77506ae..0000000 --- a/scripts/build/config-dev/sphinx.conf +++ /dev/null @@ -1,366 +0,0 @@ - -############################################################################# -## indexer settings -############################################################################# - -indexer -{ - # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) - # optional, default is 32M, max is 2047M, recommended is 256M to 1024M - mem_limit = 256M - - # maximum IO calls per second (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iops = 40 - - - # maximum IO call size, bytes (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iosize = 1048576 - - - # maximum xmlpipe2 field length, bytes - # optional, default is 2M - # - # max_xmlpipe2_field = 4M - - - # write buffer size, bytes - # several (currently up 
to 4) buffers will be allocated - # write buffers are allocated in addition to mem_limit - # optional, default is 1M - # - # write_buffer = 1M - - - # maximum file field adaptive buffer size - # optional, default is 8M, minimum is 1M - # - # max_file_field_buffer = 32M - - - # how to handle IO errors in file fields - # known values are 'ignore_field', 'skip_document', and 'fail_index' - # optional, default is 'ignore_field' - # - # on_file_field_error = skip_document - - - # how to handle syntax errors in JSON attributes - # known values are 'ignore_attr' and 'fail_index' - # optional, default is 'ignore_attr' - # - # on_json_attr_error = fail_index - - - # whether to auto-convert numeric values from strings in JSON attributes - # with auto-conversion, string value with actually numeric data - # (as in {"key":"12345"}) gets stored as a number, rather than string - # optional, allowed values are 0 and 1, default is 0 (do not convert) - # - # json_autoconv_numbers = 1 - - - # whether and how to auto-convert key names in JSON attributes - # known value is 'lowercase' - # optional, default is unspecified (do nothing) - # - # json_autoconv_keynames = lowercase - - - # lemmatizer dictionaries base path - # optional, defaut is /usr/local/share (see ./configure --datadir) - # - # lemmatizer_base = /usr/local/share/sphinx/dicts - - - # lemmatizer cache size - # improves the indexing time when the lemmatization is enabled - # optional, default is 256K - # - # lemmatizer_cache = 512M -} - -############################################################################# -## searchd settings -############################################################################# - -searchd -{ - # [hostname:]port[:protocol], or /unix/socket/path to listen on - # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL) - # - # multi-value, multiple listen points are allowed - # optional, defaults are 9312:sphinx and 9306:mysql41, as below - # - # listen = 127.0.0.1 - # listen = 
192.168.0.1:9312 - # listen = 9312 - # listen = /var/run/searchd.sock - listen = 3312 - - # log file, searchd run info is logged here - # optional, default is 'searchd.log' - log = /dbs/sphinxlog/searchd.log - - # query log file, all search queries are logged here - # optional, default is empty (do not log queries) - query_log = /dbs/sphinxlog/query.log - - # client read timeout, seconds - # optional, default is 5 - read_timeout = 5 - - # request timeout, seconds - # optional, default is 5 minutes - client_timeout = 300 - - # maximum amount of children to fork (concurrent searches to run) - # optional, default is 0 (unlimited) - max_children = 30 - - # maximum amount of persistent connections from this master to each agent host - # optional, but necessary if you use agent_persistent. It is reasonable to set the value - # as max_children, or less on the agent's hosts. - persistent_connections_limit = 30 - - # PID file, searchd process ID file name - # mandatory - pid_file = /var/run/searchd.pid - - # max amount of matches the daemon ever keeps in RAM, per-index - # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL - # default is 1000 (just like Google) - max_matches = 1000 - - # seamless rotate, prevents rotate stalls if precaching huge datasets - # optional, default is 1 - seamless_rotate = 1 - - # whether to forcibly preopen all indexes on startup - # optional, default is 1 (preopen everything) - preopen_indexes = 1 - - # whether to unlink .old index copies on succesful rotation. 
- # optional, default is 1 (do unlink) - unlink_old = 1 - - # attribute updates periodic flush timeout, seconds - # updates will be automatically dumped to disk this frequently - # optional, default is 0 (disable periodic flush) - # - # attr_flush_period = 900 - - - # instance-wide ondisk_dict defaults (per-index value take precedence) - # optional, default is 0 (precache all dictionaries in RAM) - # - # ondisk_dict_default = 1 - - - # MVA updates pool size - # shared between all instances of searchd, disables attr flushes! - # optional, default size is 1M - mva_updates_pool = 1M - - # max allowed network packet size - # limits both query packets from clients, and responses from agents - # optional, default size is 8M - max_packet_size = 8M - - # crash log path - # searchd will (try to) log crashed query to 'crash_log_path.PID' file - # optional, default is empty (do not create crash logs) - # - # crash_log_path = /var/log/sphinxsearch/crash - - - # max allowed per-query filter count - # optional, default is 256 - max_filters = 256 - - # max allowed per-filter values count - # optional, default is 4096 - max_filter_values = 4096 - - - # socket listen queue length - # optional, default is 5 - # - # listen_backlog = 5 - - - # per-keyword read buffer size - # optional, default is 256K - # - # read_buffer = 256K - - - # unhinted read size (currently used when reading hits) - # optional, default is 32K - # - # read_unhinted = 32K - - - # max allowed per-batch query count (aka multi-query count) - # optional, default is 32 - max_batch_queries = 32 - - - # max common subtree document cache size, per-query - # optional, default is 0 (disable subtree optimization) - # - # subtree_docs_cache = 4M - - - # max common subtree hit cache size, per-query - # optional, default is 0 (disable subtree optimization) - # - # subtree_hits_cache = 8M - - - # multi-processing mode (MPM) - # known values are none, fork, prefork, and threads - # threads is required for RT backend to work - # 
optional, default is fork - workers = prefork - - - # max threads to create for searching local parts of a distributed index - # optional, default is 0, which means disable multi-threaded searching - # should work with all MPMs (ie. does NOT require workers=threads) - # - dist_threads = 1 - - - # binlog files path; use empty string to disable binlog - # optional, default is build-time configured data directory - # - # binlog_path = # disable logging - # binlog_path = /var/lib/sphinxsearch/data # binlog.001 etc will be created there - - - # binlog flush/sync mode - # 0 means flush and sync every second - # 1 means flush and sync every transaction - # 2 means flush every transaction, sync every second - # optional, default is 2 - # - # binlog_flush = 2 - - - # binlog per-file size limit - # optional, default is 128M, 0 means no limit - # - # binlog_max_log_size = 256M - - - # per-thread stack size, only affects workers=threads mode - # optional, default is 64K - # - # thread_stack = 128K - - - # per-keyword expansion limit (for dict=keywords prefix searches) - # optional, default is 0 (no limit) - # - # expansion_limit = 1000 - - - # RT RAM chunks flush period - # optional, default is 0 (no periodic flush) - # - # rt_flush_period = 900 - - - # query log file format - # optional, known values are plain and sphinxql, default is plain - # - # query_log_format = sphinxql - - - # version string returned to MySQL network protocol clients - # optional, default is empty (use Sphinx version) - # - # mysql_version_string = 5.0.37 - - - # trusted plugin directory - # optional, default is empty (disable UDFs) - # - # plugin_dir = /usr/local/sphinx/lib - - - # default server-wide collation - # optional, default is libc_ci - # - # collation_server = utf8_general_ci - - - # server-wide locale for libc based collations - # optional, default is C - # - # collation_libc_locale = ru_RU.UTF-8 - - - # threaded server watchdog (only used in workers=threads mode) - # optional, values are 0 
and 1, default is 1 (watchdog on) - # - # watchdog = 1 - - - # SphinxQL compatibility mode (legacy columns and their names) - # optional, default is 1 (old-style) - # - # compat_sphinxql_magics = 1 - - - # costs for max_predicted_time model, in (imaginary) nanoseconds - # optional, default is "doc=64, hit=48, skip=2048, match=64" - # - # predicted_time_costs = doc=64, hit=48, skip=2048, match=64 - - - # current SphinxQL state (uservars etc) serialization path - # optional, default is none (do not serialize SphinxQL state) - # - # sphinxql_state = sphinxvars.sql - - - # maximum RT merge thread IO calls per second, and per-call IO size - # useful for throttling (the background) OPTIMIZE INDEX impact - # optional, default is 0 (unlimited) - # - # rt_merge_iops = 40 - # rt_merge_maxiosize = 1M - - - # interval between agent mirror pings, in milliseconds - # 0 means disable pings - # optional, default is 1000 - # - # ha_ping_interval = 0 - - - # agent mirror statistics window size, in seconds - # stats older than the window size (karma) are retired - # that is, they will not affect master choice of agents in any way - # optional, default is 60 seconds - # - # ha_period_karma = 60 - - - # delay between preforked children restarts on rotation, in milliseconds - # optional, default is 0 (no delay) - # - # prefork_rotation_throttle = 100 - - - # a prefix to prepend to the local file names when creating snippets - # with load_files and/or load_files_scatter options - # optional, default is empty - # - # snippets_file_prefix = /mnt/common/server1/ -} \ No newline at end of file diff --git a/scripts/build/config-dev/stopwords-ent.txt b/scripts/build/config-dev/stopwords-ent.txt deleted file mode 100644 index e657876..0000000 --- a/scripts/build/config-dev/stopwords-ent.txt +++ /dev/null @@ -1,47 +0,0 @@ -de -la -du -r -des -le -av -d -les -l -sci -sur -et -sarl -a -bd -societe -pl -en -rte -france -c -s -m -association -civile -ld -b -immobiliere -p -e -i -all -services -chez 
-gaulle -mer -republique -ecole -seine -rene -bat -f -au -grande -madame -monsieur diff --git a/scripts/build/config-prod/wordforms-ent.txt b/scripts/build/config-prod/wordforms-ent.txt deleted file mode 100644 index e095e49..0000000 --- a/scripts/build/config-prod/wordforms-ent.txt +++ /dev/null @@ -1,206 +0,0 @@ -& > et -un > 1 -deux > 2 -trois > 3 -quatre > 4 -cinq > 5 -six > 6 -sept > 7 -huit > 8 -neuf > 9 -dix > 10 -onze > 11 -douze > 12 -treize > 13 -quatorze > 14 -quinze > 15 -seize > 16 -vingt > 20 -vingts > 20 -trente > 30 -quarante > 40 -cinquante > 50 -soixante > 60 -quatrevingt > 80 -cent > 100 -cents > 100 -mille > 1000 -zac > zone -zad > zone -za > zone -zi > zone -zup > zone -general > gal -abbaye > abe -agglomeration > agl -aglo > agl -allee > all -ancien > ach -ancienne > art -anse > anse -arcade > arc -autoroute > aut -avenue > av -barriere > bre -bas > bch -bastide > bstd -baston > bast -beguinage > begi -berge > ber -boite postal > bp -boucle > bcle -boulevard > bd -bourg > brg -butte > but -campagne > cgne -camping > cpg -carre > carr -carreau > cau -carrefour > car -carriere > care -castel > cst -cavee > cav -central > ctre -centre > ctre -chalet > chl -chapelle > chp -charmille > chi -chateau > cht -chaussee > chs -che > chemin -chv > chemin -cheminement > chem -cloitre > cloi -colline > coli -contour > ctr -corniche > cor -cottage > cott -cours > crs -darse > dars -degre > deg -dsg > descente -dsc > descente -digue > dig -domaine > dom -docteur > dr -ecart > eca -ecluse > ecl -eglise > egl -enceinte > en -enclave > env -enclos > enc -escalier > esc -espace > espa -esplanade > esp -etang > eting -faubourg > fg -ferme > frm -fontaine > fon -fort > fort -forum > form -fosse > fos -foyer > foyr -galerie > gal -garenne > garn -grand > gbd -gden > grande -gr > grande -grille > gri -grimpette > grim -groupe > gpe -groupement > gpt -halle > hle -hameau > ham -haut > hch -hippodrome > hip -immeuble > imm -impasse > imp -jardin > jard -jetee > jte 
-levee > leve -lieu > ld -lieudit > ld -lotissement > lot -maison > mf -manoir > man -marche > mar -metro > met -montee > mte -moulin > mln -musee > mus -nouvelle > nte -palais > pal -parc > parc -parking > pkg -parvis > prv -pas > passage -pn > passage -passe > pass -passerelle > ple -patio > pat -pavillon > pav -peripherique > peri -peristyle > psty -petites > pta -pae > petite -pim > petite -prt > petite -ptr > petite -place > pl -placis > plci -plage > plag -plaine > pln -plan > plan -plateau > plt -pointe > pnt -porche > pch -porte > pte -portique > porq -poterne > pot -pourtour > pour -presqu ile > prq -promenade > prom -quai > qu -quartier > qua -raccourci > rac -raidillon > raid -rampe > rpe -rempart > rem -residence > res -rocade > roc -rond > rpt -roquet > roqt -rotonde > rtd -route > rte -rue > r -ruelle > rle -sente > sen -sentier > sen -square > sq -st > saint -ste > saint -sainte > saint -stade > stde -station > sta -terre > tpl -terrain > trn -terrasse > tsse -tertre > trt -traverse > tra -vallon > val -vallee > val -venelle > ven -vieille > vte -villa > vla -vge > village -vlge > village -voie > voi -centre cial > ccal -centre com > ccal -centre comm > ccal -centre commercial > ccal -ville > mairie -commune > mairie -conseil regional > region -conseil general > departement -companie > cie diff --git a/scripts/build/configure.php b/scripts/build/configure.php deleted file mode 100644 index ac69154..0000000 --- a/scripts/build/configure.php +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/php - -Utilisation : [actions] - -Installation : - - install list - - install [version] -Configuration : - - config etc [version] - - config list - - config server *.conf - array('archive'=>'sphinx-0.9.9.tar.gz', 'dir'=>'sphinx-0.9.9'), - '1.10' => array('archive'=>'sphinx-1.10-beta.tar.gz', 'dir'=>'sphinx-1.10-beta'), - '2.0.8' => array('archive'=>'sphinx-2.0.8-release.tar.gz', 'dir'=>'sphinx-2.0.8-release'), - '2.1.2' => array('archive'=>'sphinx-2.1.2-release.tar.gz', 
'dir'=>'sphinx-2.1.2-release'), - '2.1.7' => array('archive'=>'sphinx-2.1.7-release.tar.gz', 'dir'=>'sphinx-2.1.7-release'), -); - -/** - * Nécessaire : - * ============ - * GCC : gcc g++ make - * MySQL : libmysqld-dev - * - * tree - * - */ - -if ($argv[1]=='install') -{ - //Version - if (!array_key_exists($argv[2], $SPHINX_VERSION)){ - echo "Erreur.\n"; exit; - } - $version = $argv[2]; - - //Vérification nécessaires - - //Compilation - $cmd = array(); - $cmd[] = "tar xzvf ".$SPHINX_VERSION[$version]['archive']; - $cmd[] = "cp libstemmer_c.tgz ".$SPHINX_VERSION[$version]['dir']."/"; - - foreach($cmd as $c){ - passthru($c); - } - - //Changement du répertoire pour la compilation - chdir($SPHINX_VERSION[$version]['dir']); - - $cmd = array(); - $cmd[] = "tar xzvf libstemmer_c.tgz"; - $cmd[] = "./configure --with-libstemmer --prefix=/usr/local/sphinx"; - $cmd[] = "make"; - $cmd[] = "make install"; - foreach($cmd as $c){ - passthru($c); - } -} - -if ($argv[1]=='config') -{ - //Initscript - if ($argv[2]=='etc') { - $version = $argv[3]; - exec("cp -vRi scripts/build/$version/etc/* /etc/"); - exit; - } - - //Liste des configurations - if ($argv[2]=='list') { - echo passthru("tree config/".$hostname."/"); - exit; - } - - //Configuration - $server = $argv[2]; - if (!file_exists("config/".$hostname."/".$server)){ - echo "Erreur\n"; exit; - } - - $concat = ''; - for($i=3;$i /etc/sphinxsearch/sphinx.conf"); -} - diff --git a/scripts/build/ubuntu-12.04/etc/default/sphinxsearch b/scripts/build/ubuntu-12.04/etc/default/sphinxsearch deleted file mode 100644 index 8d9bf66..0000000 --- a/scripts/build/ubuntu-12.04/etc/default/sphinxsearch +++ /dev/null @@ -1,10 +0,0 @@ -# -# Settings for the sphinxsearch searchd daemon -# Please read /usr/share/doc/sphinxsearch/README.Debian for details. -# - -# Should sphinxsearch run automatically on startup? (default: no) -# Before doing this you might want to modify /etc/sphinxsearch/sphinx.conf -# so that it works for you. 
-START=yes - diff --git a/scripts/build/ubuntu-12.04/etc/init.d/sphinxsearch b/scripts/build/ubuntu-12.04/etc/init.d/sphinxsearch deleted file mode 100644 index 0138977..0000000 --- a/scripts/build/ubuntu-12.04/etc/init.d/sphinxsearch +++ /dev/null @@ -1 +0,0 @@ -/lib/init/upstart-job \ No newline at end of file diff --git a/scripts/build/ubuntu-12.04/etc/init/sphinxsearch.conf b/scripts/build/ubuntu-12.04/etc/init/sphinxsearch.conf deleted file mode 100644 index c2da12e..0000000 --- a/scripts/build/ubuntu-12.04/etc/init/sphinxsearch.conf +++ /dev/null @@ -1,34 +0,0 @@ -# SphinxSearch Service - -description "SphinxSearch Daemon" -author "Andrey Aksyonoff " - -start on (net-device-up - and local-filesystems - and runlevel [2345]) -stop on runlevel [016] - -respawn -respawn limit 10 35 - -# The default of 5 seconds is too low if we have rt indices and have to flush them -kill timeout 30 - -env DEFAULTFILE="/etc/default/sphinxsearch" -pre-start script - if [ -f "$DEFAULTFILE" ]; then - . "$DEFAULTFILE" - fi - if [ "z$START" != "zyes" ]; then - stop - logger "To enable sphinxsearch, edit /etc/default/sphinxsearch and set START=yes" - exit 0 - fi - if [ ! -f /etc/sphinxsearch/sphinx.conf ]; then - logger "Please create an /etc/sphinxsearch/sphinx.conf configuration file." - logger "Templates are in the /etc/sphinxsearch/ directory." 
- exit 0 - fi -end script - -exec /usr/local/sphinx/bin/searchd --nodetach --config /etc/sphinxsearch/sphinx.conf diff --git a/scripts/build/ubuntu-12.04/etc/logrotate.d/indexer b/scripts/build/ubuntu-12.04/etc/logrotate.d/indexer deleted file mode 100644 index 222a5dc..0000000 --- a/scripts/build/ubuntu-12.04/etc/logrotate.d/indexer +++ /dev/null @@ -1,8 +0,0 @@ -/dbs/sphinxlog/indexer.log { - missingok - notifempty - weekly - rotate 5 - compress - delaycompress -} \ No newline at end of file diff --git a/scripts/build/ubuntu-12.04/etc/logrotate.d/searchd b/scripts/build/ubuntu-12.04/etc/logrotate.d/searchd deleted file mode 100644 index bbe7a31..0000000 --- a/scripts/build/ubuntu-12.04/etc/logrotate.d/searchd +++ /dev/null @@ -1,12 +0,0 @@ -/dbs/sphinxlog/query.log /dbs/sphinxlog/searchd.log { - missingok - notifempty - weekly - rotate 5 - compress - delaycompress - create 640 root root - postrotate - killall -SIGUSR1 searchd - endscript -} \ No newline at end of file diff --git a/scripts/build/ubuntu-14.04/etc/default/sphinxsearch b/scripts/build/ubuntu-14.04/etc/default/sphinxsearch deleted file mode 100644 index 8d9bf66..0000000 --- a/scripts/build/ubuntu-14.04/etc/default/sphinxsearch +++ /dev/null @@ -1,10 +0,0 @@ -# -# Settings for the sphinxsearch searchd daemon -# Please read /usr/share/doc/sphinxsearch/README.Debian for details. -# - -# Should sphinxsearch run automatically on startup? (default: no) -# Before doing this you might want to modify /etc/sphinxsearch/sphinx.conf -# so that it works for you. 
-START=yes - diff --git a/scripts/build/ubuntu-14.04/etc/init/sphinxsearch.conf b/scripts/build/ubuntu-14.04/etc/init/sphinxsearch.conf deleted file mode 100644 index c2da12e..0000000 --- a/scripts/build/ubuntu-14.04/etc/init/sphinxsearch.conf +++ /dev/null @@ -1,34 +0,0 @@ -# SphinxSearch Service - -description "SphinxSearch Daemon" -author "Andrey Aksyonoff " - -start on (net-device-up - and local-filesystems - and runlevel [2345]) -stop on runlevel [016] - -respawn -respawn limit 10 35 - -# The default of 5 seconds is too low if we have rt indices and have to flush them -kill timeout 30 - -env DEFAULTFILE="/etc/default/sphinxsearch" -pre-start script - if [ -f "$DEFAULTFILE" ]; then - . "$DEFAULTFILE" - fi - if [ "z$START" != "zyes" ]; then - stop - logger "To enable sphinxsearch, edit /etc/default/sphinxsearch and set START=yes" - exit 0 - fi - if [ ! -f /etc/sphinxsearch/sphinx.conf ]; then - logger "Please create an /etc/sphinxsearch/sphinx.conf configuration file." - logger "Templates are in the /etc/sphinxsearch/ directory." 
- exit 0 - fi -end script - -exec /usr/local/sphinx/bin/searchd --nodetach --config /etc/sphinxsearch/sphinx.conf diff --git a/scripts/build/ubuntu-14.04/etc/logrotate.d/indexer b/scripts/build/ubuntu-14.04/etc/logrotate.d/indexer deleted file mode 100644 index 54738eb..0000000 --- a/scripts/build/ubuntu-14.04/etc/logrotate.d/indexer +++ /dev/null @@ -1,8 +0,0 @@ -/dbs/log/indexer.log { - missingok - notifempty - weekly - rotate 5 - compress - delaycompress -} \ No newline at end of file diff --git a/scripts/build/ubuntu-14.04/etc/logrotate.d/searchd b/scripts/build/ubuntu-14.04/etc/logrotate.d/searchd deleted file mode 100644 index 17124ba..0000000 --- a/scripts/build/ubuntu-14.04/etc/logrotate.d/searchd +++ /dev/null @@ -1,12 +0,0 @@ -/dbs/log/query.log /dbs/log/searchd.log { - missingok - notifempty - weekly - rotate 5 - compress - delaycompress - create 640 root root - postrotate - killall -SIGUSR1 searchd - endscript -} \ No newline at end of file From 663caea9422d75ae89e799420e2bdaa3fed3a076 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 7 Sep 2015 06:29:24 +0000 Subject: [PATCH 25/40] Reprise --- indexer/reprise-ciblage.sh | 19 +++++++++++++++++++ indexer/reprise-dir.sh | 17 +++++++++++++++++ indexer/reprise-ent.sh | 16 ++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 indexer/reprise-ciblage.sh create mode 100644 indexer/reprise-dir.sh create mode 100644 indexer/reprise-ent.sh diff --git a/indexer/reprise-ciblage.sh b/indexer/reprise-ciblage.sh new file mode 100644 index 0000000..acf33d1 --- /dev/null +++ b/indexer/reprise-ciblage.sh @@ -0,0 +1,19 @@ +#!/bin/bash +PATH_BIN=/usr/bin +PATH_LOG=/var/lib/sphinx/log +PATH_SQL=/home/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +echo "$(date '+%Y-%m-%d %H:%M:%S') === RERPISE INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log + +# Sphinx rotate +echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log +$PATH_BIN/indexer --config 
/etc/sphinxsearch/sphinx.reprise.conf --rotate ciblage >> $PATH_LOG/indexer.log +echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN REPRISE INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log + + + diff --git a/indexer/reprise-dir.sh b/indexer/reprise-dir.sh new file mode 100644 index 0000000..aaef72e --- /dev/null +++ b/indexer/reprise-dir.sh @@ -0,0 +1,17 @@ +#!/bin/bash +PATH_BIN=/usr/local/sphinx/bin +PATH_LOG=/var/lib/sphinx/log +PATH_SQL=/home/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +echo "$(date '+%Y-%m-%d %H:%M:%S') === REPRISE INDEXATION DIR" >> $PATH_LOG/indexer.log + +# Sphinx rotate +echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log +$PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.reprise.conf --rotate dir dir_phx >> $PATH_LOG/indexer.log +echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN REPRISE INDEXATION DIR" >> $PATH_LOG/indexer.log + \ No newline at end of file diff --git a/indexer/reprise-ent.sh b/indexer/reprise-ent.sh new file mode 100644 index 0000000..7fe0409 --- /dev/null +++ b/indexer/reprise-ent.sh @@ -0,0 +1,16 @@ +#!/bin/bash +PATH_BIN=/usr/bin +PATH_LOG=/var/lib/sphinx/log +PATH_SQL=/home/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +echo "$(date '+%Y-%m-%d %H:%M:%S') === REPRISE INDEXATION ENT" >> $PATH_LOG/indexer.log + +# Sphinx rotate +echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log +$PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.reprise.conf --rotate ent ent_phx >> $PATH_LOG/indexer.log +echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN REPRISE INDEXATION ENT" >> $PATH_LOG/indexer.log From bf06874aaef95a9e4e3fd4e6ed0e72696a7b1bb0 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 7 Sep 
2015 06:29:39 +0000 Subject: [PATCH 26/40] Update doc --- README | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README b/README index 0afb7ce..7c62a3d 100644 --- a/README +++ b/README @@ -7,17 +7,15 @@ Create a /etc/sphinxsearch/sphinx.conf See in scripts/build/config-* sphinx.conf : Sphinx Engine config - *.conf : one file per index + *.conf : one file per index -Concat only needed file + * Concat only needed file Indexing -------- indexer-* slave-* reprise-* - manual-* - dev-* Ubuntu PPA repository ===================== From ac29ef902367b40b407faa9f110e8d10ab885bee Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 7 Sep 2015 07:39:55 +0000 Subject: [PATCH 27/40] Name script and update doc --- README | 64 +++++++++---------- indexer/manual-act.sh | 7 ++ .../{reprise-ciblage.sh => manual-ciblage.sh} | 0 indexer/{reprise-dir.sh => manual-dir.sh} | 0 indexer/{reprise-ent.sh => manual-ent.sh} | 0 indexer/manual-histo.sh | 7 ++ indexer/manual-indexer-ciblage.sh | 40 ------------ 7 files changed, 46 insertions(+), 72 deletions(-) create mode 100644 indexer/manual-act.sh rename indexer/{reprise-ciblage.sh => manual-ciblage.sh} (100%) rename indexer/{reprise-dir.sh => manual-dir.sh} (100%) rename indexer/{reprise-ent.sh => manual-ent.sh} (100%) create mode 100644 indexer/manual-histo.sh delete mode 100644 indexer/manual-indexer-ciblage.sh diff --git a/README b/README index 7c62a3d..cbc9ce2 100644 --- a/README +++ b/README @@ -1,46 +1,46 @@ -How to Use -========== + +USAGE +===== Sphinx Engine configuration --------------------------- -Create a /etc/sphinxsearch/sphinx.conf - -See in scripts/build/config-* - sphinx.conf : Sphinx Engine config - *.conf : one file per index - - * Concat only needed file + Create a /etc/sphinxsearch/sphinx.conf + + See in scripts/build/config-* + sphinx.conf : Sphinx Engine config + *.conf : one file per index + + Table rotation is use with some index + Indexing -------- - indexer-* - slave-* - reprise-* + 
indexer-* : Index on master database with rotation + slave-* : Index on slave database only if table have rotate + manual-* : Manually reload index + Ubuntu PPA repository ===================== - -sudo apt-get install software-properties-common -sudo add-apt-repository ppa:builds/sphinxsearch-rel22 -sudo apt-get install libstemmer0d -sudo apt-get update && apt-get install sphinxsearch + sudo apt-get install software-properties-common + sudo add-apt-repository ppa:builds/sphinxsearch-rel22 + sudo apt-get install libstemmer0d + sudo apt-get update && apt-get install sphinxsearch Manual installation =================== -htop -bwm-ng -mysql-client-5.5 -gcc g++ make libmysqld-dev -wget -nv -O - http://snowball.tartarus.org/dist/libstemmer_c.tgz | tar zx -wget -nv -O - https://re2.googlecode.com/files/re2-20140304.tgz | tar zx -wget http://sphinxsearch.com/files/sphinx-2.2.7-release.tar.gz -tar xzvf sphinx-2.2.7-release.tar.gz -cp -R libstemmer_c/* sphinx-2.2.7-release/libstemmer_c/ -sed -i -e 's/stem_ISO_8859_1_hungarian/stem_ISO_8859_2_hungarian/g' sphinx-2.2.7-release/libstemmer_c/Makefile.in -cp -R re2/* sphinx-2.2.7-release/libre2/ -cd sphinx-2.2.7-release -./configure --with-libstemmer --with-re2 --prefix=/usr/local/sphinx -make -make install + mysql-client-5.5 + gcc g++ make libmysqld-dev + wget -nv -O - http://snowball.tartarus.org/dist/libstemmer_c.tgz | tar zx + wget -nv -O - https://re2.googlecode.com/files/re2-20140304.tgz | tar zx + wget http://sphinxsearch.com/files/sphinx-2.2.7-release.tar.gz + tar xzvf sphinx-2.2.7-release.tar.gz + cp -R libstemmer_c/* sphinx-2.2.7-release/libstemmer_c/ + sed -i -e 's/stem_ISO_8859_1_hungarian/stem_ISO_8859_2_hungarian/g' sphinx-2.2.7-release/libstemmer_c/Makefile.in + cp -R re2/* sphinx-2.2.7-release/libre2/ + cd sphinx-2.2.7-release + ./configure --with-libstemmer --with-re2 --prefix=/usr/local/sphinx + make + make install diff --git a/indexer/manual-act.sh b/indexer/manual-act.sh new file mode 100644 index 
0000000..7a56cfe --- /dev/null +++ b/indexer/manual-act.sh @@ -0,0 +1,7 @@ +#!/bin/bash +PATH_BIN=/usr/bin +PATH_LOG=/var/lib/sphinx/log + +echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ACT" >> $PATH_LOG/indexer.log +$PATH_BIN/indexer --rotate act >> $PATH_LOG/indexer.log +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ACT" >> $PATH_LOG/indexer.log diff --git a/indexer/reprise-ciblage.sh b/indexer/manual-ciblage.sh similarity index 100% rename from indexer/reprise-ciblage.sh rename to indexer/manual-ciblage.sh diff --git a/indexer/reprise-dir.sh b/indexer/manual-dir.sh similarity index 100% rename from indexer/reprise-dir.sh rename to indexer/manual-dir.sh diff --git a/indexer/reprise-ent.sh b/indexer/manual-ent.sh similarity index 100% rename from indexer/reprise-ent.sh rename to indexer/manual-ent.sh diff --git a/indexer/manual-histo.sh b/indexer/manual-histo.sh new file mode 100644 index 0000000..b178b05 --- /dev/null +++ b/indexer/manual-histo.sh @@ -0,0 +1,7 @@ +#!/bin/bash +PATH_BIN=/usr/bin +PATH_LOG=/var/lib/sphinx/log + +echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION HISTO" >> $PATH_LOG/indexer.log +$PATH_BIN/indexer --rotate histo >> $PATH_LOG/indexer.log +echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION HISTO" >> $PATH_LOG/indexer.log diff --git a/indexer/manual-indexer-ciblage.sh b/indexer/manual-indexer-ciblage.sh deleted file mode 100644 index 36bdb3f..0000000 --- a/indexer/manual-indexer-ciblage.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/dbs/log -PATH_SQL=/home/scripts/indexer/sql -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer - -# Is consolidated ? 
-output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-ciblage.sql) -idx=''; -for line in "$output"; do - idx="$line"; -done -# Suppression fin de ligne -idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") - -# Lancement de l'indexation si la consolidation a eu lieu -if [ -n "$idx" ]; then - if [[ "$idx" > 0 ]]; then - - echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log - - # Enregistrement Debut Indexation - mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log - - # Sphinx rotate - echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log - $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ciblage >> $PATH_LOG/indexer.log - echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log - - # Enregistrement Fin Indexation - mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log - - echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log - - fi -fi - - From 20d0b73ca20e9c3fceb3652f62f653bd86fde9c0 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 7 Sep 2015 08:38:11 +0000 Subject: [PATCH 28/40] Add slave indexing script --- indexer/dev-indexer-all.sh | 3 --- indexer/slave-ciblage.sh | 46 +++++++++++++++++++++++++++++++++++ indexer/slave-dir.sh | 46 +++++++++++++++++++++++++++++++++++ indexer/slave-ent.sh | 46 +++++++++++++++++++++++++++++++++++ indexer/sql/slave-ciblage.sql | 4 +++ indexer/sql/slave-dir.sql | 4 +++ indexer/sql/slave-ent.sql | 4 +++ 7 files changed, 150 insertions(+), 3 deletions(-) delete mode 100644 indexer/dev-indexer-all.sh create mode 100644 indexer/slave-ciblage.sh create mode 100644 indexer/slave-dir.sh create mode 100644 indexer/slave-ent.sh create mode 100644 indexer/sql/slave-ciblage.sql create mode 100644 
indexer/sql/slave-dir.sql create mode 100644 indexer/sql/slave-ent.sql diff --git a/indexer/dev-indexer-all.sh b/indexer/dev-indexer-all.sh deleted file mode 100644 index 140d37f..0000000 --- a/indexer/dev-indexer-all.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -/usr/bin/indexer act dir dir_phx ent ent_phx ciblage diff --git a/indexer/slave-ciblage.sh b/indexer/slave-ciblage.sh new file mode 100644 index 0000000..b7eeae0 --- /dev/null +++ b/indexer/slave-ciblage.sh @@ -0,0 +1,46 @@ +#!/bin/bash +PATH_BIN=/usr/bin +PATH_LOG=/var/lib/sphinx/log +PATH_SQL=/home/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +# Indexation déjà lancé +FILEINDEX=$PATH_LOG/ciblage-$(date '+%Y%m%d').idx +if [ -f "$FILEINDEX" ]; then + exit 0 +fi + +# Indexation sur le master ? +output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/slave-ciblage.sql) +idx=''; +for line in "$output"; do + idx="$line"; +done +# Suppression fin de ligne +idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") + +# Lancement de l'indexation +if [ -n "$idx" ]; then + if [[ "$idx" > 0 ]]; then + + echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log + + # Create file to indicate indexing on slave + rm -v $PATH_LOG/ciblage-*.idx + echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX + + # Sphinx rotate + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log + $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ciblage >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + + echo "FIN $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX + + echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log + + fi +fi + + diff --git a/indexer/slave-dir.sh b/indexer/slave-dir.sh new file mode 100644 index 0000000..ccf93e4 --- /dev/null +++ b/indexer/slave-dir.sh @@ -0,0 +1,46 @@ +#!/bin/bash +PATH_BIN=/usr/bin 
+PATH_LOG=/var/lib/sphinx/log +PATH_SQL=/home/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +# Indexation déjà lancé +FILEINDEX=$PATH_LOG/dir-$(date '+%Y%m%d').idx +if [ -f "$FILEINDEX" ]; then + exit 0 +fi + +# Indexation sur le master ? +output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/slave-dir.sql) +idx=''; +for line in "$output"; do + idx="$line"; +done +# Suppression fin de ligne +idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") + +# Lancement de l'indexation +if [ -n "$idx" ]; then + if [[ "$idx" > 0 ]]; then + + echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION DIR" >> $PATH_LOG/indexer.log + + # Create file to indicate indexing on slave + rm -v $PATH_LOG/dir-*.idx + echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX + + # Sphinx rotate + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log + $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate dir dir_phx >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + + echo "FIN $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX + + echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION DIR" >> $PATH_LOG/indexer.log + + fi +fi + + diff --git a/indexer/slave-ent.sh b/indexer/slave-ent.sh new file mode 100644 index 0000000..fbdab08 --- /dev/null +++ b/indexer/slave-ent.sh @@ -0,0 +1,46 @@ +#!/bin/bash +PATH_BIN=/usr/bin +PATH_LOG=/var/lib/sphinx/log +PATH_SQL=/home/indexer/sql +MYSQL_HOST=192.168.3.30 +MYSQL_USER=sphinx +MYSQL_PASS=indexer + +# Indexation déjà lancé +FILEINDEX=$PATH_LOG/ent-$(date '+%Y%m%d').idx +if [ -f "$FILEINDEX" ]; then + exit 0 +fi + +# Indexation sur le master ? 
+output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/slave-ent.sql) +idx=''; +for line in "$output"; do + idx="$line"; +done +# Suppression fin de ligne +idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") + +# Lancement de l'indexation +if [ -n "$idx" ]; then + if [[ "$idx" > 0 ]]; then + + echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log + + # Create file to indicate indexing on slave + rm -v $PATH_LOG/ent-*.idx + echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX + + # Sphinx rotate + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log + $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ent ent_phx >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + + echo "FIN $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX + + echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log + + fi +fi + + diff --git a/indexer/sql/slave-ciblage.sql b/indexer/sql/slave-ciblage.sql new file mode 100644 index 0000000..63e69ad --- /dev/null +++ b/indexer/sql/slave-ciblage.sql @@ -0,0 +1,4 @@ +SELECT id FROM sphinx_idx +WHERE indexingEnd BETWEEN (NOW() - INTERVAL 2 HOUR) AND NOW() +AND nom = 'jo.etablissements_act' +ORDER BY indexingEnd DESC LIMIT 1; \ No newline at end of file diff --git a/indexer/sql/slave-dir.sql b/indexer/sql/slave-dir.sql new file mode 100644 index 0000000..4985802 --- /dev/null +++ b/indexer/sql/slave-dir.sql @@ -0,0 +1,4 @@ +SELECT id FROM sphinx_idx +WHERE indexingEnd BETWEEN (NOW() - INTERVAL 2 HOUR) AND NOW() +AND nom = 'jo.dirigeants' +ORDER BY indexingEnd DESC LIMIT 1; \ No newline at end of file diff --git a/indexer/sql/slave-ent.sql b/indexer/sql/slave-ent.sql new file mode 100644 index 0000000..c2fe179 --- /dev/null +++ b/indexer/sql/slave-ent.sql @@ -0,0 +1,4 @@ +SELECT id FROM sphinx_idx +WHERE indexingEnd BETWEEN (NOW() - INTERVAL 2 HOUR) AND NOW() +AND nom = 
'jo.etablissements' +ORDER BY indexingEnd DESC LIMIT 1; \ No newline at end of file From fe7b92c91bde7d8d5b898d1166b429027517acbb Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 7 Sep 2015 09:21:42 +0000 Subject: [PATCH 29/40] Simplification des chemins --- indexer/indexer-ciblage.sh | 6 +++--- indexer/indexer-dir.sh | 6 +++--- indexer/indexer-ent.sh | 6 +++--- indexer/manual-ciblage.sh | 4 ---- indexer/manual-dir.sh | 8 ++------ indexer/manual-ent.sh | 6 +----- indexer/manual-histo.sh | 2 +- indexer/slave-ciblage.sh | 6 +++--- indexer/slave-dir.sh | 6 +++--- indexer/slave-ent.sh | 6 +++--- 10 files changed, 22 insertions(+), 34 deletions(-) diff --git a/indexer/indexer-ciblage.sh b/indexer/indexer-ciblage.sh index 395f68a..6042312 100644 --- a/indexer/indexer-ciblage.sh +++ b/indexer/indexer-ciblage.sh @@ -1,10 +1,10 @@ #!/bin/bash -PATH_BIN=/usr/bin -PATH_LOG=/var/lib/sphinx/log -PATH_SQL=/home/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer +PATH_LOG=/var/lib/sphinx/log +PATH_BIN=/usr/bin +PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql # Is consolidated ? output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-ciblage.sql) diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh index 2929987..018d9c7 100755 --- a/indexer/indexer-dir.sh +++ b/indexer/indexer-dir.sh @@ -1,10 +1,10 @@ #!/bin/bash -PATH_BIN=/usr/local/sphinx/bin -PATH_LOG=/var/lib/sphinx/log -PATH_SQL=/home/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer +PATH_LOG=/var/lib/sphinx/log +PATH_BIN=/usr/bin +PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql # Is consolidated ? 
output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-dir.sql) diff --git a/indexer/indexer-ent.sh b/indexer/indexer-ent.sh index bb19dc1..018a554 100755 --- a/indexer/indexer-ent.sh +++ b/indexer/indexer-ent.sh @@ -1,10 +1,10 @@ #!/bin/bash -PATH_BIN=/usr/bin -PATH_LOG=/var/lib/sphinx/log -PATH_SQL=/home/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer +PATH_LOG=/var/lib/sphinx/log +PATH_BIN=/usr/bin +PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql # Is consolidated ? output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-ent.sql) diff --git a/indexer/manual-ciblage.sh b/indexer/manual-ciblage.sh index acf33d1..d231d56 100644 --- a/indexer/manual-ciblage.sh +++ b/indexer/manual-ciblage.sh @@ -1,10 +1,6 @@ #!/bin/bash PATH_BIN=/usr/bin PATH_LOG=/var/lib/sphinx/log -PATH_SQL=/home/indexer/sql -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer echo "$(date '+%Y-%m-%d %H:%M:%S') === RERPISE INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log diff --git a/indexer/manual-dir.sh b/indexer/manual-dir.sh index aaef72e..55e399a 100644 --- a/indexer/manual-dir.sh +++ b/indexer/manual-dir.sh @@ -1,11 +1,7 @@ #!/bin/bash -PATH_BIN=/usr/local/sphinx/bin PATH_LOG=/var/lib/sphinx/log -PATH_SQL=/home/indexer/sql -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer - +PATH_BIN=/usr/bin + echo "$(date '+%Y-%m-%d %H:%M:%S') === REPRISE INDEXATION DIR" >> $PATH_LOG/indexer.log # Sphinx rotate diff --git a/indexer/manual-ent.sh b/indexer/manual-ent.sh index 7fe0409..dda4f32 100644 --- a/indexer/manual-ent.sh +++ b/indexer/manual-ent.sh @@ -1,10 +1,6 @@ #!/bin/bash -PATH_BIN=/usr/bin PATH_LOG=/var/lib/sphinx/log -PATH_SQL=/home/indexer/sql -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer +PATH_BIN=/usr/bin echo "$(date '+%Y-%m-%d %H:%M:%S') === REPRISE INDEXATION ENT" >> $PATH_LOG/indexer.log diff --git a/indexer/manual-histo.sh b/indexer/manual-histo.sh 
index b178b05..59b8aaa 100644 --- a/indexer/manual-histo.sh +++ b/indexer/manual-histo.sh @@ -1,6 +1,6 @@ #!/bin/bash -PATH_BIN=/usr/bin PATH_LOG=/var/lib/sphinx/log +PATH_BIN=/usr/bin echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION HISTO" >> $PATH_LOG/indexer.log $PATH_BIN/indexer --rotate histo >> $PATH_LOG/indexer.log diff --git a/indexer/slave-ciblage.sh b/indexer/slave-ciblage.sh index b7eeae0..1b4a02d 100644 --- a/indexer/slave-ciblage.sh +++ b/indexer/slave-ciblage.sh @@ -1,10 +1,10 @@ #!/bin/bash -PATH_BIN=/usr/bin -PATH_LOG=/var/lib/sphinx/log -PATH_SQL=/home/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer +PATH_LOG=/var/lib/sphinx/log +PATH_BIN=/usr/bin +PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql # Indexation déjà lancé FILEINDEX=$PATH_LOG/ciblage-$(date '+%Y%m%d').idx diff --git a/indexer/slave-dir.sh b/indexer/slave-dir.sh index ccf93e4..e52ac0e 100644 --- a/indexer/slave-dir.sh +++ b/indexer/slave-dir.sh @@ -1,10 +1,10 @@ #!/bin/bash -PATH_BIN=/usr/bin -PATH_LOG=/var/lib/sphinx/log -PATH_SQL=/home/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer +PATH_LOG=/var/lib/sphinx/log +PATH_BIN=/usr/bin +PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql # Indexation déjà lancé FILEINDEX=$PATH_LOG/dir-$(date '+%Y%m%d').idx diff --git a/indexer/slave-ent.sh b/indexer/slave-ent.sh index fbdab08..31cebbd 100644 --- a/indexer/slave-ent.sh +++ b/indexer/slave-ent.sh @@ -1,10 +1,10 @@ #!/bin/bash -PATH_BIN=/usr/bin -PATH_LOG=/var/lib/sphinx/log -PATH_SQL=/home/indexer/sql MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer +PATH_LOG=/var/lib/sphinx/log +PATH_BIN=/usr/bin +PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql # Indexation déjà lancé FILEINDEX=$PATH_LOG/ent-$(date '+%Y%m%d').idx From 81eb6136db6cb0c35bb1d66c2a1fc090b583af97 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 7 Sep 2015 13:24:16 +0000 Subject: [PATCH 30/40] /var/lib/sphinx remplacer 
par var/lib/sphinxsearch --- README | 4 ++-- config/act.conf | 2 +- config/ciblage.conf | 2 +- config/ciblagetmp.conf | 2 +- config/dir.conf | 2 +- config/dirtmp.conf | 2 +- config/ent.conf | 4 ++-- config/enttmp.conf | 2 +- config/histo.conf | 2 +- config/sphinx.conf | 6 +++--- indexer/indexer-act.sh | 2 +- indexer/indexer-ciblage.sh | 2 +- indexer/indexer-dir.sh | 2 +- indexer/indexer-ent.sh | 2 +- indexer/manual-act.sh | 2 +- indexer/manual-ciblage.sh | 2 +- indexer/manual-dir.sh | 2 +- indexer/manual-ent.sh | 2 +- indexer/manual-histo.sh | 2 +- indexer/slave-ciblage.sh | 2 +- indexer/slave-dir.sh | 2 +- indexer/slave-ent.sh | 2 +- 22 files changed, 26 insertions(+), 26 deletions(-) diff --git a/README b/README index cbc9ce2..69df671 100644 --- a/README +++ b/README @@ -29,8 +29,8 @@ Ubuntu PPA repository Manual installation =================== - mysql-client-5.5 - gcc g++ make libmysqld-dev + apt-get install mysql-client-5.5 + apt-get install gcc g++ make libmysqld-dev wget -nv -O - http://snowball.tartarus.org/dist/libstemmer_c.tgz | tar zx wget -nv -O - https://re2.googlecode.com/files/re2-20140304.tgz | tar zx wget http://sphinxsearch.com/files/sphinx-2.2.7-release.tar.gz diff --git a/config/act.conf b/config/act.conf index a1afa14..5e6ab3a 100644 --- a/config/act.conf +++ b/config/act.conf @@ -22,7 +22,7 @@ source act index act { source = act - path = /var/lib/sphinx/idx/act + path = /var/lib/sphinxsearch/idx/act docinfo = extern morphology = none charset_type = utf-8 diff --git a/config/ciblage.conf b/config/ciblage.conf index afd16fb..81dfd0b 100644 --- a/config/ciblage.conf +++ b/config/ciblage.conf @@ -141,7 +141,7 @@ source ciblage index ciblage { source = ciblage - path = /var/lib/sphinx/idx/ciblage + path = /var/lib/sphinxsearch/idx/ciblage docinfo = extern morphology = none charset_type = utf-8 diff --git a/config/ciblagetmp.conf b/config/ciblagetmp.conf index d69b75a..2267a57 100644 --- a/config/ciblagetmp.conf +++ b/config/ciblagetmp.conf @@ 
-141,7 +141,7 @@ source ciblage index ciblage { source = ciblage - path = /var/lib/sphinx/idx/ciblage + path = /var/lib/sphinxsearch/idx/ciblage docinfo = extern morphology = none charset_type = utf-8 diff --git a/config/dir.conf b/config/dir.conf index 11cfb58..f5b6eb8 100644 --- a/config/dir.conf +++ b/config/dir.conf @@ -24,7 +24,7 @@ source dir index dir { source = dir - path = /var/lib/sphinx/idx/dir + path = /var/lib/sphinxsearch/idx/dir docinfo = extern morphology = none charset_type = utf-8 diff --git a/config/dirtmp.conf b/config/dirtmp.conf index 8a289fc..35527b3 100644 --- a/config/dirtmp.conf +++ b/config/dirtmp.conf @@ -24,7 +24,7 @@ source dir index dir { source = dir - path = /var/lib/sphinx/idx/dir + path = /var/lib/sphinxsearch/idx/dir docinfo = extern morphology = none charset_type = utf-8 diff --git a/config/ent.conf b/config/ent.conf index e1fbf46..b47753c 100644 --- a/config/ent.conf +++ b/config/ent.conf @@ -30,7 +30,7 @@ source ent index ent { source = ent - path = /var/lib/sphinx/idx/ent + path = /var/lib/sphinxsearch/idx/ent mlock = 1 docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt @@ -51,7 +51,7 @@ index ent index ent_phx { source = ent - path = /var/lib/sphinx/idx/ent_phx + path = /var/lib/sphinxsearch/idx/ent_phx mlock = 1 docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt diff --git a/config/enttmp.conf b/config/enttmp.conf index f27cc2f..a502a70 100644 --- a/config/enttmp.conf +++ b/config/enttmp.conf @@ -29,7 +29,7 @@ source ent index ent { source = ent - path = /var/lib/sphinx/idx/ent + path = /var/lib/sphinxsearch/idx/ent mlock = 1 docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt diff --git a/config/histo.conf b/config/histo.conf index 9597a08..e4ada27 100644 --- a/config/histo.conf +++ b/config/histo.conf @@ -16,7 +16,7 @@ source histo index histo { source = histo - path = /var/lib/sphinx/idx/histo + path = /var/lib/sphinxsearch/idx/histo min_word_len = 3 html_strip = 1 charset_type = 
utf-8 diff --git a/config/sphinx.conf b/config/sphinx.conf index e235c7b..db05188 100644 --- a/config/sphinx.conf +++ b/config/sphinx.conf @@ -103,11 +103,11 @@ searchd # log file, searchd run info is logged here # optional, default is 'searchd.log' - log = /var/lib/sphinx/log/searchd.log + log = /var/lib/sphinxsearch/log/searchd.log # query log file, all search queries are logged here # optional, default is empty (do not log queries) - query_log = /var/lib/sphinx/log/query.log + query_log = /var/lib/sphinxsearch/log/query.log # client read timeout, seconds # optional, default is 5 @@ -239,7 +239,7 @@ searchd # optional, default is build-time configured data directory # # binlog_path = # disable logging - # binlog_path = /var/lib/sphinxsearch/data # binlog.001 etc will be created there + # binlog_path = /var/lib/sphinxsearchsearch/data # binlog.001 etc will be created there # binlog flush/sync mode diff --git a/indexer/indexer-act.sh b/indexer/indexer-act.sh index 7a56cfe..b8b31b2 100644 --- a/indexer/indexer-act.sh +++ b/indexer/indexer-act.sh @@ -1,6 +1,6 @@ #!/bin/bash PATH_BIN=/usr/bin -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ACT" >> $PATH_LOG/indexer.log $PATH_BIN/indexer --rotate act >> $PATH_LOG/indexer.log diff --git a/indexer/indexer-ciblage.sh b/indexer/indexer-ciblage.sh index 6042312..a21c8af 100644 --- a/indexer/indexer-ciblage.sh +++ b/indexer/indexer-ciblage.sh @@ -2,7 +2,7 @@ MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh index 018d9c7..9e4b1f7 100755 --- a/indexer/indexer-dir.sh +++ b/indexer/indexer-dir.sh @@ -2,7 +2,7 @@ MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log 
PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql diff --git a/indexer/indexer-ent.sh b/indexer/indexer-ent.sh index 018a554..e183119 100755 --- a/indexer/indexer-ent.sh +++ b/indexer/indexer-ent.sh @@ -2,7 +2,7 @@ MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql diff --git a/indexer/manual-act.sh b/indexer/manual-act.sh index 7a56cfe..b8b31b2 100644 --- a/indexer/manual-act.sh +++ b/indexer/manual-act.sh @@ -1,6 +1,6 @@ #!/bin/bash PATH_BIN=/usr/bin -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ACT" >> $PATH_LOG/indexer.log $PATH_BIN/indexer --rotate act >> $PATH_LOG/indexer.log diff --git a/indexer/manual-ciblage.sh b/indexer/manual-ciblage.sh index d231d56..3bad281 100644 --- a/indexer/manual-ciblage.sh +++ b/indexer/manual-ciblage.sh @@ -1,6 +1,6 @@ #!/bin/bash PATH_BIN=/usr/bin -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log echo "$(date '+%Y-%m-%d %H:%M:%S') === RERPISE INDEXATION CIBLAGE" >> $PATH_LOG/indexer.log diff --git a/indexer/manual-dir.sh b/indexer/manual-dir.sh index 55e399a..a69260f 100644 --- a/indexer/manual-dir.sh +++ b/indexer/manual-dir.sh @@ -1,5 +1,5 @@ #!/bin/bash -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin echo "$(date '+%Y-%m-%d %H:%M:%S') === REPRISE INDEXATION DIR" >> $PATH_LOG/indexer.log diff --git a/indexer/manual-ent.sh b/indexer/manual-ent.sh index dda4f32..c6b5cfb 100644 --- a/indexer/manual-ent.sh +++ b/indexer/manual-ent.sh @@ -1,5 +1,5 @@ #!/bin/bash -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin echo "$(date '+%Y-%m-%d %H:%M:%S') === REPRISE INDEXATION ENT" >> $PATH_LOG/indexer.log diff --git a/indexer/manual-histo.sh b/indexer/manual-histo.sh index 59b8aaa..4f1712e 100644 --- 
a/indexer/manual-histo.sh +++ b/indexer/manual-histo.sh @@ -1,5 +1,5 @@ #!/bin/bash -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION HISTO" >> $PATH_LOG/indexer.log diff --git a/indexer/slave-ciblage.sh b/indexer/slave-ciblage.sh index 1b4a02d..7b58835 100644 --- a/indexer/slave-ciblage.sh +++ b/indexer/slave-ciblage.sh @@ -2,7 +2,7 @@ MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql diff --git a/indexer/slave-dir.sh b/indexer/slave-dir.sh index e52ac0e..4ab145f 100644 --- a/indexer/slave-dir.sh +++ b/indexer/slave-dir.sh @@ -2,7 +2,7 @@ MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql diff --git a/indexer/slave-ent.sh b/indexer/slave-ent.sh index 31cebbd..7fcfbd6 100644 --- a/indexer/slave-ent.sh +++ b/indexer/slave-ent.sh @@ -2,7 +2,7 @@ MYSQL_HOST=192.168.3.30 MYSQL_USER=sphinx MYSQL_PASS=indexer -PATH_LOG=/var/lib/sphinx/log +PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql From c05a1e29d0da103886bbe9d72f2ae17ec71ce3d1 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Mon, 7 Sep 2015 14:31:46 +0000 Subject: [PATCH 31/40] Update doc --- README | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/README b/README index 69df671..21c3943 100644 --- a/README +++ b/README @@ -4,21 +4,22 @@ USAGE Sphinx Engine configuration --------------------------- - Create a /etc/sphinxsearch/sphinx.conf - - See in scripts/build/config-* + - Config is stored in config/ sphinx.conf : Sphinx Engine config - *.conf : one file per index - - Table rotation is use with some index - + 
*.conf : one file per index + conf with "tmp" must be used only on master + - Create /etc/sphinxsearch/sphinx.conf + - Create /etc/sphinxsearch/sphinx.reprise.conf + - Data storage + /var/lib/sphinxsearch/log : Log + /var/lib/sphinxsearch/idx : Data index Indexing -------- - indexer-* : Index on master database with rotation - slave-* : Index on slave database only if table have rotate - manual-* : Manually reload index - + indexer-*.sh : Index on master database with rotation + slave-*.sh : Index on slave database only if master table has rotated + manual-*.sh : Manually reload index + Ubuntu PPA repository ===================== @@ -27,6 +28,7 @@ Ubuntu PPA repository sudo apt-get install libstemmer0d sudo apt-get update && apt-get install sphinxsearch + Manual installation =================== apt-get install mysql-client-5.5 From d593194f59655914f5929a4c61b42b997d3c998c Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 9 Sep 2015 06:18:13 +0000 Subject: [PATCH 32/40] Remove deprecated config option --- config/act.conf | 1 - config/ciblage.conf | 1 - config/ciblagetmp.conf | 1 - config/dir.conf | 2 +- config/dirtmp.conf | 4 +--- config/ent.conf | 2 -- config/enttmp.conf | 2 +- config/histo.conf | 1 - 8 files changed, 3 insertions(+), 11 deletions(-) diff --git a/config/act.conf b/config/act.conf index 5e6ab3a..fddb0ab 100644 --- a/config/act.conf +++ b/config/act.conf @@ -25,7 +25,6 @@ index act path = /var/lib/sphinxsearch/idx/act docinfo = extern morphology = none - charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ diff --git a/config/ciblage.conf b/config/ciblage.conf index 81dfd0b..2cd7f55 100644 --- a/config/ciblage.conf +++ b/config/ciblage.conf @@ -144,7 +144,6 @@ index ciblage path = /var/lib/sphinxsearch/idx/ciblage docinfo = extern morphology = none - charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, 
U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ diff --git a/config/ciblagetmp.conf b/config/ciblagetmp.conf index 2267a57..0bab6fe 100644 --- a/config/ciblagetmp.conf +++ b/config/ciblagetmp.conf @@ -144,7 +144,6 @@ index ciblage path = /var/lib/sphinxsearch/idx/ciblage docinfo = extern morphology = none - charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ diff --git a/config/dir.conf b/config/dir.conf index f5b6eb8..e94619d 100644 --- a/config/dir.conf +++ b/config/dir.conf @@ -43,7 +43,7 @@ index dir index dir_phx { source = dir - path = /dbs/idx/dir_phx + path = /var/lib/sphinxsearch/idx/dir_phx docinfo = extern min_stemming_len = 4 charset_type = utf-8 diff --git a/config/dirtmp.conf b/config/dirtmp.conf index 35527b3..9902b3e 100644 --- a/config/dirtmp.conf +++ b/config/dirtmp.conf @@ -27,7 +27,6 @@ index dir path = /var/lib/sphinxsearch/idx/dir docinfo = extern morphology = none - charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ @@ -43,10 +42,9 @@ index dir index dir_phx { source = dir - path = /dbs/sphinx/dir_phx + path = /var/lib/sphinxsearch/idx/dir_phx docinfo = extern min_stemming_len = 4 - charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ diff --git a/config/ent.conf b/config/ent.conf index b47753c..d0905e5 100644 --- a/config/ent.conf +++ b/config/ent.conf @@ -35,7 +35,6 @@ index ent docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt wordforms = /etc/sphinxsearch/wordforms-ent.txt - charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, 
U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ @@ -59,7 +58,6 @@ index ent_phx wordforms = /etc/sphinxsearch/wordforms-ent.txt morphology = libstemmer_fr min_stemming_len = 4 - charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ diff --git a/config/enttmp.conf b/config/enttmp.conf index a502a70..61b2ca0 100644 --- a/config/enttmp.conf +++ b/config/enttmp.conf @@ -50,7 +50,7 @@ index ent index ent_phx { source = ent - path = /dbs/idx/ent_phx + path = /var/lib/sphinxsearch/idx/ent_phx mlock = 1 docinfo = extern stopwords = /etc/sphinxsearch/stopwords-ent.txt diff --git a/config/histo.conf b/config/histo.conf index e4ada27..7a373c6 100644 --- a/config/histo.conf +++ b/config/histo.conf @@ -19,7 +19,6 @@ index histo path = /var/lib/sphinxsearch/idx/histo min_word_len = 3 html_strip = 1 - charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ From 07c52d038f7bf81651b2faceb18cabcd30ac671b Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 9 Sep 2015 06:32:17 +0000 Subject: [PATCH 33/40] Update from documentation --- config/sphinx.conf | 108 +++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 57 deletions(-) diff --git a/config/sphinx.conf b/config/sphinx.conf index db05188..42cafdf 100644 --- a/config/sphinx.conf +++ b/config/sphinx.conf @@ -5,82 +5,76 @@ indexer { - # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) - # optional, default is 32M, max is 2047M, recommended is 256M to 1024M - mem_limit = 256M + # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) + # optional, default is 128M, max is 2047M, recommended is 256M to 1024M + mem_limit = 256M - # maximum IO calls per second (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iops = 40 + # maximum IO calls 
per second (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iops = 40 - # maximum IO call size, bytes (for I/O throttling) - # optional, default is 0 (unlimited) - # - # max_iosize = 1048576 + # maximum IO call size, bytes (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iosize = 1048576 - # maximum xmlpipe2 field length, bytes - # optional, default is 2M - # - # max_xmlpipe2_field = 4M + # maximum xmlpipe2 field length, bytes + # optional, default is 2M + # + # max_xmlpipe2_field = 4M - # write buffer size, bytes - # several (currently up to 4) buffers will be allocated - # write buffers are allocated in addition to mem_limit - # optional, default is 1M - # - # write_buffer = 1M + # write buffer size, bytes + # several (currently up to 4) buffers will be allocated + # write buffers are allocated in addition to mem_limit + # optional, default is 1M + # + # write_buffer = 1M - # maximum file field adaptive buffer size - # optional, default is 8M, minimum is 1M - # - # max_file_field_buffer = 32M + # maximum file field adaptive buffer size + # optional, default is 8M, minimum is 1M + # + # max_file_field_buffer = 32M - # how to handle IO errors in file fields - # known values are 'ignore_field', 'skip_document', and 'fail_index' - # optional, default is 'ignore_field' - # - # on_file_field_error = skip_document + # how to handle IO errors in file fields + # known values are 'ignore_field', 'skip_document', and 'fail_index' + # optional, default is 'ignore_field' + # + # on_file_field_error = skip_document - # how to handle syntax errors in JSON attributes - # known values are 'ignore_attr' and 'fail_index' - # optional, default is 'ignore_attr' - # - # on_json_attr_error = fail_index + # how to handle syntax errors in JSON attributes + # known values are 'ignore_attr' and 'fail_index' + # optional, default is 'ignore_attr' + # + # on_json_attr_error = fail_index - # whether to auto-convert numeric values from strings in 
JSON attributes - # with auto-conversion, string value with actually numeric data - # (as in {"key":"12345"}) gets stored as a number, rather than string - # optional, allowed values are 0 and 1, default is 0 (do not convert) - # - # json_autoconv_numbers = 1 + # whether to auto-convert numeric values from strings in JSON attributes + # with auto-conversion, string value with actually numeric data + # (as in {"key":"12345"}) gets stored as a number, rather than string + # optional, allowed values are 0 and 1, default is 0 (do not convert) + # + # json_autoconv_numbers = 1 - # whether and how to auto-convert key names in JSON attributes - # known value is 'lowercase' - # optional, default is unspecified (do nothing) - # - # json_autoconv_keynames = lowercase + # whether and how to auto-convert key names in JSON attributes + # known value is 'lowercase' + # optional, default is unspecified (do nothing) + # + # json_autoconv_keynames = lowercase - # lemmatizer dictionaries base path - # optional, defaut is /usr/local/share (see ./configure --datadir) - # - # lemmatizer_base = /usr/local/share/sphinx/dicts - - - # lemmatizer cache size - # improves the indexing time when the lemmatization is enabled - # optional, default is 256K - # - # lemmatizer_cache = 512M + # lemmatizer cache size + # improves the indexing time when the lemmatization is enabled + # optional, default is 256K + # + # lemmatizer_cache = 512M } ############################################################################# From 91cbf44eacfc755af32fcc667e932d9f7d0504c8 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 9 Sep 2015 06:36:59 +0000 Subject: [PATCH 34/40] Update from documentation --- config/sphinx.conf | 387 +++++++++++++++++++++------------------------ 1 file changed, 182 insertions(+), 205 deletions(-) diff --git a/config/sphinx.conf b/config/sphinx.conf index 42cafdf..2f3daed 100644 --- a/config/sphinx.conf +++ b/config/sphinx.conf @@ -83,278 +83,255 @@ indexer searchd { - # 
[hostname:]port[:protocol], or /unix/socket/path to listen on - # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL) - # - # multi-value, multiple listen points are allowed - # optional, defaults are 9312:sphinx and 9306:mysql41, as below - # - # listen = 127.0.0.1 - # listen = 192.168.0.1:9312 - # listen = 9312 - # listen = /var/run/searchd.sock - listen = 9312 - - # log file, searchd run info is logged here - # optional, default is 'searchd.log' - log = /var/lib/sphinxsearch/log/searchd.log + # [hostname:]port[:protocol], or /unix/socket/path to listen on + # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL) + # + # multi-value, multiple listen points are allowed + # optional, defaults are 9312:sphinx and 9306:mysql41, as below + # + # listen = 127.0.0.1 + # listen = 192.168.0.1:9312 + # listen = 9312 + # listen = /var/run/searchd.sock + listen = 9312 + listen = 9306:mysql41 - # query log file, all search queries are logged here - # optional, default is empty (do not log queries) - query_log = /var/lib/sphinxsearch/log/query.log + # log file, searchd run info is logged here + # optional, default is 'searchd.log' + log = /var/lib/sphinxsearch/log/searchd.log - # client read timeout, seconds - # optional, default is 5 - read_timeout = 5 + # query log file, all search queries are logged here + # optional, default is empty (do not log queries) + query_log = /var/lib/sphinxsearch/log/query.log - # request timeout, seconds - # optional, default is 5 minutes - client_timeout = 300 + # client read timeout, seconds + # optional, default is 5 + read_timeout = 5 - # maximum amount of children to fork (concurrent searches to run) - # optional, default is 0 (unlimited) - max_children = 30 + # request timeout, seconds + # optional, default is 5 minutes + client_timeout = 300 - # maximum amount of persistent connections from this master to each agent host - # optional, but necessary if you use agent_persistent. 
It is reasonable to set the value - # as max_children, or less on the agent's hosts. - persistent_connections_limit = 30 + # maximum amount of children to fork (concurrent searches to run) + # optional, default is 0 (unlimited) + max_children = 30 - # PID file, searchd process ID file name - # mandatory - pid_file = /var/run/searchd.pid + # maximum amount of persistent connections from this master to each agent host + # optional, but necessary if you use agent_persistent. It is reasonable to set the value + # as max_children, or less on the agent's hosts. + persistent_connections_limit = 30 - # max amount of matches the daemon ever keeps in RAM, per-index - # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL - # default is 1000 (just like Google) - max_matches = 1000 + # PID file, searchd process ID file name + # mandatory + pid_file = /var/run/searchd.pid - # seamless rotate, prevents rotate stalls if precaching huge datasets - # optional, default is 1 - seamless_rotate = 1 + # seamless rotate, prevents rotate stalls if precaching huge datasets + # optional, default is 1 + seamless_rotate = 1 - # whether to forcibly preopen all indexes on startup - # optional, default is 1 (preopen everything) - preopen_indexes = 1 + # whether to forcibly preopen all indexes on startup + # optional, default is 1 (preopen everything) + preopen_indexes = 1 - # whether to unlink .old index copies on succesful rotation. - # optional, default is 1 (do unlink) - unlink_old = 1 + # whether to unlink .old index copies on succesful rotation. 
+ # optional, default is 1 (do unlink) + unlink_old = 1 - # attribute updates periodic flush timeout, seconds - # updates will be automatically dumped to disk this frequently - # optional, default is 0 (disable periodic flush) - # - # attr_flush_period = 900 + # attribute updates periodic flush timeout, seconds + # updates will be automatically dumped to disk this frequently + # optional, default is 0 (disable periodic flush) + # + # attr_flush_period = 900 - # instance-wide ondisk_dict defaults (per-index value take precedence) - # optional, default is 0 (precache all dictionaries in RAM) - # - # ondisk_dict_default = 1 + # MVA updates pool size + # shared between all instances of searchd, disables attr flushes! + # optional, default size is 1M + mva_updates_pool = 1M + # max allowed network packet size + # limits both query packets from clients, and responses from agents + # optional, default size is 8M + max_packet_size = 8M - # MVA updates pool size - # shared between all instances of searchd, disables attr flushes! 
- # optional, default size is 1M - mva_updates_pool = 1M + # max allowed per-query filter count + # optional, default is 256 + max_filters = 256 - # max allowed network packet size - # limits both query packets from clients, and responses from agents - # optional, default size is 8M - max_packet_size = 8M + # max allowed per-filter values count + # optional, default is 4096 + max_filter_values = 4096 - # crash log path - # searchd will (try to) log crashed query to 'crash_log_path.PID' file - # optional, default is empty (do not create crash logs) - # - # crash_log_path = /var/log/sphinxsearch/crash + # socket listen queue length + # optional, default is 5 + # + # listen_backlog = 5 - # max allowed per-query filter count - # optional, default is 256 - max_filters = 256 - # max allowed per-filter values count - # optional, default is 4096 - max_filter_values = 4096 + # per-keyword read buffer size + # optional, default is 256K + # + # read_buffer = 256K - # socket listen queue length - # optional, default is 5 - # - # listen_backlog = 5 + # unhinted read size (currently used when reading hits) + # optional, default is 32K + # + # read_unhinted = 32K - # per-keyword read buffer size - # optional, default is 256K - # - # read_buffer = 256K + # max allowed per-batch query count (aka multi-query count) + # optional, default is 32 + max_batch_queries = 32 - # unhinted read size (currently used when reading hits) - # optional, default is 32K - # - # read_unhinted = 32K + # max common subtree document cache size, per-query + # optional, default is 0 (disable subtree optimization) + # + # subtree_docs_cache = 4M - # max allowed per-batch query count (aka multi-query count) - # optional, default is 32 - max_batch_queries = 32 + # max common subtree hit cache size, per-query + # optional, default is 0 (disable subtree optimization) + # + # subtree_hits_cache = 8M - # max common subtree document cache size, per-query - # optional, default is 0 (disable subtree optimization) - 
# - # subtree_docs_cache = 4M + # multi-processing mode (MPM) + # known values are none, fork, prefork, and threads + # threads is required for RT backend to work + # optional, default is threads + workers = prefork - # max common subtree hit cache size, per-query - # optional, default is 0 (disable subtree optimization) - # - # subtree_hits_cache = 8M + # max threads to create for searching local parts of a distributed index + # optional, default is 0, which means disable multi-threaded searching + # should work with all MPMs (ie. does NOT require workers=threads) + # + dist_threads = 2 - # multi-processing mode (MPM) - # known values are none, fork, prefork, and threads - # threads is required for RT backend to work - # optional, default is fork - workers = prefork + # binlog files path; use empty string to disable binlog + # optional, default is build-time configured data directory + # + # binlog_path = # disable logging + # binlog_path = @CONFDIR@/data # binlog.001 etc will be created there - # max threads to create for searching local parts of a distributed index - # optional, default is 0, which means disable multi-threaded searching - # should work with all MPMs (ie. 
does NOT require workers=threads) - # - dist_threads = 2 + # binlog flush/sync mode + # 0 means flush and sync every second + # 1 means flush and sync every transaction + # 2 means flush every transaction, sync every second + # optional, default is 2 + # + # binlog_flush = 2 - # binlog files path; use empty string to disable binlog - # optional, default is build-time configured data directory - # - # binlog_path = # disable logging - # binlog_path = /var/lib/sphinxsearchsearch/data # binlog.001 etc will be created there + # binlog per-file size limit + # optional, default is 128M, 0 means no limit + # + # binlog_max_log_size = 256M - # binlog flush/sync mode - # 0 means flush and sync every second - # 1 means flush and sync every transaction - # 2 means flush every transaction, sync every second - # optional, default is 2 - # - # binlog_flush = 2 + # per-thread stack size, only affects workers=threads mode + # optional, default is 64K + # + # thread_stack = 128K - # binlog per-file size limit - # optional, default is 128M, 0 means no limit - # - # binlog_max_log_size = 256M + # per-keyword expansion limit (for dict=keywords prefix searches) + # optional, default is 0 (no limit) + # + # expansion_limit = 1000 - # per-thread stack size, only affects workers=threads mode - # optional, default is 64K - # - # thread_stack = 128K + # RT RAM chunks flush period + # optional, default is 0 (no periodic flush) + # + # rt_flush_period = 900 - # per-keyword expansion limit (for dict=keywords prefix searches) - # optional, default is 0 (no limit) - # - # expansion_limit = 1000 + # query log file format + # optional, known values are plain and sphinxql, default is plain + # + # query_log_format = sphinxql - # RT RAM chunks flush period - # optional, default is 0 (no periodic flush) - # - # rt_flush_period = 900 + # version string returned to MySQL network protocol clients + # optional, default is empty (use Sphinx version) + # + # mysql_version_string = 5.0.37 - # query log file 
format - # optional, known values are plain and sphinxql, default is plain - # - # query_log_format = sphinxql + # trusted plugin directory + # optional, default is empty (disable UDFs) + # + # plugin_dir = /usr/local/sphinx/lib - # version string returned to MySQL network protocol clients - # optional, default is empty (use Sphinx version) - # - # mysql_version_string = 5.0.37 + # default server-wide collation + # optional, default is libc_ci + # + # collation_server = utf8_general_ci - # trusted plugin directory - # optional, default is empty (disable UDFs) - # - # plugin_dir = /usr/local/sphinx/lib + # server-wide locale for libc based collations + # optional, default is C + # + # collation_libc_locale = ru_RU.UTF-8 - # default server-wide collation - # optional, default is libc_ci - # - # collation_server = utf8_general_ci + # threaded server watchdog (only used in workers=threads mode) + # optional, values are 0 and 1, default is 1 (watchdog on) + # + # watchdog = 1 + + # costs for max_predicted_time model, in (imaginary) nanoseconds + # optional, default is "doc=64, hit=48, skip=2048, match=64" + # + # predicted_time_costs = doc=64, hit=48, skip=2048, match=64 - # server-wide locale for libc based collations - # optional, default is C - # - # collation_libc_locale = ru_RU.UTF-8 + # current SphinxQL state (uservars etc) serialization path + # optional, default is none (do not serialize SphinxQL state) + # + # sphinxql_state = sphinxvars.sql - # threaded server watchdog (only used in workers=threads mode) - # optional, values are 0 and 1, default is 1 (watchdog on) - # - # watchdog = 1 - - # SphinxQL compatibility mode (legacy columns and their names) - # optional, default is 1 (old-style) - # - # compat_sphinxql_magics = 1 + # maximum RT merge thread IO calls per second, and per-call IO size + # useful for throttling (the background) OPTIMIZE INDEX impact + # optional, default is 0 (unlimited) + # + # rt_merge_iops = 40 + # rt_merge_maxiosize = 1M - # costs 
for max_predicted_time model, in (imaginary) nanoseconds - # optional, default is "doc=64, hit=48, skip=2048, match=64" - # - # predicted_time_costs = doc=64, hit=48, skip=2048, match=64 + # interval between agent mirror pings, in milliseconds + # 0 means disable pings + # optional, default is 1000 + # + # ha_ping_interval = 0 - # current SphinxQL state (uservars etc) serialization path - # optional, default is none (do not serialize SphinxQL state) - # - # sphinxql_state = sphinxvars.sql + # agent mirror statistics window size, in seconds + # stats older than the window size (karma) are retired + # that is, they will not affect master choice of agents in any way + # optional, default is 60 seconds + # + # ha_period_karma = 60 - # maximum RT merge thread IO calls per second, and per-call IO size - # useful for throttling (the background) OPTIMIZE INDEX impact - # optional, default is 0 (unlimited) - # - # rt_merge_iops = 40 - # rt_merge_maxiosize = 1M + # delay between preforked children restarts on rotation, in milliseconds + # optional, default is 0 (no delay) + # + # prefork_rotation_throttle = 100 - # interval between agent mirror pings, in milliseconds - # 0 means disable pings - # optional, default is 1000 - # - # ha_ping_interval = 0 - - - # agent mirror statistics window size, in seconds - # stats older than the window size (karma) are retired - # that is, they will not affect master choice of agents in any way - # optional, default is 60 seconds - # - # ha_period_karma = 60 - - - # delay between preforked children restarts on rotation, in milliseconds - # optional, default is 0 (no delay) - # - # prefork_rotation_throttle = 100 - - - # a prefix to prepend to the local file names when creating snippets - # with load_files and/or load_files_scatter options - # optional, default is empty - # - # snippets_file_prefix = /mnt/common/server1/ + # a prefix to prepend to the local file names when creating snippets + # with load_files and/or load_files_scatter 
options + # optional, default is empty + # + # snippets_file_prefix = /mnt/common/server1/ } \ No newline at end of file From 5dda8155ea69ae08126a4866808605520519adb9 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 9 Sep 2015 06:37:17 +0000 Subject: [PATCH 35/40] Add original config from source --- config/sphinx.conf.original | 1101 +++++++++++++++++++++++++++++++++++ 1 file changed, 1101 insertions(+) create mode 100644 config/sphinx.conf.original diff --git a/config/sphinx.conf.original b/config/sphinx.conf.original new file mode 100644 index 0000000..2201f9a --- /dev/null +++ b/config/sphinx.conf.original @@ -0,0 +1,1101 @@ +# +# Sphinx configuration file sample +# +# WARNING! While this sample file mentions all available options, +# it contains (very) short helper descriptions only. Please refer to +# doc/sphinx.html for details. +# + +############################################################################# +## data source definition +############################################################################# + +source src1 +{ + # data source type. 
mandatory, no default value + # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc + type = mysql + + ##################################################################### + ## SQL settings (for 'mysql' and 'pgsql' types) + ##################################################################### + + # some straightforward parameters for SQL source types + sql_host = localhost + sql_user = test + sql_pass = + sql_db = test + sql_port = 3306 # optional, default is 3306 + + # UNIX socket name + # optional, default is empty (reuse client library defaults) + # usually '/var/lib/mysql/mysql.sock' on Linux + # usually '/tmp/mysql.sock' on FreeBSD + # + # sql_sock = /tmp/mysql.sock + + + # MySQL specific client connection flags + # optional, default is 0 + # + # mysql_connect_flags = 32 # enable compression + + # MySQL specific SSL certificate settings + # optional, defaults are empty + # + # mysql_ssl_cert = /etc/ssl/client-cert.pem + # mysql_ssl_key = /etc/ssl/client-key.pem + # mysql_ssl_ca = /etc/ssl/cacert.pem + + # MS SQL specific Windows authentication mode flag + # MUST be in sync with charset_type index-level setting + # optional, default is 0 + # + # mssql_winauth = 1 # use currently logged on user credentials + + + # ODBC specific DSN (data source name) + # mandatory for odbc source type, no default value + # + # odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)}; + # sql_query = SELECT id, data FROM documents.csv + + + # ODBC and MS SQL specific, per-column buffer sizes + # optional, default is auto-detect + # + # sql_column_buffers = content=12M, comments=1M + + + # pre-query, executed before the main fetch query + # multi-value, optional, default is empty list of queries + # + # sql_query_pre = SET NAMES utf8 + # sql_query_pre = SET SESSION query_cache_type=OFF + + + # main document fetch query + # mandatory, integer document ID field MUST be the first selected column + sql_query = \ + SELECT id, group_id, 
UNIX_TIMESTAMP(date_added) AS date_added, title, content \ + FROM documents + + + # joined/payload field fetch query + # joined fields let you avoid (slow) JOIN and GROUP_CONCAT + # payload fields let you attach custom per-keyword values (eg. for ranking) + # + # syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY + # joined field QUERY should return 2 columns (docid, text) + # payload field QUERY should return 3 columns (docid, keyword, weight) + # + # REQUIRES that query results are in ascending document ID order! + # multi-value, optional, default is empty list of queries + # + # sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC + # sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC + + + # file based field declaration + # + # content of this field is treated as a file name + # and the file gets loaded and indexed in place of a field + # + # max file size is limited by max_file_field_buffer indexer setting + # file IO errors are non-fatal and get reported as warnings + # + # sql_file_field = content_file_path + + + # range query setup, query that must return min and max ID values + # optional, default is empty + # + # sql_query will need to reference $start and $end boundaries + # if using ranged query: + # + # sql_query = \ + # SELECT doc.id, doc.id AS group, doc.title, doc.data \ + # FROM documents doc \ + # WHERE id>=$start AND id<=$end + # + # sql_query_range = SELECT MIN(id),MAX(id) FROM documents + + + # range query step + # optional, default is 1024 + # + # sql_range_step = 1000 + + + # unsigned integer attribute declaration + # multi-value (an arbitrary number of attributes is allowed), optional + # optional bit size can be specified, default is 32 + # + # sql_attr_uint = author_id + # sql_attr_uint = forum_id:9 # 9 bits for forum_id + sql_attr_uint = group_id + + # boolean attribute declaration + # multi-value (an arbitrary number of 
attributes is allowed), optional + # equivalent to sql_attr_uint with 1-bit size + # + # sql_attr_bool = is_deleted + + + # bigint attribute declaration + # multi-value (an arbitrary number of attributes is allowed), optional + # declares a signed (unlike uint!) 64-bit attribute + # + # sql_attr_bigint = my_bigint_id + + + # UNIX timestamp attribute declaration + # multi-value (an arbitrary number of attributes is allowed), optional + # similar to integer, but can also be used in date functions + # + # sql_attr_timestamp = posted_ts + # sql_attr_timestamp = last_edited_ts + sql_attr_timestamp = date_added + + + # floating point attribute declaration + # multi-value (an arbitrary number of attributes is allowed), optional + # values are stored in single precision, 32-bit IEEE 754 format + # + # sql_attr_float = lat_radians + # sql_attr_float = long_radians + + + # multi-valued attribute (MVA) attribute declaration + # multi-value (an arbitrary number of attributes is allowed), optional + # MVA values are variable length lists of unsigned 32-bit integers + # + # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY] + # ATTR-TYPE is 'uint' or 'timestamp' + # SOURCE-TYPE is 'field', 'query', or 'ranged-query' + # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs + # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range' + # + # sql_attr_multi = uint tag from query; SELECT docid, tagid FROM tags + # sql_attr_multi = uint tag from ranged-query; \ + # SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \ + # SELECT MIN(docid), MAX(docid) FROM tags + + + # string attribute declaration + # multi-value (an arbitrary number of these is allowed), optional + # lets you store and retrieve strings + # + # sql_attr_string = stitle + + + # JSON attribute declaration + # multi-value (an arbitrary number of these is allowed), optional + # lets you store a JSON document as an (in-memory) attribute for later 
use + # + # sql_attr_json = properties + + + # combined field plus attribute declaration (from a single column) + # stores column as an attribute, but also indexes it as a full-text field + # + # sql_field_string = author + + + # post-query, executed on sql_query completion + # optional, default is empty + # + # sql_query_post = + + + # post-index-query, executed on successful indexing completion + # optional, default is empty + # $maxid expands to max document ID actually fetched from DB + # + # sql_query_post_index = REPLACE INTO counters ( id, val ) \ + # VALUES ( 'max_indexed_id', $maxid ) + + + # ranged query throttling, in milliseconds + # optional, default is 0 which means no delay + # enforces given delay before each query step + sql_ranged_throttle = 0 + + + # kill-list query, fetches the document IDs for kill-list + # k-list will suppress matches from preceding indexes in the same query + # optional, default is empty + # + # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex + + + # columns to unpack on indexer side when indexing + # multi-value, optional, default is empty list + # + # unpack_zlib = zlib_column + # unpack_mysqlcompress = compressed_column + # unpack_mysqlcompress = compressed_column_2 + + + # maximum unpacked length allowed in MySQL COMPRESS() unpacker + # optional, default is 16M + # + # unpack_mysqlcompress_maxsize = 16M + + + # hook command to run when SQL connection succeeds + # optional, default value is empty (do nothing) + # + # hook_connect = bash sql_connect.sh + + + # hook command to run after (any) SQL range query + # it may print out "minid maxid" (w/o quotes) to override the range + # optional, default value is empty (do nothing) + # + # hook_query_range = bash sql_query_range.sh + + + # hook command to run on successful indexing completion + # $maxid expands to max document ID actually fetched from DB + # optional, default value is empty (do nothing) + # + # hook_post_index = bash sql_post_index.sh 
$maxid + + ##################################################################### + ## xmlpipe2 settings + ##################################################################### + + # type = xmlpipe + + # shell command to invoke xmlpipe stream producer + # mandatory + # + # xmlpipe_command = cat @CONFDIR@/test.xml + + # xmlpipe2 field declaration + # multi-value, optional, default is empty + # + # xmlpipe_field = subject + # xmlpipe_field = content + + + # xmlpipe2 attribute declaration + # multi-value, optional, default is empty + # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX + # examples: + # + # xmlpipe_attr_timestamp = published + # xmlpipe_attr_uint = author_id + # xmlpipe_attr_bool = is_enabled + # xmlpipe_attr_float = latitude + # xmlpipe_attr_bigint = guid + # xmlpipe_attr_multi = tags + # xmlpipe_attr_multi_64 = tags64 + # xmlpipe_attr_string = title + # xmlpipe_attr_json = extra_data + # xmlpipe_field_string = content + + + # perform UTF-8 validation, and filter out incorrect codes + # avoids XML parser choking on non-UTF-8 documents + # optional, default is 0 + # + # xmlpipe_fixup_utf8 = 1 +} + + +# inherited source example +# +# all the parameters are copied from the parent source, +# and may then be overridden in this source definition +source src1throttled : src1 +{ + sql_ranged_throttle = 100 +} + +############################################################################# +## index definition +############################################################################# + +# local index example +# +# this is an index which is stored locally in the filesystem +# +# all indexing-time options (such as morphology and charsets) +# are configured per local index +index test1 +{ + # index type + # optional, default is 'plain' + # known values are 'plain', 'distributed', and 'rt' (see samples below) + # type = plain + + # document source(s) to index + # multi-value, mandatory + # document IDs must be globally unique across all sources + 
source = src1 + + # index files path and file name, without extension + # mandatory, path must be writable, extensions will be auto-appended + path = @CONFDIR@/data/test1 + + # document attribute values (docinfo) storage mode + # optional, default is 'extern' + # known values are 'none', 'extern' and 'inline' + docinfo = extern + + # dictionary type, 'crc' or 'keywords' + # crc is faster to index when no substring/wildcards searches are needed + # crc with substrings might be faster to search but is much slower to index + # (because all substrings are pre-extracted as individual keywords) + # keywords is much faster to index with substrings, and index is much (3-10x) smaller + # keywords supports wildcards, crc does not, and never will + # optional, default is 'keywords' + dict = keywords + + # memory locking for cached data (.spa and .spi), to prevent swapping + # optional, default is 0 (do not mlock) + # requires searchd to be run from root + mlock = 0 + + # a list of morphology preprocessors to apply + # optional, default is empty + # + # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru', + # 'soundex', and 'metaphone'; additional preprocessors available from + # libstemmer are 'libstemmer_XXX', where XXX is algorithm code + # (see libstemmer_c/libstemmer/modules.txt) + # + # morphology = stem_en, stem_ru, soundex + # morphology = libstemmer_german + # morphology = libstemmer_sv + morphology = none + + # minimum word length at which to enable stemming + # optional, default is 1 (stem everything) + # + # min_stemming_len = 1 + + + # stopword files list (space separated) + # optional, default is empty + # contents are plain text, charset_table and stemming are both applied + # + # stopwords = @CONFDIR@/data/stopwords.txt + + + # wordforms file, in "mapfrom > mapto" plain text format + # optional, default is empty + # + # wordforms = @CONFDIR@/data/wordforms.txt + + + # tokenizing exceptions file + # optional, default is empty + # + # plain text, 
case sensitive, space insensitive in map-from part + # one "Map Several Words => ToASingleOne" entry per line + # + # exceptions = @CONFDIR@/data/exceptions.txt + + + # embedded file size limit + # optional, default is 16K + # + # exceptions, wordforms, and stopwords files smaller than this limit + # are stored in the index; otherwise, their paths and sizes are stored + # + # embedded_limit = 16K + + # minimum indexed word length + # default is 1 (index everything) + min_word_len = 1 + + + # ignored characters list + # optional, default value is empty + # + # ignore_chars = U+00AD + + + # minimum word prefix length to index + # optional, default is 0 (do not index prefixes) + # + # min_prefix_len = 0 + + + # minimum word infix length to index + # optional, default is 0 (do not index infixes) + # + # min_infix_len = 0 + + + # maximum substring (prefix or infix) length to index + # optional, default is 0 (do not limit substring length) + # + # max_substring_len = 8 + + + # list of fields to limit prefix/infix indexing to + # optional, default value is empty (index all fields in prefix/infix mode) + # + # prefix_fields = filename + # infix_fields = url, domain + + + # expand keywords with exact forms and/or stars when searching fit indexes + # search-time only, does not affect indexing, can be 0 or 1 + # optional, default is 0 (do not expand keywords) + # + # expand_keywords = 1 + + + # n-gram length to index, for CJK indexing + # only supports 0 and 1 for now, other lengths to be implemented + # optional, default is 0 (disable n-grams) + # + # ngram_len = 1 + + + # n-gram characters list, for CJK indexing + # optional, default is empty + # + # ngram_chars = U+3000..U+2FA1F + + + # phrase boundary characters list + # optional, default is empty + # + # phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis + + + # phrase boundary word position increment + # optional, default is 0 + # + # phrase_boundary_step = 100 + + + # blended characters list + # blended chars are 
indexed both as separators and valid characters + # for instance, AT&T will result in 3 tokens ("at", "t", and "at&t") + # optional, default is empty + # + # blend_chars = +, &, U+23 + + + # blended token indexing mode + # a comma separated list of blended token indexing variants + # known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure + # optional, default is trim_none + # + # blend_mode = trim_tail, skip_pure + + + # whether to strip HTML tags from incoming documents + # known values are 0 (do not strip) and 1 (do strip) + # optional, default is 0 + html_strip = 0 + + # what HTML attributes to index if stripping HTML + # optional, default is empty (do not index anything) + # + # html_index_attrs = img=alt,title; a=title; + + + # what HTML elements contents to strip + # optional, default is empty (do not strip element contents) + # + # html_remove_elements = style, script + + + # whether to preopen index data files on startup + # optional, default is 0 (do not preopen), searchd-only + # + # preopen = 1 + + + # whether to enable in-place inversion (2x less disk, 90-95% speed) + # optional, default is 0 (use separate temporary files), indexer-only + # + # inplace_enable = 1 + + + # in-place fine-tuning options + # optional, defaults are listed below + # + # inplace_hit_gap = 0 # preallocated hitlist gap size + # inplace_docinfo_gap = 0 # preallocated docinfo gap size + # inplace_reloc_factor = 0.1 # relocation buffer size within arena + # inplace_write_factor = 0.1 # write buffer size within arena + + + # whether to index original keywords along with stemmed versions + # enables "=exactform" operator to work + # optional, default is 0 + # + # index_exact_words = 1 + + + # position increment on overshort (less than min_word_len) words + # optional, allowed values are 0 and 1, default is 1 + # + # overshort_step = 1 + + + # position increment on stopword + # optional, allowed values are 0 and 1, default is 1 + # + # stopword_step = 1 + + + #
hitless words list + # positions for these keywords will not be stored in the index + # optional, allowed values are 'all', or a list file name + # + # hitless_words = all + # hitless_words = hitless.txt + + + # detect and index sentence and paragraph boundaries + # required for the SENTENCE and PARAGRAPH operators to work + # optional, allowed values are 0 and 1, default is 0 + # + # index_sp = 1 + + + # index zones, delimited by HTML/XML tags + # a comma separated list of tags and wildcards + # required for the ZONE operator to work + # optional, default is empty string (do not index zones) + # + # index_zones = title, h*, th + + + # index per-document and average per-index field lengths, in tokens + # required for the BM25A(), BM25F() in expression ranker + # optional, default is 0 (do not index field lengths) + # + # index_field_lengths = 1 + + + # regular expressions (regexps) to filter the fields and queries with + # gets applied to data source fields when indexing + # gets applied to search queries when searching + # multi-value, optional, default is empty list of regexps + # + # regexp_filter = \b(\d+)\" => \1inch + # regexp_filter = (blue|red) => color + + + # list of the words considered frequent with respect to bigram indexing + # optional, default is empty + # + # bigram_freq_words = the, a, i, you, my + + + # bigram indexing mode + # known values are none, all, first_freq, both_freq + # optional, default is none (do not index bigrams) + # + # bigram_index = both_freq + + + # snippet document file name prefix + # prepended to file names when generating snippets using load_files option + # WARNING, this is a prefix (not a path), trailing slash matters!
+ # optional, default is empty + # + # snippets_file_prefix = /mnt/mydocs/server1 + + + # whether to apply stopwords before or after stemming + # optional, default is 0 (apply stopwords after stemming) + # + # stopwords_unstemmed = 0 + + + # path to a global (cluster-wide) keyword IDFs file + # optional, default is empty (use local IDFs) + # + # global_idf = /usr/local/sphinx/var/global.idf +} + + +# inherited index example +# +# all the parameters are copied from the parent index, +# and may then be overridden in this index definition +index test1stemmed : test1 +{ + path = @CONFDIR@/data/test1stemmed + morphology = stem_en +} + + +# distributed index example +# +# this is a virtual index which can NOT be directly indexed, +# and only contains references to other local and/or remote indexes +index dist1 +{ + # 'distributed' index type MUST be specified + type = distributed + + # local index to be searched + # there can be many local indexes configured + local = test1 + local = test1stemmed + + # remote agent + # multiple remote agents may be specified + # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]' + # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]' + agent = localhost:9313:remote1 + agent = localhost:9314:remote2,remote3 + # agent = /var/run/searchd.sock:remote4 + + # remote agent mirrors groups, aka mirrors, aka HA agents + # defines 2 or more interchangeable mirrors for a given index part + # + # agent = server3:9312 | server4:9312 :indexchunk2 + # agent = server3:9312:chunk2server3 | server4:9312:chunk2server4 + # agent = server3:chunk2server3 | server4:chunk2server4 + # agent = server21|server22|server23:chunk2 + + + # blackhole remote agent, for debugging/testing + # network errors and search results will be ignored + # + # agent_blackhole = testbox:9312:testindex1,testindex2 + + + # persistently connected remote agent + # reduces connect() pressure, requires workers = threads + # + # agent_persistent
= testbox:9312:testindex1,testindex2 + + + # remote agent connection timeout, milliseconds + # optional, default is 1000 ms, ie. 1 sec + agent_connect_timeout = 1000 + + # remote agent query timeout, milliseconds + # optional, default is 3000 ms, ie. 3 sec + agent_query_timeout = 3000 + + # HA mirror agent strategy + # optional, defaults to ??? (random mirror) + # known values are nodeads, noerrors, roundrobin, nodeadstm, noerrorstm + # + # ha_strategy = nodeads + + # path to RLP context file + # optional, default is empty + # + # rlp_context = /usr/local/share/sphinx/rlp/rlp-context.xml +} + + +# realtime index example +# +# you can run INSERT, REPLACE, and DELETE on this index on the fly +# using MySQL protocol (see 'listen' directive below) +index rt +{ + # 'rt' index type must be specified to use RT index + type = rt + + # index files path and file name, without extension + # mandatory, path must be writable, extensions will be auto-appended + path = @CONFDIR@/data/rt + + # RAM chunk size limit + # RT index will keep at most this much data in RAM, then flush to disk + # optional, default is 128M + # + # rt_mem_limit = 512M + + # full-text field declaration + # multi-value, mandatory + rt_field = title + rt_field = content + + # unsigned integer attribute declaration + # multi-value (an arbitrary number of attributes is allowed), optional + # declares an unsigned 32-bit attribute + rt_attr_uint = gid + + # RT indexes currently support the following attribute types: + # uint, bigint, float, timestamp, string, mva, mva64, json + # + # rt_attr_bigint = guid + # rt_attr_float = gpa + # rt_attr_timestamp = ts_added + # rt_attr_string = author + # rt_attr_multi = tags + # rt_attr_multi_64 = tags64 + # rt_attr_json = extra_data +} + +############################################################################# +## indexer settings +############################################################################# + +indexer +{ + # memory limit, in bytes, kilobytes (16384K) or
megabytes (256M) + # optional, default is 128M, max is 2047M, recommended is 256M to 1024M + mem_limit = 128M + + # maximum IO calls per second (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iops = 40 + + + # maximum IO call size, bytes (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iosize = 1048576 + + + # maximum xmlpipe2 field length, bytes + # optional, default is 2M + # + # max_xmlpipe2_field = 4M + + + # write buffer size, bytes + # several (currently up to 4) buffers will be allocated + # write buffers are allocated in addition to mem_limit + # optional, default is 1M + # + # write_buffer = 1M + + + # maximum file field adaptive buffer size + # optional, default is 8M, minimum is 1M + # + # max_file_field_buffer = 32M + + + # how to handle IO errors in file fields + # known values are 'ignore_field', 'skip_document', and 'fail_index' + # optional, default is 'ignore_field' + # + # on_file_field_error = skip_document + + + # how to handle syntax errors in JSON attributes + # known values are 'ignore_attr' and 'fail_index' + # optional, default is 'ignore_attr' + # + # on_json_attr_error = fail_index + + + # whether to auto-convert numeric values from strings in JSON attributes + # with auto-conversion, string value with actually numeric data + # (as in {"key":"12345"}) gets stored as a number, rather than string + # optional, allowed values are 0 and 1, default is 0 (do not convert) + # + # json_autoconv_numbers = 1 + + + # whether and how to auto-convert key names in JSON attributes + # known value is 'lowercase' + # optional, default is unspecified (do nothing) + # + # json_autoconv_keynames = lowercase + + + # lemmatizer cache size + # improves the indexing time when the lemmatization is enabled + # optional, default is 256K + # + # lemmatizer_cache = 512M +} + +############################################################################# +## searchd settings 
+############################################################################# + +searchd +{ + # [hostname:]port[:protocol], or /unix/socket/path to listen on + # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL) + # + # multi-value, multiple listen points are allowed + # optional, defaults are 9312:sphinx and 9306:mysql41, as below + # + # listen = 127.0.0.1 + # listen = 192.168.0.1:9312 + # listen = 9312 + # listen = /var/run/searchd.sock + listen = 9312 + listen = 9306:mysql41 + + # log file, searchd run info is logged here + # optional, default is 'searchd.log' + log = @CONFDIR@/log/searchd.log + + # query log file, all search queries are logged here + # optional, default is empty (do not log queries) + query_log = @CONFDIR@/log/query.log + + # client read timeout, seconds + # optional, default is 5 + read_timeout = 5 + + # request timeout, seconds + # optional, default is 5 minutes + client_timeout = 300 + + # maximum amount of children to fork (concurrent searches to run) + # optional, default is 0 (unlimited) + max_children = 30 + + # maximum amount of persistent connections from this master to each agent host + # optional, but necessary if you use agent_persistent. It is reasonable to set the value + # as max_children, or less on the agent's hosts. + persistent_connections_limit = 30 + + # PID file, searchd process ID file name + # mandatory + pid_file = @CONFDIR@/log/searchd.pid + + # seamless rotate, prevents rotate stalls if precaching huge datasets + # optional, default is 1 + seamless_rotate = 1 + + # whether to forcibly preopen all indexes on startup + # optional, default is 1 (preopen everything) + preopen_indexes = 1 + + # whether to unlink .old index copies on successful rotation.
+ # optional, default is 1 (do unlink) + unlink_old = 1 + + # attribute updates periodic flush timeout, seconds + # updates will be automatically dumped to disk this frequently + # optional, default is 0 (disable periodic flush) + # + # attr_flush_period = 900 + + + # MVA updates pool size + # shared between all instances of searchd, disables attr flushes! + # optional, default size is 1M + mva_updates_pool = 1M + + # max allowed network packet size + # limits both query packets from clients, and responses from agents + # optional, default size is 8M + max_packet_size = 8M + + # max allowed per-query filter count + # optional, default is 256 + max_filters = 256 + + # max allowed per-filter values count + # optional, default is 4096 + max_filter_values = 4096 + + + # socket listen queue length + # optional, default is 5 + # + # listen_backlog = 5 + + + # per-keyword read buffer size + # optional, default is 256K + # + # read_buffer = 256K + + + # unhinted read size (currently used when reading hits) + # optional, default is 32K + # + # read_unhinted = 32K + + + # max allowed per-batch query count (aka multi-query count) + # optional, default is 32 + max_batch_queries = 32 + + + # max common subtree document cache size, per-query + # optional, default is 0 (disable subtree optimization) + # + # subtree_docs_cache = 4M + + + # max common subtree hit cache size, per-query + # optional, default is 0 (disable subtree optimization) + # + # subtree_hits_cache = 8M + + + # multi-processing mode (MPM) + # known values are none, fork, prefork, and threads + # threads is required for RT backend to work + # optional, default is threads + workers = threads # for RT to work + + + # max threads to create for searching local parts of a distributed index + # optional, default is 0, which means disable multi-threaded searching + # should work with all MPMs (ie. 
does NOT require workers=threads) + # + # dist_threads = 4 + + + # binlog files path; use empty string to disable binlog + # optional, default is build-time configured data directory + # + # binlog_path = # disable logging + # binlog_path = @CONFDIR@/data # binlog.001 etc will be created there + + + # binlog flush/sync mode + # 0 means flush and sync every second + # 1 means flush and sync every transaction + # 2 means flush every transaction, sync every second + # optional, default is 2 + # + # binlog_flush = 2 + + + # binlog per-file size limit + # optional, default is 128M, 0 means no limit + # + # binlog_max_log_size = 256M + + + # per-thread stack size, only affects workers=threads mode + # optional, default is 64K + # + # thread_stack = 128K + + + # per-keyword expansion limit (for dict=keywords prefix searches) + # optional, default is 0 (no limit) + # + # expansion_limit = 1000 + + + # RT RAM chunks flush period + # optional, default is 0 (no periodic flush) + # + # rt_flush_period = 900 + + + # query log file format + # optional, known values are plain and sphinxql, default is plain + # + # query_log_format = sphinxql + + + # version string returned to MySQL network protocol clients + # optional, default is empty (use Sphinx version) + # + # mysql_version_string = 5.0.37 + + + # trusted plugin directory + # optional, default is empty (disable UDFs) + # + # plugin_dir = /usr/local/sphinx/lib + + + # default server-wide collation + # optional, default is libc_ci + # + # collation_server = utf8_general_ci + + + # server-wide locale for libc based collations + # optional, default is C + # + # collation_libc_locale = ru_RU.UTF-8 + + + # threaded server watchdog (only used in workers=threads mode) + # optional, values are 0 and 1, default is 1 (watchdog on) + # + # watchdog = 1 + + + # costs for max_predicted_time model, in (imaginary) nanoseconds + # optional, default is "doc=64, hit=48, skip=2048, match=64" + # + # predicted_time_costs = doc=64, hit=48, 
skip=2048, match=64 + + + # current SphinxQL state (uservars etc) serialization path + # optional, default is none (do not serialize SphinxQL state) + # + # sphinxql_state = sphinxvars.sql + + + # maximum RT merge thread IO calls per second, and per-call IO size + # useful for throttling (the background) OPTIMIZE INDEX impact + # optional, default is 0 (unlimited) + # + # rt_merge_iops = 40 + # rt_merge_maxiosize = 1M + + + # interval between agent mirror pings, in milliseconds + # 0 means disable pings + # optional, default is 1000 + # + # ha_ping_interval = 0 + + + # agent mirror statistics window size, in seconds + # stats older than the window size (karma) are retired + # that is, they will not affect master choice of agents in any way + # optional, default is 60 seconds + # + # ha_period_karma = 60 + + + # delay between preforked children restarts on rotation, in milliseconds + # optional, default is 0 (no delay) + # + # prefork_rotation_throttle = 100 + + + # a prefix to prepend to the local file names when creating snippets + # with load_files and/or load_files_scatter options + # optional, default is empty + # + # snippets_file_prefix = /mnt/common/server1/ +} + +############################################################################# +## common settings +############################################################################# + +common +{ + + # lemmatizer dictionaries base path + # optional, default is /usr/local/share (see ./configure --datadir) + # + # lemmatizer_base = /usr/local/share/sphinx/dicts + + # path to RLP root directory + # optional, default is /usr/local/share (see ./configure --datadir) + # + # rlp_root = /usr/local/share/sphinx/rlp + + # path to RLP environment file + # optional, default is /usr/local/share/rlp-environment.xml (see ./configure --datadir) + # + # rlp_environment = /usr/local/share/sphinx/rlp/rlp/etc/rlp-environment.xml +} + +# --eof-- From dfdb914b17113ea715f4e3503af0300bb3b66666 Mon Sep 17 00:00:00 2001 From:
Michael RICOIS Date: Wed, 9 Sep 2015 08:56:30 +0000 Subject: [PATCH 36/40] Remove deprecated charset_type --- config/dir.conf | 2 -- 1 file changed, 2 deletions(-) diff --git a/config/dir.conf b/config/dir.conf index e94619d..b8134e9 100644 --- a/config/dir.conf +++ b/config/dir.conf @@ -27,7 +27,6 @@ index dir path = /var/lib/sphinxsearch/idx/dir docinfo = extern morphology = none - charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ @@ -46,7 +45,6 @@ index dir_phx path = /var/lib/sphinxsearch/idx/dir_phx docinfo = extern min_stemming_len = 4 - charset_type = utf-8 charset_table = 0..9, A..Z->a..z, a..z, \ U+23, U+25, U+26, U+2B, U+3D, U+40, \ U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, U+C7->c, \ From ead70004b6c1eb4e0723da4e4935b837b08ea187 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 23 Sep 2015 10:06:52 +0000 Subject: [PATCH 37/40] Ecriture lancement indexation pour suivi dans les logs --- indexer/slave-dir.sh | 4 +++- indexer/slave-ent.sh | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/indexer/slave-dir.sh b/indexer/slave-dir.sh index 4ab145f..0e2a6ff 100644 --- a/indexer/slave-dir.sh +++ b/indexer/slave-dir.sh @@ -21,6 +21,8 @@ done # Suppression fin de ligne idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") +echo "$(date '+%Y-%m-%d %H:%M:%S') === TENTATIVE INDEXATION DIR" >> $PATH_LOG/indexer.log + # Lancement de l'indexation if [ -n "$idx" ]; then if [[ "$idx" > 0 ]]; then @@ -28,7 +30,7 @@ if [ -n "$idx" ]; then echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION DIR" >> $PATH_LOG/indexer.log # Create file to indicate indexing on slave - rm -v $PATH_LOG/dir-*.idx + rm -f $PATH_LOG/dir-*.idx echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX # Sphinx rotate diff --git a/indexer/slave-ent.sh b/indexer/slave-ent.sh index 7fcfbd6..5d6f5c5 100644 --- a/indexer/slave-ent.sh 
+++ b/indexer/slave-ent.sh @@ -21,6 +21,8 @@ done # Suppression fin de ligne idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") +echo "$(date '+%Y-%m-%d %H:%M:%S') === TENTATIVE INDEXATION ENT" >> $PATH_LOG/indexer.log + # Lancement de l'indexation if [ -n "$idx" ]; then if [[ "$idx" > 0 ]]; then @@ -28,7 +30,7 @@ if [ -n "$idx" ]; then echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log # Create file to indicate indexing on slave - rm -v $PATH_LOG/ent-*.idx + rm -f $PATH_LOG/ent-*.idx echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX # Sphinx rotate From a49cabee3607e31a702a6a43661e2df9829cc391 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Wed, 23 Sep 2015 13:02:19 +0000 Subject: [PATCH 38/40] =?UTF-8?q?Fichier=20pour=20eviter=20l'indexation=20?= =?UTF-8?q?une=20deuxi=C3=A8me=20fois?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- indexer/indexer-dir.sh | 12 ++++++++++++ indexer/indexer-ent.sh | 14 +++++++++++++- indexer/slave-dir.sh | 2 +- indexer/slave-ent.sh | 2 +- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh index 9e4b1f7..f217664 100755 --- a/indexer/indexer-dir.sh +++ b/indexer/indexer-dir.sh @@ -6,6 +6,12 @@ PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql +# Indexation déjà lancé +FILEINDEX=$PATH_LOG/dir-$(date '+%Y%m%d').idx +if [ -f "$FILEINDEX" ]; then + exit 0 +fi + # Is consolidated ? 
output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-dir.sql) idx=''; @@ -44,6 +50,10 @@ if [ -n "$idx" ]; then elif [ -z "$nbT" ]; then elif [ "$nbT" -gt "$nbC" ]; then + # Create file to indicate indexing + rm -f $PATH_LOG/dir-*.idx + echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX + # Enregistrement Debut Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log @@ -59,6 +69,8 @@ if [ -n "$idx" ]; then # Enregistrement Fin Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log + + echo "FIN $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION DIR" >> $PATH_LOG/indexer.log diff --git a/indexer/indexer-ent.sh b/indexer/indexer-ent.sh index e183119..d1e3559 100755 --- a/indexer/indexer-ent.sh +++ b/indexer/indexer-ent.sh @@ -6,6 +6,12 @@ PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql +# Indexation déjà lancé +FILEINDEX=$PATH_LOG/ent-$(date '+%Y%m%d').idx +if [ -f "$FILEINDEX" ]; then + exit 0 +fi + # Is consolidated ? 
output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 < $PATH_SQL/consolidate-ent.sql) idx=''; @@ -19,7 +25,7 @@ idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") if [ -n "$idx" ]; then if [[ "$idx" > 0 ]]; then - echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log # Nombre de lignes dans la table etablissements output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-ent.sql) @@ -44,6 +50,10 @@ if [ -n "$idx" ]; then elif [ -z "$nbT" ]; then elif [ "$nbT" -gt "$nbC" ]; then + # Create file to indicate indexing + rm -f $PATH_LOG/ent-*.idx + echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX + # Enregistrement Debut Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log @@ -60,6 +70,8 @@ if [ -n "$idx" ]; then # Enregistrement Fin Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log + echo "FIN $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX + echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log fi diff --git a/indexer/slave-dir.sh b/indexer/slave-dir.sh index 0e2a6ff..d12412f 100644 --- a/indexer/slave-dir.sh +++ b/indexer/slave-dir.sh @@ -29,7 +29,7 @@ if [ -n "$idx" ]; then echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION DIR" >> $PATH_LOG/indexer.log - # Create file to indicate indexing on slave + # Create file to indicate indexing rm -f $PATH_LOG/dir-*.idx echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX diff --git a/indexer/slave-ent.sh b/indexer/slave-ent.sh index 5d6f5c5..9238313 100644 --- a/indexer/slave-ent.sh +++ b/indexer/slave-ent.sh @@ -29,7 +29,7 @@ if [ -n "$idx" ]; then echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log - # Create file to indicate 
indexing on slave + # Create file to indicate indexing rm -f $PATH_LOG/ent-*.idx echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX From 72fc6522baedc955d2268b04b4ab94497bfd1931 Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Thu, 5 Nov 2015 15:57:24 +0000 Subject: [PATCH 39/40] =?UTF-8?q?Gestion=20des=20param=C3=A8tres=20par=20d?= =?UTF-8?q?es=20variables=20d'environnement?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README | 12 +++++++++++- config/act.conf | 6 +++--- config/ciblage.conf | 6 +++--- config/ciblagetmp.conf | 6 +++--- config/dir.conf | 6 +++--- config/dirtmp.conf | 6 +++--- config/ent.conf | 6 +++--- config/enttmp.conf | 6 +++--- config/histo.conf | 7 +++---- env.sh | 12 ++++++++++++ indexer/indexer-ciblage.sh | 6 +++--- indexer/indexer-dir.sh | 7 +++---- indexer/indexer-ent.sh | 6 +++--- indexer/slave-ciblage.sh | 6 +++--- indexer/slave-dir.sh | 6 +++--- indexer/slave-ent.sh | 6 +++--- 16 files changed, 65 insertions(+), 45 deletions(-) create mode 100644 env.sh diff --git a/README b/README index 21c3943..cc294ee 100644 --- a/README +++ b/README @@ -1,4 +1,3 @@ - USAGE ===== @@ -13,7 +12,18 @@ Sphinx Engine configuration - Data storage /var/lib/sphinxsearch/log : Log /var/lib/sphinxsearch/idx : Data index + + Environment variables + ENV_MYSQLHOST + ENV_MYSQLUSER + ENV_MYSQLPASS + sed -i -e 's/ENV_MYSQLHOST/VALUE/g' sphinx.*.conf + sed -i -e 's/ENV_MYSQLUSER/VALUE/g' sphinx.*.conf + sed -i -e 's/ENV_MYSQLPASS/VALUE/g' sphinx.*.conf + sed -i -e 's/ENV_MYSQLHOST/VALUE/g' indexer/*.sh + sed -i -e 's/ENV_MYSQLUSER/VALUE/g' indexer/*.sh + sed -i -e 's/ENV_MYSQLPASS/VALUE/g' indexer/*.sh Indexing -------- indexer-*.sh : Index on master database with rotation diff --git a/config/act.conf b/config/act.conf index fddb0ab..755ecbd 100644 --- a/config/act.conf +++ b/config/act.conf @@ -2,10 +2,10 @@ source act { type = mysql - sql_host = 192.168.3.30 + sql_host = ENV_MYSQLHOST sql_port = 3306 - 
sql_user = sphinx - sql_pass = indexer + sql_user = ENV_MYSQLUSER + sql_pass = ENV_MYSQLPASS sql_db = jo sql_query_pre = SET NAMES utf8 sql_query = SELECT l.id, l.idPar, l.idAct, r.siren, r.actif, r.PpPm, r.RS, r.adresse_cp, r.adresse_ville, \ diff --git a/config/ciblage.conf b/config/ciblage.conf index 2cd7f55..054cbca 100644 --- a/config/ciblage.conf +++ b/config/ciblage.conf @@ -2,10 +2,10 @@ source ciblage { type = mysql - sql_host = 192.168.3.30 + sql_host = ENV_MYSQLHOST sql_port = 3306 - sql_user = sphinx - sql_pass = indexer + sql_user = ENV_MYSQLUSER + sql_pass = ENV_MYSQLPASS sql_db = jo sql_query_pre = SET NAMES utf8 sql_query = \ diff --git a/config/ciblagetmp.conf b/config/ciblagetmp.conf index 0bab6fe..4ee4d6f 100644 --- a/config/ciblagetmp.conf +++ b/config/ciblagetmp.conf @@ -2,10 +2,10 @@ source ciblage { type = mysql - sql_host = 192.168.3.30 + sql_host = ENV_MYSQLHOST sql_port = 3306 - sql_user = sphinx - sql_pass = indexer + sql_user = ENV_MYSQLUSER + sql_pass = ENV_MYSQLPASS sql_db = jo sql_query_pre = SET NAMES utf8 sql_query = \ diff --git a/config/dir.conf b/config/dir.conf index b8134e9..64433bb 100644 --- a/config/dir.conf +++ b/config/dir.conf @@ -2,10 +2,10 @@ source dir { type = mysql - sql_host = 192.168.3.30 + sql_host = ENV_MYSQLHOST sql_port = 3306 - sql_user = sphinx - sql_pass = indexer + sql_user = ENV_MYSQLUSER + sql_pass = ENV_MYSQLPASS sql_db = jo sql_query_pre = SET NAMES utf8 sql_query = \ diff --git a/config/dirtmp.conf b/config/dirtmp.conf index 9902b3e..9915ad6 100644 --- a/config/dirtmp.conf +++ b/config/dirtmp.conf @@ -2,10 +2,10 @@ source dir { type = mysql - sql_host = 192.168.3.30 + sql_host = ENV_MYSQLHOST sql_port = 3306 - sql_user = sphinx - sql_pass = indexer + sql_user = ENV_MYSQLUSER + sql_pass = ENV_MYSQLPASS sql_db = jo sql_query_pre = SET NAMES utf8 sql_query = \ diff --git a/config/ent.conf b/config/ent.conf index d0905e5..0763e9a 100644 --- a/config/ent.conf +++ b/config/ent.conf @@ -2,10 +2,10 @@ source 
ent { type = mysql - sql_host = 192.168.3.30 + sql_host = ENV_MYSQLHOST sql_port = 3306 - sql_user = sphinx - sql_pass = indexer + sql_user = ENV_MYSQLUSER + sql_pass = ENV_MYSQLPASS sql_db = jo sql_query_pre = SET NAMES utf8 sql_query = SELECT id, siren, nic, siege, \ diff --git a/config/enttmp.conf b/config/enttmp.conf index 61b2ca0..eeb679f 100644 --- a/config/enttmp.conf +++ b/config/enttmp.conf @@ -2,10 +2,10 @@ source ent { type = mysql - sql_host = 192.168.3.30 + sql_host = ENV_MYSQLHOST sql_port = 3306 - sql_user = sphinx - sql_pass = indexer + sql_user = ENV_MYSQLUSER + sql_pass = ENV_MYSQLPASS sql_db = jo sql_query_pre = SET NAMES utf8 sql_query = SELECT id, siren, nic, siege, \ diff --git a/config/histo.conf b/config/histo.conf index 7a373c6..4187754 100644 --- a/config/histo.conf +++ b/config/histo.conf @@ -2,14 +2,13 @@ source histo { type = mysql - sql_host = 192.168.3.30 + sql_host = ENV_MYSQLHOST sql_port = 3306 - sql_user = sphinx - sql_pass = indexer + sql_user = ENV_MYSQLUSER + sql_pass = ENV_MYSQLPASS sql_db = histobodacc sql_query_pre = SET NAMES utf8 sql_query = SELECT id, nomFichier, annee1, dateBod, texte FROM bodacc_ocr; - sql_query_info = SELECT * FROM bodacc_ocr WHERE id=$id sql_attr_uint = annee1 } diff --git a/env.sh b/env.sh new file mode 100644 index 0000000..5d6566c --- /dev/null +++ b/env.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +ENV_MYSQLHOST=192.168.3.28 +ENV_MYSQLUSER=sphinx +ENV_MYSQLPASS=indexer + +sed -i -e s/ENV_MYSQLHOST/"$ENV_MYSQLHOST"/g sphinx.*.conf +sed -i -e s/ENV_MYSQLUSER/"$ENV_MYSQLUSER"/g sphinx.*.conf +sed -i -e s/ENV_MYSQLPASS/"$ENV_MYSQLPASS"/g sphinx.*.conf +sed -i -e s/ENV_MYSQLHOST/"$ENV_MYSQLHOST"/g indexer/*.sh +sed -i -e s/ENV_MYSQLUSER/"$ENV_MYSQLUSER"/g indexer/*.sh +sed -i -e s/ENV_MYSQLPASS/"$ENV_MYSQLPASS"/g indexer/*.sh \ No newline at end of file diff --git a/indexer/indexer-ciblage.sh b/indexer/indexer-ciblage.sh index a21c8af..337f02e 100644 --- a/indexer/indexer-ciblage.sh +++ 
b/indexer/indexer-ciblage.sh @@ -1,7 +1,7 @@ #!/bin/bash -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer +MYSQL_HOST=ENV_MYSQLHOST +MYSQL_USER=ENV_MYSQLUSER +MYSQL_PASS=ENV_MYSQLPASS PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh index f217664..8960b2a 100755 --- a/indexer/indexer-dir.sh +++ b/indexer/indexer-dir.sh @@ -1,7 +1,7 @@ #!/bin/bash -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer +MYSQL_HOST=ENV_MYSQLHOST +MYSQL_USER=ENV_MYSQLUSER +MYSQL_PASS=ENV_MYSQLPASS PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql @@ -71,7 +71,6 @@ if [ -n "$idx" ]; then mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log echo "FIN $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX - echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION DIR" >> $PATH_LOG/indexer.log fi diff --git a/indexer/indexer-ent.sh b/indexer/indexer-ent.sh index d1e3559..e13e169 100755 --- a/indexer/indexer-ent.sh +++ b/indexer/indexer-ent.sh @@ -1,7 +1,7 @@ #!/bin/bash -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer +MYSQL_HOST=ENV_MYSQLHOST +MYSQL_USER=ENV_MYSQLUSER +MYSQL_PASS=ENV_MYSQLPASS PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql diff --git a/indexer/slave-ciblage.sh b/indexer/slave-ciblage.sh index 7b58835..3850ebe 100644 --- a/indexer/slave-ciblage.sh +++ b/indexer/slave-ciblage.sh @@ -1,7 +1,7 @@ #!/bin/bash -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer +MYSQL_HOST=ENV_MYSQLHOST +MYSQL_USER=ENV_MYSQLUSER +MYSQL_PASS=ENV_MYSQLPASS PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql diff --git a/indexer/slave-dir.sh 
b/indexer/slave-dir.sh index d12412f..2d797ee 100644 --- a/indexer/slave-dir.sh +++ b/indexer/slave-dir.sh @@ -1,7 +1,7 @@ #!/bin/bash -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer +MYSQL_HOST=ENV_MYSQLHOST +MYSQL_USER=ENV_MYSQLUSER +MYSQL_PASS=ENV_MYSQLPASS PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql diff --git a/indexer/slave-ent.sh b/indexer/slave-ent.sh index 9238313..c45a8ec 100644 --- a/indexer/slave-ent.sh +++ b/indexer/slave-ent.sh @@ -1,7 +1,7 @@ #!/bin/bash -MYSQL_HOST=192.168.3.30 -MYSQL_USER=sphinx -MYSQL_PASS=indexer +MYSQL_HOST=ENV_MYSQLHOST +MYSQL_USER=ENV_MYSQLUSER +MYSQL_PASS=ENV_MYSQLPASS PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin PATH_SQL=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/sql From fb3596f696b948b323caa6b48805aa59aa14885b Mon Sep 17 00:00:00 2001 From: Michael RICOIS Date: Fri, 6 Nov 2015 08:21:44 +0000 Subject: [PATCH 40/40] Correction --- README | 4 ++++ env.sh | 2 +- indexer/indexer-dir.sh | 24 ++++++++++++------------ indexer/indexer-ent.sh | 18 ++++++++---------- indexer/manual-ent.sh | 2 +- indexer/sql/count-ent.sql | 2 +- indexer/sql/count-enttmp.sql | 2 +- 7 files changed, 28 insertions(+), 26 deletions(-) diff --git a/README b/README index cc294ee..3e58024 100644 --- a/README +++ b/README @@ -24,6 +24,10 @@ Sphinx Engine configuration sed -i -e 's/ENV_MYSQLHOST/VALUE/g' indexer/*.sh sed -i -e 's/ENV_MYSQLUSER/VALUE/g' indexer/*.sh sed -i -e 's/ENV_MYSQLPASS/VALUE/g' indexer/*.sh + + - Crontab + - Logrotate + Indexing -------- indexer-*.sh : Index on master database with rotation diff --git a/env.sh b/env.sh index 5d6566c..2cd9609 100644 --- a/env.sh +++ b/env.sh @@ -9,4 +9,4 @@ sed -i -e s/ENV_MYSQLUSER/"$ENV_MYSQLUSER"/g sphinx.*.conf sed -i -e s/ENV_MYSQLPASS/"$ENV_MYSQLPASS"/g sphinx.*.conf sed -i -e s/ENV_MYSQLHOST/"$ENV_MYSQLHOST"/g indexer/*.sh sed -i -e s/ENV_MYSQLUSER/"$ENV_MYSQLUSER"/g indexer/*.sh -sed -i -e 
s/ENV_MYSQLPASS/"$ENV_MYSQLPASS"/g indexer/*.sh \ No newline at end of file +sed -i -e s/ENV_MYSQLPASS/"$ENV_MYSQLPASS"/g indexer/*.sh diff --git a/indexer/indexer-dir.sh b/indexer/indexer-dir.sh index 8960b2a..0847019 100755 --- a/indexer/indexer-dir.sh +++ b/indexer/indexer-dir.sh @@ -24,9 +24,9 @@ idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") # Lancement de l'indexation si la consolidation a eu lieu if [ -n "$idx" ]; then if [[ "$idx" > 0 ]]; then - - echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION DIR" >> $PATH_LOG/indexer.log - + + echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION DIR" >> $PATH_LOG/indexer.log + # Nombre de lignes dans la table etablissements output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-dir.sql) nbC=''; @@ -45,9 +45,11 @@ if [ -n "$idx" ]; then done # Suppression fin de ligne nbT=$(echo $nbT|sed -e "s/^[nbT ]*//g"||sed -e "s/[ ]*$//g") - + if [ -z "$nbC" ]; then + echo $nbC elif [ -z "$nbT" ]; then + echo $nbT elif [ "$nbT" -gt "$nbC" ]; then # Create file to indicate indexing @@ -59,21 +61,19 @@ if [ -n "$idx" ]; then # Sphinx rotate echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log - $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate dir dir_phx >> $PATH_LOG/indexer.log - echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log - - # Rotation des tables MySQL + $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate dir dir_phx >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log + + # Rotation des tables MySQL echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Debut" >> $PATH_LOG/indexer.log - mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-dir.sql >> $PATH_LOG/indexer.log - echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log + mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/rotate-dir.sql >> 
$PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Fin" >> $PATH_LOG/indexer.log # Enregistrement Fin Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log echo "FIN $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION DIR" >> $PATH_LOG/indexer.log - fi - fi fi diff --git a/indexer/indexer-ent.sh b/indexer/indexer-ent.sh index e13e169..5b1279b 100755 --- a/indexer/indexer-ent.sh +++ b/indexer/indexer-ent.sh @@ -25,7 +25,7 @@ idx=$(echo $idx|sed -e "s/^[idx ]*//g"||sed -e "s/[ ]*$//g") if [ -n "$idx" ]; then if [[ "$idx" > 0 ]]; then - echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log + echo "$(date '+%Y-%m-%d %H:%M:%S') === INDEXATION ENT" >> $PATH_LOG/indexer.log # Nombre de lignes dans la table etablissements output=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < $PATH_SQL/count-ent.sql) @@ -47,21 +47,22 @@ if [ -n "$idx" ]; then nbT=$(echo $nbT|sed -e "s/^[nbT ]*//g"||sed -e "s/[ ]*$//g") if [ -z "$nbC" ]; then + echo $nbC elif [ -z "$nbT" ]; then - elif [ "$nbT" -gt "$nbC" ]; then - + echo $nbT + elif [ "$nbT" -gt "$nbC" ]; then # Create file to indicate indexing rm -f $PATH_LOG/ent-*.idx echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX # Enregistrement Debut Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingBegin=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log - + # Sphinx rotate echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.conf --rotate ent ent_phx >> $PATH_LOG/indexer.log echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Fin" >> $PATH_LOG/indexer.log - + # Rotation des tables MySQL echo "$(date '+%Y-%m-%d %H:%M:%S') - Rotation Table - Debut" >> $PATH_LOG/indexer.log mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS jo < 
$PATH_SQL/rotate-ent.sql >> $PATH_LOG/indexer.log @@ -69,12 +70,9 @@ if [ -n "$idx" ]; then # Enregistrement Fin Indexation mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS sdv1 -e "UPDATE sphinx_idx SET indexingEnd=NOW() WHERE id=$idx" >> $PATH_LOG/indexer.log - - echo "FIN $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX - - echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log + echo "FIN $(date '+%Y-%m-%d %H:%M:%S')" >> $FILEINDEX + echo "$(date '+%Y-%m-%d %H:%M:%S') === FIN INDEXATION ENT" >> $PATH_LOG/indexer.log fi - fi fi diff --git a/indexer/manual-ent.sh b/indexer/manual-ent.sh index c6b5cfb..2d4f647 100644 --- a/indexer/manual-ent.sh +++ b/indexer/manual-ent.sh @@ -3,7 +3,7 @@ PATH_LOG=/var/lib/sphinxsearch/log PATH_BIN=/usr/bin echo "$(date '+%Y-%m-%d %H:%M:%S') === REPRISE INDEXATION ENT" >> $PATH_LOG/indexer.log - + # Sphinx rotate echo "$(date '+%Y-%m-%d %H:%M:%S') - Sphinx - Debut" >> $PATH_LOG/indexer.log $PATH_BIN/indexer --config /etc/sphinxsearch/sphinx.reprise.conf --rotate ent ent_phx >> $PATH_LOG/indexer.log diff --git a/indexer/sql/count-ent.sql b/indexer/sql/count-ent.sql index e1aa32b..464c086 100644 --- a/indexer/sql/count-ent.sql +++ b/indexer/sql/count-ent.sql @@ -1 +1 @@ -SELECT count(*) AS etab from etablissements; +SELECT count(*) AS nb from etablissements; diff --git a/indexer/sql/count-enttmp.sql b/indexer/sql/count-enttmp.sql index cb3ce37..4b349d9 100644 --- a/indexer/sql/count-enttmp.sql +++ b/indexer/sql/count-enttmp.sql @@ -1 +1 @@ -SELECT count(*) AS etab_tmp from etablissements_tmp; +SELECT count(*) AS nb from etablissements_tmp;