157 lines
3.9 KiB
Plaintext
157 lines
3.9 KiB
Plaintext
La base de données doit communiquer en UTF-8
|
|
|
|
sql_query_pre = SET NAMES utf8
|
|
charset_type = utf-8
|
|
|
|
# 'utf-8' defaults for English and Russian
|
|
charset_table = 0..9, A..Z->a..z, _, a..z, \
|
|
U+410..U+42F->U+430..U+44F, U+430..U+44F
|
|
|
|
# For french
|
|
charset_table = 0..9, A..Z->a..z, a..z, \
|
|
U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, \
|
|
U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, \
|
|
U+0152->U+0153, U+0153
|
|
|
|
Morphology
|
|
==========
|
|
# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
|
|
# 'soundex', and 'metaphone'; additional preprocessors available from
|
|
# libstemmer are 'libstemmer_XXX', where XXX is algorithm code
|
|
# (see libstemmer_c/libstemmer/modules.txt)
|
|
|
|
morphology = libstemmer_french
|
|
|
|
Taille minimum des mots où l'on applique la morphologie
|
|
min_stemming_len = 4
|
|
|
|
|
|
According to libstemmer_c/libstemmer/modules.txt, the French module can be referred to by any of
|
|
french, fr, fre, fra... french UTF_8,ISO_8859_1 french,fr,fre,fra
|
|
|
|
|
|
Utilisation des stopwords
|
|
=========================
|
|
une, le, la, les, de, du, dans, l', d', @
|
|
|
|
Utilisation des wordforms
|
|
=========================
|
|
|
|
& > ET
|
|
|
|
Les chiffres
|
|
|
|
un > 1
|
|
deux > 2
|
|
trois > 3
|
|
quatre > 4
|
|
cinq > 5
|
|
six > 6
|
|
sept > 7
|
|
huit > 8
|
|
neuf > 9
|
|
dix > 10
|
|
onze > 11
|
|
douze > 12
|
|
treize > 13
|
|
quatorze > 14
|
|
quinze > 15
|
|
seize > 16
|
|
vingt > 20
|
|
vingts > 20
|
|
trente > 30
|
|
quarante > 40
|
|
cinquante > 50
|
|
soixante > 60
|
|
quatrevingt > 80
|
|
cent > 100
|
|
cents > 100
|
|
mille > 1000
|
|
|
|
Les chiffres romains
|
|
|
|
|
|
Index DIR
|
|
=========
|
|
|
|
# Data source "dir": reads rows from the rncs_dirigeants table of the "jo"
# MySQL database on 192.168.3.30.
source dir
|
|
{
|
|
type = mysql
|
|
sql_host = 192.168.3.30
|
|
sql_user = sphinx
|
|
# NOTE(review): the database password is stored here in clear text.
sql_pass = indexer
|
|
sql_db = jo
|
|
# No pre-query is configured (empty value).
# NOTE(review): the notes at the top of this file recommend `SET NAMES utf8`,
# while the matching index uses charset_type = sbcs -- confirm that leaving
# this empty is intentional.
sql_query_pre =
|
|
# Main fetch query. "genre" is derived from civilite: 1 for 'M', 2 for
# 'MME' or 'MLLE', 0 otherwise. "nom" is dirRS, nom and naissance_nom joined
# with spaces. naissance_date is split into year / month / day columns.
sql_query = SELECT id, siren, actif, \
|
|
IF(civilite='M',1,IF(civilite='MME' OR civilite='MLLE',2,0)) AS genre, \
|
|
CONCAT_WS(' ',dirRS,nom,naissance_nom) AS nom, prenom, \
|
|
YEAR(naissance_date) AS naiss_annee, \
|
|
MONTH(naissance_date) AS naiss_mois, \
|
|
DAY(naissance_date) AS naiss_jour, \
|
|
naissance_lieu, adr_dep \
|
|
FROM rncs_dirigeants;
|
|
# The columns below are declared as unsigned-integer attributes. The other
# selected columns (nom, prenom, naissance_lieu) are not declared as
# attributes -- presumably they are indexed as full-text fields; confirm.
sql_attr_uint = naiss_annee
|
|
sql_attr_uint = naiss_mois
|
|
sql_attr_uint = naiss_jour
|
|
sql_attr_uint = siren
|
|
sql_attr_uint = actif
|
|
sql_attr_uint = genre
|
|
# NOTE(review): verify adr_dep never holds non-numeric department codes
# (e.g. '2A' / '2B'), since it is declared as an unsigned integer.
sql_attr_uint = adr_dep
|
|
}
|
|
|
|
# Index "dir": built from the source named "dir"; index files are written
# under the path /dbs/sphinx/dir.
index dir
|
|
{
|
|
source = dir
|
|
path = /dbs/sphinx/dir
|
|
docinfo = extern
|
|
# No morphology preprocessor (stemming) is applied to this index.
morphology = none
|
|
# Single-byte charset. The table below lowercases A..Z and U+C0..U+DE, keeps
# U+DF and U+E0..U+FF as-is, and also accepts the characters
# # % & + = @ (U+23, U+25, U+26, U+2B, U+3D, U+40).
charset_type = sbcs
|
|
charset_table = 0..9, A..Z->a..z, a..z, \
|
|
U+23, U+25, U+26, U+2B, U+3D, U+40, \
|
|
U+C0..U+DE->U+E0..U+FE, U+DF, U+E0..U+FF
|
|
}
|
|
|
|
Index ENT
|
|
=========
|
|
# Data source "ent": reads rows from the etablissements_tmp table of the "jo"
# MySQL database on 192.168.3.30.
source ent
|
|
{
|
|
type = mysql
|
|
sql_host = 192.168.3.30
|
|
sql_user = sphinx
|
|
# NOTE(review): the database password is stored here in clear text.
sql_pass = indexer
|
|
sql_db = jo
|
|
# No pre-query is configured (empty value).
# NOTE(review): the notes at the top of this file recommend `SET NAMES utf8`,
# while the matching index uses charset_type = sbcs -- confirm that leaving
# this empty is intentional.
sql_query_pre =
|
|
# Main fetch query. In addition to the raw name columns, "nom" concatenates
# raisonSociale, enseigne, sigle and identite_pre with their inner spaces
# removed. "adresse" is adr_libVoie followed by adr_comp. "sirenValide" is
# the result of the comparison siren > 200.
sql_query = SELECT id, siren, nic, siege, raisonSociale, enseigne, sigle, identite_pre, \
|
|
CONCAT_WS(' ', REPLACE(raisonSociale,' ',''), REPLACE(enseigne,' ',''), \
|
|
REPLACE(sigle,' ',''),REPLACE(identite_pre,' ','')) AS nom, \
|
|
adr_dep, actif, adr_num, adr_typeVoie, CONCAT(adr_libVoie,' ',adr_comp) AS adresse, \
|
|
adr_cp, adr_ville AS ville, cj, ape_etab, (siren>200) AS sirenValide, tel, fax, rang \
|
|
FROM etablissements_tmp;
|
|
|
|
# adr_typeVoie is declared as a str2ordinal attribute; the columns after it
# are declared as unsigned-integer attributes. Selected columns that are not
# declared here are presumably indexed as full-text fields -- confirm.
sql_attr_str2ordinal = adr_typeVoie
|
|
sql_attr_uint = siren
|
|
sql_attr_uint = nic
|
|
sql_attr_uint = siege
|
|
sql_attr_uint = adr_num
|
|
sql_attr_uint = adr_cp
|
|
# NOTE(review): verify adr_dep never holds non-numeric department codes
# (e.g. '2A' / '2B'), since it is declared as an unsigned integer.
sql_attr_uint = adr_dep
|
|
sql_attr_uint = actif
|
|
sql_attr_uint = cj
|
|
sql_attr_uint = sirenValide
|
|
sql_attr_uint = rang
|
|
}
|
|
|
|
# Index "ent": built from the source named "ent"; index files are written
# under the path /dbs/sphinx/ent.
index ent
|
|
{
|
|
# Must be the name of a declared source. The source block for this index is
# declared as `source ent`, so the reference is "ent" (not "source_ent").
source = ent
|
|
path = /dbs/sphinx/ent
|
|
docinfo = extern
|
|
# French stemming through libstemmer.
morphology = libstemmer_french
|
|
# Word-form substitution dictionary applied to this index.
wordforms = /usr/local/sphinx/etc/wordforms.txt
|
|
# Single-byte charset. The table below lowercases A..Z and U+C0..U+DE, keeps
# U+DF and U+E0..U+FF as-is, and also accepts the characters
# # % & + = @ (U+23, U+25, U+26, U+2B, U+3D, U+40).
charset_type = sbcs
|
|
charset_table = 0..9, A..Z->a..z, a..z, \
|
|
U+23, U+25, U+26, U+2B, U+3D, U+40, \
|
|
U+C0..U+DE->U+E0..U+FE, U+DF, U+E0..U+FF
|
|
}
|
|
|