sdsphinx/config/DOC
Michael RICOIS c92efda6c9 Actualise
2013-10-28 08:40:59 +00:00

157 lines
3.9 KiB
Plaintext

La base de données doit communiquer en UTF-8
sql_query_pre = SET NAMES utf8
charset_type = utf-8
# 'utf-8' defaults for English and Russian
charset_table = 0..9, A..Z->a..z, _, a..z, \
U+410..U+42F->U+430..U+44F, U+430..U+44F
# For french
charset_table = 0..9, A..Z->a..z, a..z, \
U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, \
U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, \
U+0152->U+0153, U+0153
Morphology
==========
# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
# 'soundex', and 'metaphone'; additional preprocessors available from
# libstemmer are 'libstemmer_XXX', where XXX is algorithm code
# (see libstemmer_c/libstemmer/modules.txt)
morphology = libstemmer_french
Taille minimum des mots où l'on applique la morphologie
min_stemming_len = 4
According to libstemmer_c/libstemmer/modules.txt, the French module can be referred to by any of
french, fr, fre, fra... french UTF_8,ISO_8859_1 french,fr,fre,fra
Utilisation des stopwords
=========================
une, le, la, les, de, du, dans, l', d', @
Utilisation des wordforms
=========================
& > ET
Les chiffres
un > 1
deux > 2
trois > 3
quatre > 4
cinq > 5
six > 6
sept > 7
huit > 8
neuf > 9
dix > 10
onze > 11
douze > 12
treize > 13
quatorze > 14
quinze > 15
seize > 16
vingt > 20
vingts > 20
trente > 30
quarante > 40
cinquante > 50
soixante > 60
quatrevingt > 80
cent > 100
cents > 100
mille > 1000
Les chiffres romains
Index DIR
=========
# Data source "dir": rows are read from the MySQL table rncs_dirigeants.
source dir
{
    # --- Connection ---
    type            = mysql
    sql_host        = 192.168.3.30
    sql_user        = sphinx
    sql_pass        = indexer
    sql_db          = jo

    # Left empty: no statement is issued before the main query.
    sql_query_pre   =

    # --- Main fetch query ---
    # genre : 1 when civilite is 'M', 2 when it is 'MME' or 'MLLE', 0 otherwise.
    # nom   : dirRS, nom and naissance_nom joined with single spaces.
    # naissance_date is split into year / month / day columns.
    # Do not insert comment lines between the continued lines below.
    sql_query       = SELECT id, siren, actif, \
        IF(civilite='M',1,IF(civilite='MME' OR civilite='MLLE',2,0)) AS genre, \
        CONCAT_WS(' ',dirRS,nom,naissance_nom) AS nom, prenom, \
        YEAR(naissance_date) AS naiss_annee, \
        MONTH(naissance_date) AS naiss_mois, \
        DAY(naissance_date) AS naiss_jour, \
        naissance_lieu, adr_dep \
        FROM rncs_dirigeants;

    # --- Unsigned integer attributes (declaration order kept as-is) ---
    sql_attr_uint   = naiss_annee
    sql_attr_uint   = naiss_mois
    sql_attr_uint   = naiss_jour
    sql_attr_uint   = siren
    sql_attr_uint   = actif
    sql_attr_uint   = genre
    sql_attr_uint   = adr_dep
}
# Index "dir": built from the data source named "dir".
index dir
{
    source          = dir
    path            = /dbs/sphinx/dir
    docinfo         = extern

    # No stemming or other morphology preprocessing for this index.
    morphology      = none

    # Single-byte character set.
    charset_type    = sbcs

    # Accepted characters:
    #   - digits 0-9, letters a-z (A-Z folded to lowercase);
    #   - code points 0x23 '#', 0x25 '%', 0x26 '&', 0x2B '+', 0x3D '=', 0x40 '@';
    #   - 0xC0..0xDE folded onto 0xE0..0xFE, plus 0xDF and 0xE0..0xFF kept as-is
    #     (presumably Latin-1 accented letters -- confirm the code page in use).
    charset_table   = 0..9, A..Z->a..z, a..z, \
        U+23, U+25, U+26, U+2B, U+3D, U+40, \
        U+C0..U+DE->U+E0..U+FE, U+DF, U+E0..U+FF
}
Index ENT
=========
# Data source "ent": rows are read from the MySQL table etablissements_tmp.
source ent
{
    # --- Connection ---
    type                    = mysql
    sql_host                = 192.168.3.30
    sql_user                = sphinx
    sql_pass                = indexer
    sql_db                  = jo

    # Left empty: no statement is issued before the main query.
    sql_query_pre           =

    # --- Main fetch query ---
    # nom         : raisonSociale, enseigne, sigle and identite_pre, each with its
    #               spaces removed, then joined with single spaces.
    # adresse     : adr_libVoie and adr_comp separated by one space.
    # ville       : alias of adr_ville.
    # sirenValide : result of the comparison siren > 200.
    # Do not insert comment lines between the continued lines below.
    sql_query               = SELECT id, siren, nic, siege, raisonSociale, enseigne, sigle, identite_pre, \
        CONCAT_WS(' ', REPLACE(raisonSociale,' ',''), REPLACE(enseigne,' ',''), \
        REPLACE(sigle,' ',''),REPLACE(identite_pre,' ','')) AS nom, \
        adr_dep, actif, adr_num, adr_typeVoie, CONCAT(adr_libVoie,' ',adr_comp) AS adresse, \
        adr_cp, adr_ville AS ville, cj, ape_etab, (siren>200) AS sirenValide, tel, fax, rang \
        FROM etablissements_tmp;

    # --- Attributes (declaration order kept as-is) ---
    # Street type is stored as a string-to-ordinal attribute.
    sql_attr_str2ordinal    = adr_typeVoie

    # Unsigned integer attributes.
    sql_attr_uint           = siren
    sql_attr_uint           = nic
    sql_attr_uint           = siege
    sql_attr_uint           = adr_num
    sql_attr_uint           = adr_cp
    sql_attr_uint           = adr_dep
    sql_attr_uint           = actif
    sql_attr_uint           = cj
    sql_attr_uint           = sirenValide
    sql_attr_uint           = rang
}
# Index "ent": built from the data source named "ent".
index ent
{
    # Must match the name of a declared source block. The source for this
    # index is declared as "source ent", so the reference is "ent"
    # (it previously read "source_ent", which no visible block declares).
    source          = ent
    path            = /dbs/sphinx/ent
    docinfo         = extern

    # French stemming through libstemmer, plus a word-forms dictionary file.
    morphology      = libstemmer_french
    wordforms       = /usr/local/sphinx/etc/wordforms.txt

    # Single-byte character set.
    charset_type    = sbcs

    # Accepted characters:
    #   - digits 0-9, letters a-z (A-Z folded to lowercase);
    #   - code points 0x23 '#', 0x25 '%', 0x26 '&', 0x2B '+', 0x3D '=', 0x40 '@';
    #   - 0xC0..0xDE folded onto 0xE0..0xFE, plus 0xDF and 0xE0..0xFF kept as-is
    #     (presumably Latin-1 accented letters -- confirm the code page in use).
    charset_table   = 0..9, A..Z->a..z, a..z, \
        U+23, U+25, U+26, U+2B, U+3D, U+40, \
        U+C0..U+DE->U+E0..U+FE, U+DF, U+E0..U+FF
}