893f4da173
Modification de certains chemins Ajout du framework dans l'extranet
582 lines
17 KiB
PHP
582 lines
17 KiB
PHP
<?php

//
// $Id: sphinxapi.php,v 1.38 2006/12/06 00:21:29 shodan Exp $
//

//
// Copyright (c) 2001-2006, Andrew Aksyonoff. All rights reserved.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License. You should have
// received a copy of the GPL license along with this program; if you
// did not, you can find it at http://www.gnu.org/
//

/////////////////////////////////////////////////////////////////////////////
// PHP version of Sphinx searchd client (PHP API)
/////////////////////////////////////////////////////////////////////////////

/// searchd command identifiers (sent in the request header)
define('SEARCHD_COMMAND_SEARCH', 0);
define('SEARCHD_COMMAND_EXCERPT', 1);

/// command implementation versions spoken by this client
define('VER_COMMAND_SEARCH', 0x104);
define('VER_COMMAND_EXCERPT', 0x100);

/// status codes found in the searchd response header
define('SEARCHD_OK', 0);
define('SEARCHD_ERROR', 1);
define('SEARCHD_RETRY', 2);

/// query matching modes
define('SPH_MATCH_ALL', 0);
define('SPH_MATCH_ANY', 1);
define('SPH_MATCH_PHRASE', 2);
define('SPH_MATCH_BOOLEAN', 3);
define('SPH_MATCH_EXTENDED', 4);

/// result sorting modes
define('SPH_SORT_RELEVANCE', 0);
define('SPH_SORT_ATTR_DESC', 1);
define('SPH_SORT_ATTR_ASC', 2);
define('SPH_SORT_TIME_SEGMENTS', 3);
define('SPH_SORT_EXTENDED', 4);

/// attribute types reported in the result schema
define('SPH_ATTR_INTEGER', 1);
define('SPH_ATTR_TIMESTAMP', 2);

/// grouping functions applied to the group-by attribute
define('SPH_GROUPBY_DAY', 0);
define('SPH_GROUPBY_WEEK', 1);
define('SPH_GROUPBY_MONTH', 2);
define('SPH_GROUPBY_YEAR', 3);
define('SPH_GROUPBY_ATTR', 4);
|
|
|
|
/// sphinx searchd client class
///
/// Keeps the connection settings and per-query options (limits, match and
/// sort modes, weights, filters, grouping), talks to searchd over a TCP
/// socket, and decodes its binary replies. Query() and BuildExcerpts()
/// return false on failure; the reason is then available via GetLastError().
class SphinxClient
{
    var $_host;         ///< searchd host (default is "localhost")
    var $_port;         ///< searchd port (default is 3312)
    var $_offset;       ///< how many records to seek from result-set start (default is 0)
    var $_limit;        ///< how many records to return from result-set starting at offset (default is 20)
    var $_mode;         ///< query matching mode (default is SPH_MATCH_ALL)
    var $_weights;      ///< per-field weights (default is 1 for all fields)
    var $_sort;         ///< match sorting mode (default is SPH_SORT_RELEVANCE)
    var $_sortby;       ///< attribute to sort by (default is "")
    var $_min_id;       ///< min ID to match (default is 0)
    var $_max_id;       ///< max ID to match (default is UINT_MAX)
    var $_min;          ///< attribute name to min-value hash (for range filters)
    var $_max;          ///< attribute name to max-value hash (for range filters)
    var $_filter;       ///< attribute name to values set hash (for values-set filters)
    var $_groupby;      ///< group-by attribute name
    var $_groupfunc;    ///< function to pre-process group-by attribute value with
    var $_maxmatches;   ///< max matches to retrieve

    var $_error;        ///< last error message
    var $_warning;      ///< last warning message

    /////////////////////////////////////////////////////////////////////////////
    // common stuff
    /////////////////////////////////////////////////////////////////////////////

    /// create a new client object and fill defaults
    ///
    /// PHP 8 no longer treats a method named after the class as a constructor,
    /// so the real constructor delegates to the legacy one. It is declared
    /// first so that old PHP 5 releases do not complain about a redefined
    /// constructor.
    function __construct ()
    {
        $this->SphinxClient ();
    }

    /// legacy (PHP 4 style) constructor, kept for callers and subclasses that
    /// invoke it by name; (re)sets every option to its default value
    function SphinxClient ()
    {
        $this->_host        = "localhost";
        $this->_port        = 3312;
        $this->_offset      = 0;
        $this->_limit       = 20;
        $this->_mode        = SPH_MATCH_ALL;
        $this->_weights     = array ();
        $this->_sort        = SPH_SORT_RELEVANCE;
        $this->_sortby      = "";
        $this->_min_id      = 0;
        $this->_max_id      = 0xFFFFFFFF;
        $this->_min         = array ();
        $this->_max         = array ();
        $this->_filter      = array ();
        $this->_groupby     = "";
        $this->_groupfunc   = SPH_GROUPBY_DAY;
        $this->_maxmatches  = 1000;

        $this->_error       = "";
        $this->_warning     = "";
    }

    /// get last error message (string)
    function GetLastError ()
    {
        return $this->_error;
    }

    /// get last warning message (string)
    function GetLastWarning ()
    {
        return $this->_warning;
    }

    /// set searchd server
    ///
    /// $host is the host name (string), $port is the TCP port (int)
    function SetServer ( $host, $port )
    {
        assert ( is_string($host) );
        assert ( is_int($port) );
        $this->_host = $host;
        $this->_port = $port;
    }

    /////////////////////////////////////////////////////////////////////////////
    // low-level I/O helpers
    /////////////////////////////////////////////////////////////////////////////

    /// length-prefix a string the way the searchd protocol expects
    /// (32-bit big-endian byte count followed by the raw bytes)
    function _PackString ( $str )
    {
        $str = (string)$str;
        return pack ( "N", strlen($str) ) . $str;
    }

    /// write all of $data to the stream, retrying after partial writes
    ///
    /// returns true on success; sets the error message and returns false
    /// when the stream refuses to accept more data
    function _Send ( $fp, $data )
    {
        $total = strlen ( $data );
        $sent = 0;
        while ( $sent<$total )
        {
            $n = fwrite ( $fp, substr ( $data, $sent ) );
            if ( $n===false || $n===0 )
            {
                $this->_error = "failed to send request to searchd";
                return false;
            }
            $sent += $n;
        }
        return true;
    }

    /// read up to $count bytes from the stream
    ///
    /// stops early on EOF, read error or read timeout, so the caller must
    /// compare the length of the returned string with $count
    function _ReadBytes ( $fp, $count )
    {
        $data = "";
        $left = $count;
        while ( $left>0 && !feof($fp) )
        {
            $chunk = fread ( $fp, $left );
            if ( $chunk===false )
                break;

            // compare against "" explicitly: a chunk consisting of the
            // single character "0" is falsy in PHP but is valid data
            if ( $chunk==="" )
            {
                $meta = stream_get_meta_data ( $fp );
                if ( !empty($meta["timed_out"]) )
                    break; // do not keep re-issuing a timed-out read
                continue;
            }

            $data .= $chunk;
            $left -= strlen ( $chunk );
        }
        return $data;
    }

    /// decode $count consecutive 32-bit big-endian integers starting at
    /// offset $p of $response, and advance $p past them
    ///
    /// returns a zero-based array of integers, or false if the response
    /// is too short
    function _ReadInts ( $response, &$p, $count )
    {
        $bytes = 4*$count;
        if ( $count<1 || $p+$bytes>strlen($response) )
            return false;

        $vals = array_values ( unpack ( "N".$count, substr ( $response, $p, $bytes ) ) );
        $p += $bytes;
        return $vals;
    }

    /// decode one length-prefixed string starting at offset $p of $response,
    /// and advance $p past it
    ///
    /// returns the string, or false if the response is too short
    function _ReadString ( $response, &$p )
    {
        $hdr = $this->_ReadInts ( $response, $p, 1 );
        if ( $hdr===false )
            return false;

        $len = $hdr[0];
        if ( $len<0 || $p+$len>strlen($response) )
            return false;

        $str = $len ? substr ( $response, $p, $len ) : "";
        $p += $len;
        return $str;
    }

    /////////////////////////////////////////////////////////////////////////////

    /// connect to searchd server
    ///
    /// performs the protocol version handshake; returns the open socket,
    /// or false (with the error message set) on failure
    function _Connect ()
    {
        $fp = @fsockopen ( $this->_host, $this->_port );
        if ( !$fp )
        {
            $this->_error = "connection to {$this->_host}:{$this->_port} failed";
            return false;
        }

        // check version; a short or missing greeting counts as version 0
        $v = 0;
        $hello = $this->_ReadBytes ( $fp, 4 );
        if ( strlen($hello)==4 )
        {
            list(,$v) = unpack ( "N*", $hello );
            $v = (int)$v;
        }
        if ( $v<1 )
        {
            fclose ( $fp );
            $this->_error = "expected searchd protocol version 1+, got version '$v'";
            return false;
        }

        // all ok, send my version
        if ( !$this->_Send ( $fp, pack ( "N", 1 ) ) )
        {
            fclose ( $fp );
            return false;
        }
        return $fp;
    }

    /// get and check response packet from searchd server
    ///
    /// $fp is the socket returned by _Connect(); it is always closed here.
    /// $client_ver is the command version this client implements; if the
    /// server answers with an older one, a warning is stored.
    ///
    /// returns the response body (string), or false on failure
    function _GetResponse ( $fp, $client_ver )
    {
        // fixed 8-byte header: status (16 bit), version (16 bit), body length (32 bit)
        $header = $this->_ReadBytes ( $fp, 8 );
        if ( strlen($header)!=8 )
        {
            fclose ( $fp );
            $this->_error = "failed to read searchd response header";
            return false;
        }
        list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );

        $response = $this->_ReadBytes ( $fp, $len );
        fclose ( $fp );

        // check response
        $read = strlen ( $response );
        if ( !$len || $read!=$len )
        {
            $this->_error = $len
                ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
                : "received zero-sized searchd response";
            return false;
        }

        // check status; for errors the message follows a 4-byte length prefix
        if ( $status==SEARCHD_ERROR )
        {
            $this->_error = "searchd error: " . substr ( $response, 4 );
            return false;
        }
        if ( $status==SEARCHD_RETRY )
        {
            $this->_error = "temporary searchd error: " . substr ( $response, 4 );
            return false;
        }
        if ( $status!=SEARCHD_OK )
        {
            $this->_error = "unknown status code '$status'";
            return false;
        }

        // check version (major in the high byte, minor in the low byte)
        if ( $ver<$client_ver )
        {
            $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
                $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
        }

        return $response;
    }

    /////////////////////////////////////////////////////////////////////////////
    // searching
    /////////////////////////////////////////////////////////////////////////////

    /// set match offset, count, and max number to retrieve
    ///
    /// $max of 0 (the default) leaves the current max matches setting as is
    function SetLimits ( $offset, $limit, $max=0 )
    {
        assert ( is_int($offset) );
        assert ( is_int($limit) );
        assert ( is_int($max) );
        assert ( $offset>=0 );
        assert ( $limit>0 );
        assert ( $max>=0 );
        $this->_offset = $offset;
        $this->_limit = $limit;
        if ( $max>0 )
            $this->_maxmatches = $max;
    }

    /// set match mode (one of the SPH_MATCH_xxx constants)
    function SetMatchMode ( $mode )
    {
        assert ( $mode==SPH_MATCH_ALL
            || $mode==SPH_MATCH_ANY
            || $mode==SPH_MATCH_PHRASE
            || $mode==SPH_MATCH_BOOLEAN
            || $mode==SPH_MATCH_EXTENDED );
        $this->_mode = $mode;
    }

    /// set sort mode (one of the SPH_SORT_xxx constants)
    ///
    /// $sortby must be non-empty for every mode except SPH_SORT_RELEVANCE
    function SetSortMode ( $mode, $sortby="" )
    {
        assert (
            $mode==SPH_SORT_RELEVANCE ||
            $mode==SPH_SORT_ATTR_DESC ||
            $mode==SPH_SORT_ATTR_ASC ||
            $mode==SPH_SORT_TIME_SEGMENTS ||
            $mode==SPH_SORT_EXTENDED );
        assert ( is_string($sortby) );
        assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

        $this->_sort = $mode;
        $this->_sortby = $sortby;
    }

    /// set per-field weights (array of ints)
    function SetWeights ( $weights )
    {
        assert ( is_array($weights) );
        foreach ( $weights as $weight )
            assert ( is_int($weight) );

        $this->_weights = $weights;
    }

    /// set IDs range to match
    /// only match those records where document ID
    /// is between $min and $max (including $min and $max)
    function SetIDRange ( $min, $max )
    {
        assert ( is_int($min) );
        assert ( is_int($max) );
        assert ( $min<=$max );
        $this->_min_id = $min;
        $this->_max_id = $max;
    }

    /// set values filter
    /// only match those records where $attribute column values
    /// are in specified set
    ///
    /// an empty or non-array $values is ignored (no filter is stored)
    function SetFilter ( $attribute, $values )
    {
        assert ( is_string($attribute) );
        assert ( is_array($values) );
        assert ( count($values) );

        if ( is_array($values) && count($values) )
        {
            foreach ( $values as $value )
                assert ( is_int($value) );

            $this->_filter[$attribute] = $values;
        }
    }

    /// set range filter
    /// only match those records where $attribute column value
    /// is between $min and $max (including $min and $max)
    function SetFilterRange ( $attribute, $min, $max )
    {
        assert ( is_string($attribute) );
        assert ( is_int($min) );
        assert ( is_int($max) );
        assert ( $min<=$max );

        $this->_min[$attribute] = $min;
        $this->_max[$attribute] = $max;
    }

    /// set grouping attribute and function
    ///
    /// in grouping mode, all matches are assigned to different groups
    /// based on grouping function value.
    ///
    /// each group keeps track of the total match count, and the best match
    /// (in this group) according to current sorting function.
    ///
    /// the final result set contains one best match per group, with
    /// grouping function value and matches count attached. result set
    /// is sorted by grouping function value, in descending order.
    ///
    /// for example, if sorting by relevance and grouping by "published"
    /// attribute with SPH_GROUPBY_DAY function, then the result set will
    /// contain one most relevant match per each day when there were any
    /// matches published, with day number and per-day match count attached,
    /// and sorted by day number in descending order (ie. recent days first).
    function SetGroupBy ( $attribute, $func )
    {
        assert ( is_string($attribute) );
        assert ( $func==SPH_GROUPBY_DAY
            || $func==SPH_GROUPBY_WEEK
            || $func==SPH_GROUPBY_MONTH
            || $func==SPH_GROUPBY_YEAR
            || $func==SPH_GROUPBY_ATTR );

        $this->_groupby = $attribute;
        $this->_groupfunc = $func;
    }

    /// connect to searchd server and run given search query
    ///
    /// $query is query string
    /// $index is index name to query, default is "*" which means to query all indexes
    ///
    /// returns false on failure
    /// returns hash which has the following keys on success:
    ///     "fields"
    ///         array of full-text field names
    ///     "attrs"
    ///         hash which maps attribute names to attribute types
    ///     "matches"
    ///         hash which maps found document_id to ( "weight", "attrs" ) hash;
    ///         the key is absent when there are no matches
    ///     "total"
    ///         total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
    ///     "total_found"
    ///         total amount of matching documents in index
    ///     "time"
    ///         search time
    ///     "words"
    ///         hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash;
    ///         the key is absent when the reply lists no words
    function Query ( $query, $index="*" )
    {
        $fp = $this->_Connect ();
        if ( !$fp )
            return false;

        /////////////////
        // build request
        /////////////////

        $req = pack ( "NNNN", $this->_offset, $this->_limit, $this->_mode, $this->_sort ); // mode and limits
        $req .= $this->_PackString ( $this->_sortby );
        $req .= $this->_PackString ( $query ); // query itself

        $req .= pack ( "N", count($this->_weights) ); // weights
        foreach ( $this->_weights as $weight )
            $req .= pack ( "N", (int)$weight );

        $req .= $this->_PackString ( $index ); // indexes
        $req .= pack ( "NN", (int)$this->_min_id, (int)$this->_max_id ); // id range

        // filters: range filters carry a zero value count followed by min
        // and max; value-set filters carry the count followed by the values
        $req .= pack ( "N", count($this->_min) + count($this->_filter) );
        foreach ( $this->_min as $attr => $min )
            $req .= $this->_PackString ( $attr ) . pack ( "NNN", 0, $min, $this->_max[$attr] );
        foreach ( $this->_filter as $attr => $values )
        {
            $req .= $this->_PackString ( $attr ) . pack ( "N", count($values) );
            foreach ( $values as $value )
                $req .= pack ( "N", $value );
        }

        // group-by
        $req .= pack ( "N", $this->_groupfunc ) . $this->_PackString ( $this->_groupby );

        // max matches to retrieve
        $req .= pack ( "N", $this->_maxmatches );

        ////////////////////////////
        // send query, get response
        ////////////////////////////

        $req = pack ( "nnN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, strlen($req) ) . $req; // add header
        if ( !$this->_Send ( $fp, $req ) )
        {
            fclose ( $fp );
            return false;
        }

        $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH );
        if ( $response===false )
            return false;

        //////////////////
        // parse response
        //////////////////

        $result = $this->_ParseSearchResponse ( $response );
        if ( $result===false )
            $this->_error = "incomplete reply";
        return $result;
    }

    /// decode the body of a search reply into the hash documented on Query()
    ///
    /// every read is bounds-checked; returns false if the body ends early
    function _ParseSearchResponse ( $response )
    {
        $result = array ();
        $p = 0;

        // schema: full-text field names
        $hdr = $this->_ReadInts ( $response, $p, 1 );
        if ( $hdr===false )
            return false;
        $fields = array ();
        for ( $i=0; $i<$hdr[0]; $i++ )
        {
            $name = $this->_ReadString ( $response, $p );
            if ( $name===false )
                return false;
            $fields[] = $name;
        }
        $result["fields"] = $fields;

        // schema: attribute names and types
        $hdr = $this->_ReadInts ( $response, $p, 1 );
        if ( $hdr===false )
            return false;
        $attrs = array ();
        for ( $i=0; $i<$hdr[0]; $i++ )
        {
            $name = $this->_ReadString ( $response, $p );
            if ( $name===false )
                return false;
            $type = $this->_ReadInts ( $response, $p, 1 );
            if ( $type===false )
                return false;
            $attrs[$name] = $type[0];
        }
        $result["attrs"] = $attrs;

        // matches: document id, weight, then one value per schema attribute
        $hdr = $this->_ReadInts ( $response, $p, 1 );
        if ( $hdr===false )
            return false;
        for ( $i=0; $i<$hdr[0]; $i++ )
        {
            $head = $this->_ReadInts ( $response, $p, 2 );
            if ( $head===false )
                return false;
            $doc = $head[0];

            $result["matches"][$doc]["weight"] = $head[1];
            foreach ( $attrs as $attr=>$type )
            {
                $val = $this->_ReadInts ( $response, $p, 1 );
                if ( $val===false )
                    return false;
                $result["matches"][$doc]["attrs"][$attr] = $val[0];
            }
        }

        // summary: total, total found, time in milliseconds, word count
        $tail = $this->_ReadInts ( $response, $p, 4 );
        if ( $tail===false )
            return false;
        $result["total"] = $tail[0];
        $result["total_found"] = $tail[1];
        $result["time"] = sprintf ( "%.3f", $tail[2]/1000 );

        // per-word statistics
        for ( $i=0; $i<$tail[3]; $i++ )
        {
            $word = $this->_ReadString ( $response, $p );
            if ( $word===false )
                return false;
            $stats = $this->_ReadInts ( $response, $p, 2 );
            if ( $stats===false )
                return false;

            $result["words"][$word] = array ( "docs"=>$stats[0], "hits"=>$stats[1] );
        }

        return $result;
    }

    /////////////////////////////////////////////////////////////////////////////
    // excerpts generation
    /////////////////////////////////////////////////////////////////////////////

    /// connect to searchd server and generate excerpts from given documents
    ///
    /// $docs is an array of strings which represent the documents' contents
    /// $index is a string specifying the index which settings will be used
    ///     for stemming, lexing and case folding
    /// $words is a string which contains the words to highlight
    /// $opts is a hash which contains additional optional highlighting parameters:
    ///     "before_match"
    ///         a string to insert before a set of matching words, default is "<b>"
    ///     "after_match"
    ///         a string to insert after a set of matching words, default is "</b>"
    ///     "chunk_separator"
    ///         a string to insert between excerpts chunks, default is " ... "
    ///     "limit"
    ///         max excerpt size in symbols (codepoints), default is 256
    ///     "around"
    ///         how many words to highlight around each match, default is 5
    ///
    /// returns false on failure
    /// returns an array of string excerpts on success
    function BuildExcerpts ( $docs, $index, $words, $opts=array() )
    {
        assert ( is_array($docs) );
        assert ( is_string($index) );
        assert ( is_string($words) );
        assert ( is_array($opts) );

        $fp = $this->_Connect ();
        if ( !$fp )
            return false;

        /////////////////
        // fixup options
        /////////////////

        $defaults = array (
            "before_match"      => "<b>",
            "after_match"       => "</b>",
            "chunk_separator"   => " ... ",
            "limit"             => 256,
            "around"            => 5 );
        foreach ( $defaults as $key=>$default )
            if ( !isset($opts[$key]) )
                $opts[$key] = $default;

        /////////////////
        // build request
        /////////////////

        // v.1.0 req
        $req = pack ( "NN", 0, 1 ); // mode=0, flags=1 (remove spaces)
        $req .= $this->_PackString ( $index ); // req index
        $req .= $this->_PackString ( $words ); // req words

        // options
        $req .= $this->_PackString ( $opts["before_match"] );
        $req .= $this->_PackString ( $opts["after_match"] );
        $req .= $this->_PackString ( $opts["chunk_separator"] );
        $req .= pack ( "N", (int)$opts["limit"] );
        $req .= pack ( "N", (int)$opts["around"] );

        // documents
        $ndocs = count ( $docs );
        $req .= pack ( "N", $ndocs );
        foreach ( $docs as $doc )
        {
            assert ( is_string($doc) );
            $req .= $this->_PackString ( $doc );
        }

        ////////////////////////////
        // send query, get response
        ////////////////////////////

        $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, strlen($req) ) . $req; // add header
        if ( !$this->_Send ( $fp, $req ) )
        {
            fclose ( $fp );
            return false;
        }

        $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT );
        if ( $response===false )
            return false;

        //////////////////
        // parse response
        //////////////////

        // the reply holds one length-prefixed excerpt per submitted document
        $pos = 0;
        $res = array ();
        for ( $i=0; $i<$ndocs; $i++ )
        {
            $excerpt = $this->_ReadString ( $response, $pos );
            if ( $excerpt===false )
            {
                $this->_error = "incomplete reply";
                return false;
            }
            $res[] = $excerpt;
        }

        return $res;
    }
}

//
// $Id: sphinxapi.php,v 1.38 2006/12/06 00:21:29 shodan Exp $
//

?>
|