<?php

//
// $Id: sphinxapi.php,v 1.38 2006/12/06 00:21:29 shodan Exp $
//

//
// Copyright (c) 2001-2006, Andrew Aksyonoff. All rights reserved.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License. You should have
// received a copy of the GPL license along with this program; if you
// did not, you can find it at http://www.gnu.org/
//

/////////////////////////////////////////////////////////////////////////////
// PHP version of Sphinx searchd client (PHP API)
/////////////////////////////////////////////////////////////////////////////

/// searchd command identifiers
define ( 'SEARCHD_COMMAND_SEARCH',  0 );
define ( 'SEARCHD_COMMAND_EXCERPT', 1 );

/// client-side implementation version of each command (high byte is major, low byte is minor)
define ( 'VER_COMMAND_SEARCH',  0x104 );
define ( 'VER_COMMAND_EXCERPT', 0x100 );

/// status codes found in a searchd response header
define ( 'SEARCHD_OK',    0 );
define ( 'SEARCHD_ERROR', 1 );
define ( 'SEARCHD_RETRY', 2 );

/// query matching modes
define ( 'SPH_MATCH_ALL',      0 );
define ( 'SPH_MATCH_ANY',      1 );
define ( 'SPH_MATCH_PHRASE',   2 );
define ( 'SPH_MATCH_BOOLEAN',  3 );
define ( 'SPH_MATCH_EXTENDED', 4 );

/// result sorting modes
define ( 'SPH_SORT_RELEVANCE',     0 );
define ( 'SPH_SORT_ATTR_DESC',     1 );
define ( 'SPH_SORT_ATTR_ASC',      2 );
define ( 'SPH_SORT_TIME_SEGMENTS', 3 );
define ( 'SPH_SORT_EXTENDED',      4 );

/// attribute types
define ( 'SPH_ATTR_INTEGER',   1 );
define ( 'SPH_ATTR_TIMESTAMP', 2 );

/// grouping functions
define ( 'SPH_GROUPBY_DAY',   0 );
define ( 'SPH_GROUPBY_WEEK',  1 );
define ( 'SPH_GROUPBY_MONTH', 2 );
define ( 'SPH_GROUPBY_YEAR',  3 );
define ( 'SPH_GROUPBY_ATTR',  4 );

/// sphinx searchd client class
///
/// configure the client with the Set*() methods, then call Query() or
/// BuildExcerpts(); both open a new connection per call and return false
/// on failure, in which case GetLastError() holds the message
class SphinxClient
{
	var $_host;		///< searchd host (default is "localhost")
	var $_port;		///< searchd port (default is 3312)
	var $_offset;	///< how many records to seek from result-set start (default is 0)
	var $_limit;	///< how many records to return from result-set starting at offset (default is 20)
	var $_mode;		///< query matching mode (default is SPH_MATCH_ALL)
	var $_weights;	///< per-field weights (default is 1 for all fields)
	var $_sort;		///< match sorting mode (default is SPH_SORT_RELEVANCE)
	var $_sortby;	///< attribute to sort by (default is "")
	var $_min_id;	///< min ID to match (default is 0)
	var $_max_id;	///< max ID to match (default is UINT_MAX)
	var $_min;		///< attribute name to min-value hash (for range filters)
	var $_max;		///< attribute name to max-value hash (for range filters)
	var $_filter;	///< attribute name to values set hash (for values-set filters)
	var $_groupby;	///< group-by attribute name
	var $_groupfunc;///< function to pre-process group-by attribute value with
	var $_maxmatches;///< max matches to retrieve

	var $_error;	///< last error message
	var $_warning;	///< last warning message

	/////////////////////////////////////////////////////////////////////////////
	// common stuff
	/////////////////////////////////////////////////////////////////////////////

	/// create a new client object and fill defaults
	///
	/// this is the constructor proper; PHP 8 no longer treats a method
	/// named after the class as a constructor, so the defaults must be
	/// filled in here for "new SphinxClient()" to work there
	function __construct ()
	{
		$this->_host	= "localhost";
		$this->_port	= 3312;
		$this->_offset	= 0;
		$this->_limit	= 20;
		$this->_mode	= SPH_MATCH_ALL;
		$this->_weights	= array ();
		$this->_sort	= SPH_SORT_RELEVANCE;
		$this->_sortby	= "";
		$this->_min_id	= 0;
		$this->_max_id	= 0xFFFFFFFF;
		$this->_min		= array ();
		$this->_max		= array ();
		$this->_filter	= array ();
		$this->_groupby		= "";
		$this->_groupfunc	= SPH_GROUPBY_DAY;
		$this->_maxmatches	= 1000;

		$this->_error	= "";
		$this->_warning	= "";
	}

	/// PHP 4 style constructor, kept for backward compatibility
	/// (PHP 4 itself, and subclasses calling parent::SphinxClient());
	/// simply delegates to __construct()
	function SphinxClient ()
	{
		$this->__construct ();
	}

	/// get last error message (string)
	function GetLastError ()
	{
		return $this->_error;
	}

	/// get last warning message (string)
	function GetLastWarning ()
	{
		return $this->_warning;
	}

	/// set searchd server
	///
	/// $host is a host name string, $port is an integer TCP port
	function SetServer ( $host, $port )
	{
		assert ( is_string($host) );
		assert ( is_int($port) );
		$this->_host = $host;
		$this->_port = $port;
	}

	/////////////////////////////////////////////////////////////////////////////

	/// connect to searchd server
	///
	/// reads the 4-byte server protocol version, then sends our own (1)
	///
	/// returns an open socket on success
	/// returns false on failure, and sets the error message
	function _Connect ()
	{
		if (!( $fp = @fsockopen ( $this->_host, $this->_port ) ) )
		{
			$this->_error = "connection to {$this->_host}:{$this->_port} failed";
			return false;
		}

		// check version
		// a short or failed read is treated as version 0, so that it is
		// rejected below instead of being passed to unpack()
		$v = 0;
		$raw = fread ( $fp, 4 );
		if ( is_string($raw) && strlen($raw)==4 )
		{
			list(,$v) = unpack ( "N*", $raw );
			$v = (int)$v;
		}
		if ( $v<1 )
		{
			fclose ( $fp );
			$this->_error = "expected searchd protocol version 1+, got version '$v'";
			return false;
		}

		// all ok, send my version
		fwrite ( $fp, pack ( "N", 1 ) );
		return $fp;
	}

	/// get and check response packet from searchd server
	///
	/// $fp is an open stream to read from; it is always closed here
	/// $client_ver is the client-side version of the command that was sent
	///
	/// the packet is an 8-byte header (16-bit status, 16-bit command
	/// version, 32-bit body length, all big-endian) followed by the body
	///
	/// returns the response body (string) on success; sets the warning
	///		message if the server command version is older than $client_ver
	/// returns false on failure, and sets the error message
	function _GetResponse ( $fp, $client_ver )
	{
		$header = fread ( $fp, 8 );
		if ( !is_string($header) || strlen($header)!=8 )
		{
			// no (complete) header means there is nothing to unpack
			fclose ( $fp );
			$this->_error = "received zero-sized searchd response";
			return false;
		}

		list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
		$response = "";
		$left = $len;
		while ( $left>0 && !feof($fp) )
		{
			$chunk = fread ( $fp, $left );
			if ( $chunk===false )
				break; // hard read error, the length check below reports it

			// compare against "" explicitly; a chunk that is exactly "0"
			// is valid data even though it is falsy
			if ( $chunk!=="" )
			{
				$response .= $chunk;
				$left -= strlen($chunk);
			}
		}
		fclose ( $fp );

		// check response
		$read = strlen ( $response );
		if ( !$response || $read!=$len )
		{
			$this->_error = $len
				? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
				: "received zero-sized searchd response";
			return false;
		}

		// check status
		// on errors, the message follows its own 4-byte length prefix
		if ( $status==SEARCHD_ERROR )
		{
			$this->_error = "searchd error: " . substr ( $response, 4 );
			return false;
		}
		if ( $status==SEARCHD_RETRY )
		{
			$this->_error = "temporary searchd error: " . substr ( $response, 4 );
			return false;
		}
		if ( $status!=SEARCHD_OK )
		{
			$this->_error = "unknown status code '$status'";
			return false;
		}

		// check version
		if ( $ver<$client_ver )
		{
			$this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
				$ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
		}

		return $response;
	}

	/////////////////////////////////////////////////////////////////////////////
	// searching
	/////////////////////////////////////////////////////////////////////////////

	/// set match offset, count, and max number to retrieve
	///
	/// $max of 0 (the default) leaves the current max matches setting as is
	function SetLimits ( $offset, $limit, $max=0 )
	{
		assert ( is_int($offset) );
		assert ( is_int($limit) );
		assert ( $offset>=0 );
		assert ( $limit>0 );
		assert ( $max>=0 );
		$this->_offset = $offset;
		$this->_limit = $limit;
		if ( $max>0 )
			$this->_maxmatches = $max;
	}

	/// set match mode
	///
	/// $mode is one of SPH_MATCH_xxx constants
	function SetMatchMode ( $mode )
	{
		assert ( $mode==SPH_MATCH_ALL
			|| $mode==SPH_MATCH_ANY
			|| $mode==SPH_MATCH_PHRASE
			|| $mode==SPH_MATCH_BOOLEAN
			|| $mode==SPH_MATCH_EXTENDED );
		$this->_mode = $mode;
	}

	/// set sort mode
	///
	/// $mode is one of SPH_SORT_xxx constants
	/// $sortby must be non-empty for every mode except SPH_SORT_RELEVANCE
	function SetSortMode ( $mode, $sortby="" )
	{
		assert (
			$mode==SPH_SORT_RELEVANCE ||
			$mode==SPH_SORT_ATTR_DESC ||
			$mode==SPH_SORT_ATTR_ASC ||
			$mode==SPH_SORT_TIME_SEGMENTS ||
			$mode==SPH_SORT_EXTENDED );
		assert ( is_string($sortby) );
		assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

		$this->_sort = $mode;
		$this->_sortby = $sortby;
	}

	/// set per-field weights
	///
	/// $weights is an array of integers
	function SetWeights ( $weights )
	{
		assert ( is_array($weights) );
		foreach ( $weights as $weight )
			assert ( is_int($weight) );

		$this->_weights = $weights;
	}

	/// set IDs range to match
	/// only match those records where document ID
	/// is between $min and $max (including $min and $max)
	function SetIDRange ( $min, $max )
	{
		assert ( is_int($min) );
		assert ( is_int($max) );
		assert ( $min<=$max );
		$this->_min_id = $min;
		$this->_max_id = $max;
	}

	/// set values filter
	/// only match those records where $attribute column values
	/// are in specified set
	///
	/// $values must be a non-empty array of integers; anything else is
	/// ignored (and trips the assertions when those are enabled)
	function SetFilter ( $attribute, $values )
	{
		assert ( is_string($attribute) );
		assert ( is_array($values) );
		assert ( count($values) );

		if ( is_array($values) && count($values) )
		{
			foreach ( $values as $value )
				assert ( is_int($value) );

			$this->_filter[$attribute] = $values;
		}
	}

	/// set range filter
	/// only match those records where $attribute column value
	/// is between $min and $max (including $min and $max)
	function SetFilterRange ( $attribute, $min, $max )
	{
		assert ( is_string($attribute) );
		assert ( is_int($min) );
		assert ( is_int($max) );
		assert ( $min<=$max );

		$this->_min[$attribute] = $min;
		$this->_max[$attribute] = $max;
	}

	/// set grouping attribute and function
	///
	/// in grouping mode, all matches are assigned to different groups
	/// based on grouping function value.
	///
	/// each group keeps track of the total match count, and the best match
	/// (in this group) according to current sorting function.
	///
	/// the final result set contains one best match per group, with
	/// grouping function value and matches count attached. result set
	/// is sorted by grouping function value, in descending order.
	///
	/// for example, if sorting by relevance and grouping by "published"
	/// attribute with SPH_GROUPBY_DAY function, then the result set will
	/// contain one most relevant match per each day when there were any
	/// matches published, with day number and per-day match count attached,
	/// and sorted by day number in descending order (ie. recent days first).
	function SetGroupBy ( $attribute, $func )
	{
		assert ( is_string($attribute) );
		assert ( $func==SPH_GROUPBY_DAY
			|| $func==SPH_GROUPBY_WEEK
			|| $func==SPH_GROUPBY_MONTH
			|| $func==SPH_GROUPBY_YEAR
			|| $func==SPH_GROUPBY_ATTR );

		$this->_groupby = $attribute;
		$this->_groupfunc = $func;
	}

	/// connect to searchd server and run given search query
	///
	/// $query is query string
	/// $index is index name to query, default is "*" which means to query all indexes
	///
	/// returns false on failure
	/// returns hash which has the following keys on success:
	///		"fields"
	///			array of field names, as reported by the server
	///		"attrs"
	///			hash which maps attribute names to their types
	///		"matches"
	///			hash which maps found document_id to ( "weight", "attrs" ) hash;
	///			this key is absent when there are no matches
	///		"total"
	///			total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
	///		"total_found"
	///			total amount of matching documents in index
	///		"time"
	///			search time
	///		"words"
	///			hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash;
	///			this key is absent when no word statistics were returned
	function Query ( $query, $index="*" )
	{
		if (!( $fp = $this->_Connect() ))
			return false;

		/////////////////
		// build request
		/////////////////

		$req = pack ( "NNNN", $this->_offset, $this->_limit, $this->_mode, $this->_sort ); // mode and limits
		$req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
		$req .= pack ( "N", strlen($query) ) . $query; // query itself
		$req .= pack ( "N", count($this->_weights) ); // weights
		foreach ( $this->_weights as $weight )
			$req .= pack ( "N", (int)$weight );
		$req .= pack ( "N", strlen($index) ) . $index; // indexes
		$req .= // id range
			pack ( "N", (int)$this->_min_id ) .
			pack ( "N", (int)$this->_max_id );

		// filters
		$req .= pack ( "N", count($this->_min) + count($this->_filter) );

		// range filters go first; zero values count marks a range filter
		foreach ( $this->_min as $attr => $min )
			$req .=
				pack ( "N", strlen($attr) ) . $attr .
				pack ( "NNN", 0, $min, $this->_max[$attr] );

		// values-set filters
		foreach ( $this->_filter as $attr => $values )
		{
			$req .= 
				pack ( "N", strlen($attr) ) . $attr .
				pack ( "N", count($values) );

			foreach ( $values as $value )
				$req .= pack ( "N", $value );
		}

		// group-by
		$req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;

		// max matches to retrieve
		$req .= pack ( "N", $this->_maxmatches );

		////////////////////////////
		// send query, get response
		////////////////////////////

		$len = strlen($req);
		$req = pack ( "nnN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len ) . $req; // add header
		fwrite ( $fp, $req, $len+8 );
		if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ))
			return false;

		//////////////////
		// parse response
		//////////////////

		$result = array();
		$max = strlen($response); // protection from broken response

		// read schema
		$p = 0;
		$fields = array ();
		$attrs = array ();

		list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
		while ( $nfields-->0 && $p<$max )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			$fields[] = substr ( $response, $p, $len ); $p += $len;
		}
		$result["fields"] = $fields;

		list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
		while ( $nattrs-->0 && $p<$max  )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			$attr = substr ( $response, $p, $len ); $p += $len;
			list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			$attrs[$attr] = $type;
		}
		$result["attrs"] = $attrs;

		// read match count
		list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;

		// read matches
		// each match is document ID, weight, then one value per schema attribute
		while ( $count-->0 && $p<$max )
		{
			list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
				substr ( $response, $p, 8 ) ) );
			$p += 8;

			$result["matches"][$doc]["weight"] = $weight;
			foreach ( $attrs as $attr=>$type )
			{
				list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
				$result["matches"][$doc]["attrs"][$attr] = $val;
			}
		}
		list ( $result["total"], $result["total_found"], $result["time"], $words ) =
			array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
		$result["time"] = sprintf ( "%.3f", $result["time"]/1000 );
		$p += 16;

		// read per-word statistics
		// bounded by $max like the loops above, so that a bogus word count
		// in a broken response can not keep us spinning
		while ( $words-->0 && $p<$max )
		{
			list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			$word = substr ( $response, $p, $len ); $p += $len;
			list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;

			$result["words"][$word] = array ( "docs"=>$docs, "hits"=>$hits );
		}

		return $result;
	}

	/////////////////////////////////////////////////////////////////////////////
	// excerpts generation
	/////////////////////////////////////////////////////////////////////////////

	/// connect to searchd server and generate excerpts from given documents
	///
	/// $docs is an array of strings which represent the documents' contents
	/// $index is a string specifying the index which settings will be used
	///		for stemming, lexing and case folding
	/// $words is a string which contains the words to highlight
	/// $opts is a hash which contains additional optional highlighting parameters:
	///		"before_match"
	///			a string to insert before a set of matching words, default is "<b>"
	///		"after_match"
	///			a string to insert after a set of matching words, default is "</b>"
	///		"chunk_separator"
	///			a string to insert between excerpts chunks, default is " ... "
	///		"limit"
	///			max excerpt size in symbols (codepoints), default is 256
	///		"around"
	///			how many words to highlight around each match, default is 5
	///
	/// returns false on failure
	/// returns an array of string excerpts on success
	function BuildExcerpts ( $docs, $index, $words, $opts=array() )
	{
		assert ( is_array($docs) );
		assert ( is_string($index) );
		assert ( is_string($words) );
		assert ( is_array($opts) );

		if (!( $fp = $this->_Connect() ))
			return false;

		/////////////////
		// fixup options
		/////////////////

		if ( !isset($opts["before_match"]) )		$opts["before_match"] = "<b>";
		if ( !isset($opts["after_match"]) )			$opts["after_match"] = "</b>";
		if ( !isset($opts["chunk_separator"]) )		$opts["chunk_separator"] = " ... ";
		if ( !isset($opts["limit"]) )				$opts["limit"] = 256;
		if ( !isset($opts["around"]) )				$opts["around"] = 5;

		/////////////////
		// build request
		/////////////////

		// v.1.0 req
		$req = pack ( "NN", 0, 1 ); // mode=0, flags=1 (remove spaces)
		$req .= pack ( "N", strlen($index) ) . $index; // req index
		$req .= pack ( "N", strlen($words) ) . $words; // req words

		// options
		$req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
		$req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
		$req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
		$req .= pack ( "N", (int)$opts["limit"] );
		$req .= pack ( "N", (int)$opts["around"] );

		// documents
		$req .= pack ( "N", count($docs) );
		foreach ( $docs as $doc )
		{
			assert ( is_string($doc) );
			$req .= pack ( "N", strlen($doc) ) . $doc;
		}

		////////////////////////////
		// send query, get response
		////////////////////////////

		$len = strlen($req);
		$req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
		fwrite ( $fp, $req, $len+8 );
		if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ))
			return false;

		//////////////////
		// parse response
		//////////////////

		// the reply is one length-prefixed string per document, in order
		$pos = 0;
		$res = array ();
		$rlen = strlen($response);
		$ndocs = count($docs);
		for ( $i=0; $i<$ndocs; $i++ )
		{
			// make sure the length prefix itself is there before unpacking it
			if ( $pos+4 > $rlen )
			{
				$this->_error = "incomplete reply";
				return false;
			}
			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
			$pos += 4;

			if ( $pos+$len > $rlen )
			{
				$this->_error = "incomplete reply";
				return false;
			}
			$res[] = substr ( $response, $pos, $len );
			$pos += $len;
		}

		return $res;
	}
}

//
// $Id: sphinxapi.php,v 1.38 2006/12/06 00:21:29 shodan Exp $
//

?>