. */

/**
 * Doctrine_Query_Tokenizer
 *
 * Splits DQL/SQL strings into tokens while keeping quoted strings intact and
 * keeping bracketed sub-expressions together.
 *
 * @package     Doctrine
 * @subpackage  Query
 * @license     http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @link        www.doctrine-project.org
 * @since       1.0
 * @version     $Revision$
 * @author      Konsta Vesterinen
 * @author      Guilherme Blanco
 * @author      Stefan Klug
 */
class Doctrine_Query_Tokenizer
{
    /**
     * Splits the given dql query into an array where keys are the clause
     * keywords found in the query and values are the clause contents.
     *
     * The keys keep the casing used in the query (they are not lower-cased)
     * and each value is a string made of the clause tokens, every token being
     * followed by a single space. For "ORDER BY" / "GROUP BY" the key is the
     * first word only; the "BY" token is dropped.
     *
     * example:
     *
     * parameter:
     *     $query = "SELECT u.* FROM User u WHERE u.name LIKE ?"
     * returns:
     *     array(
     *         'SELECT' => 'u.* ',
     *         'FROM'   => 'User u ',
     *         'WHERE'  => 'u.name LIKE ? '
     *     );
     *
     * @param string $query DQL query
     *
     * @throws Doctrine_Query_Tokenizer_Exception If a token is encountered
     *                                            before any clause keyword
     *
     * @return array An array containing the query string parts
     */
    public function tokenizeQuery($query)
    {
        $tokens = $this->sqlExplode($query, ' ');
        $parts = array();

        // Key of the clause currently being filled; null until a keyword is seen
        $p = null;

        foreach ($tokens as $index => $token) {
            $token = trim($token);

            switch (strtolower($token)) {
                case 'delete':
                case 'update':
                case 'select':
                case 'set':
                case 'from':
                case 'where':
                case 'limit':
                case 'offset':
                case 'having':
                    $p = $token;
                    $parts[$token] = '';
                    break;

                case 'order':
                case 'group':
                    $i = ($index + 1);

                    if (isset($tokens[$i]) && strtolower($tokens[$i]) === 'by') {
                        // "ORDER BY" / "GROUP BY" opens a new clause
                        $p = $token;
                        $parts[$token] = '';
                    } else {
                        // A plain word "order"/"group" belongs to the current
                        // clause, so one must already be open
                        if ($p === null) {
                            throw new Doctrine_Query_Tokenizer_Exception(
                                "Couldn't tokenize query. Encountered invalid token: '$token'."
                            );
                        }

                        $parts[$p] .= "$token ";
                    }
                    break;

                case 'by':
                    // Skip the token. "continue 2" targets the foreach; a bare
                    // "continue" inside a switch only behaves like "break" and
                    // raises a warning on PHP >= 7.3
                    continue 2;

                default:
                    if ($p === null) {
                        throw new Doctrine_Query_Tokenizer_Exception(
                            "Couldn't tokenize query. Encountered invalid token: '$token'."
                        );
                    }

                    $parts[$p] .= "$token ";
            }
        }

        return $parts;
    }

    /**
     * Trims brackets from string
     *
     * The brackets are only removed when the string starts with $e1 AND ends
     * with $e2; otherwise the string is returned unchanged. No check is made
     * that the two brackets actually belong together.
     *
     * @param string $str String to remove the brackets
     * @param string $e1  First bracket, usually '('
     * @param string $e2  Second bracket, usually ')'
     *
     * @return string
     */
    public function bracketTrim($str, $e1 = '(', $e2 = ')')
    {
        if (substr($str, 0, 1) === $e1 && substr($str, -1) === $e2) {
            return substr($str, 1, -1);
        }

        return $str;
    }

    /**
     * Explodes a sql expression respecting bracket placement.
     *
     * This method transform a sql expression in an array of simple clauses,
     * while observing the parentheses precedence. Delimiters are matched
     * case insensitively.
     *
     * Note: bracketExplode always trims the returned pieces
     *
     * <code>
     * $str = (age < 20 AND age > 18) AND email LIKE 'John@example.com'
     * $clauses = $tokenizer->bracketExplode($str, ' AND ', '(', ')');
     * // array("(age < 20 AND age > 18)", "email LIKE 'John@example.com'")
     * </code>
     *
     * @param string       $str String to be bracket exploded
     * @param string|array $d   Delimiter(s) which explode the string
     * @param string       $e1  First bracket, usually '('
     * @param string       $e2  Second bracket, usually ')'
     *
     * @return array
     */
    public function bracketExplode($str, $d = ' ', $e1 = '(', $e2 = ')')
    {
        if (is_string($d)) {
            $d = array($d);
        }

        // Bracket explode has to be case insensitive
        $regexp = $this->getSplitRegExpFromArray($d) . 'i';
        $terms = $this->clauseExplodeRegExp($str, $regexp, $e1, $e2);

        $res = array();

        // Trim is here for historical reasons
        foreach ($terms as $value) {
            $res[] = trim($value[0]);
        }

        return $res;
    }

    /**
     * Explodes a string by the given delimiter(s) without splitting inside
     * quoted strings. Brackets are NOT merged by this method. Delimiters are
     * matched case insensitively and are not part of the result.
     *
     * Note: quoteExplode always trims the returned pieces
     *
     * example:
     *
     * parameters:
     *     $str = email LIKE 'John@example.com'
     *     $d = ' LIKE '
     *
     * would return an array:
     *     array("email", "'John@example.com'")
     *
     * @param string       $str String to be quote exploded
     * @param string|array $d   Delimiter(s) which explode the string
     *
     * @return array
     */
    public function quoteExplode($str, $d = ' ')
    {
        if (is_string($d)) {
            $d = array($d);
        }

        // According to the testcases quoteExplode is case insensitive
        $regexp = $this->getSplitRegExpFromArray($d) . 'i';
        $terms = $this->clauseExplodeCountBrackets($str, $regexp);

        $res = array();

        foreach ($terms as $val) {
            $res[] = trim($val[0]);
        }

        return $res;
    }

    /**
     * Explodes a string into array using custom brackets and
     * quote delimiters. Quoted strings keep their quotes.
     *
     * Note: sqlExplode trims all returned parts
     *
     * example:
     *
     * parameters:
     *     $str = "(age < 20 AND age > 18) AND name LIKE 'John Doe'"
     *     $d   = ' '
     *     $e1  = '('
     *     $e2  = ')'
     *
     * would return an array:
     *     array(
     *         '(age < 20 AND age > 18)',
     *         'AND',
     *         'name',
     *         'LIKE',
     *         "'John Doe'"
     *     );
     *
     * @param string       $str String to be SQL exploded
     * @param string|array $d   Delimiter(s) which explode the string
     * @param string       $e1  First bracket, usually '('
     * @param string       $e2  Second bracket, usually ')'
     *
     * @return array
     */
    public function sqlExplode($str, $d = ' ', $e1 = '(', $e2 = ')')
    {
        if (is_string($d)) {
            $d = array($d);
        }

        $terms = $this->clauseExplode($str, $d, $e1, $e2);
        $res = array();

        foreach ($terms as $value) {
            $res[] = trim($value[0]);
        }

        return $res;
    }

    /**
     * Explodes a string into array using custom brackets and quote delimiters
     * Each array element is a array of length 2 where the first entry contains
     * the term, and the second entry contains the corresponding delimiter
     *
     * example:
     *
     * parameters:
     *     $str = "(age < 20 AND age > 18) AND name LIKE 'John'+' Doe'"
     *     $d   = array(' ', '+')
     *     $e1  = '('
     *     $e2  = ')'
     *
     * would return an array:
     *     array(
     *         array('(age < 20 AND age > 18)', ' '),
     *         array('AND', ' '),
     *         array('name', ' '),
     *         array('LIKE', ' '),
     *         array("'John'", '+'),
     *         array("' Doe'", '')
     *     );
     *
     * @param string $str String to be clause exploded
     * @param array  $d   Delimiters which explode the string
     * @param string $e1  First bracket, usually '('
     * @param string $e2  Second bracket, usually ')'
     *
     * @return array
     */
    public function clauseExplode($str, array $d, $e1 = '(', $e2 = ')')
    {
        $regexp = $this->getSplitRegExpFromArray($d);

        return $this->clauseExplodeRegExp($str, $regexp, $e1, $e2);
    }

    /**
     * Builds the regular expression used to split a string from an array of
     * delimiters.
     *
     * Every delimiter is escaped for use in a '#'-delimited pattern. A
     * delimiter made only of word characters is wrapped in \W on both sides,
     * and when a single space is one of the delimiters any whitespace (\s)
     * is accepted as well. The returned pattern has no modifiers, so callers
     * may append some (e.g. 'i').
     *
     * @param array $d Delimiters
     *
     * @return string
     */
    private function getSplitRegExpFromArray(array $d)
    {
        foreach ($d as $key => $string) {
            // Pass the pattern delimiter so that a '#' inside $string is
            // escaped on every PHP version
            $escapedString = preg_quote($string, '#');

            if (preg_match('#^\w+$#', $string)) {
                $escapedString = '\W' . $escapedString . '\W';
            }

            $d[$key] = $escapedString;
        }

        if (in_array(' ', $d, true)) {
            $d[] = '\s';
        }

        return '#(' . implode('|', $d) . ')#';
    }

    /**
     * Same as clauseExplode, but you give a regexp, which splits the string
     *
     * @param string $str    String to be exploded
     * @param string $regexp Complete split pattern (delimiters and modifiers)
     * @param string $e1     First bracket, usually '('
     * @param string $e2     Second bracket, usually ')'
     *
     * @return array List of array(term, delimiter) pairs
     */
    private function clauseExplodeRegExp($str, $regexp, $e1 = '(', $e2 = ')')
    {
        $terms = $this->clauseExplodeCountBrackets($str, $regexp, $e1, $e2);
        $terms = $this->mergeBracketTerms($terms);

        // This is only here to comply with the old function signature:
        // drop the bracket counter so that only term and delimiter remain
        foreach (array_keys($terms) as $key) {
            unset($terms[$key][2]);
        }

        return $terms;
    }

    /**
     * This function is like clauseExplode, but it doesn't merge bracket terms.
     *
     * Each returned element is array(term, delimiter, bracket overhang) where
     * the overhang is the number of $e1 minus the number of $e2 found in the
     * non-quoted portions of the term. Quoted strings are never split and do
     * not contribute to the overhang.
     *
     * @param string $str    String to be exploded
     * @param string $regexp Complete split pattern (delimiters and modifiers)
     * @param string $e1     First bracket, usually '('
     * @param string $e2     Second bracket, usually ')'
     *
     * @return array
     */
    private function clauseExplodeCountBrackets($str, $regexp, $e1 = '(', $e2 = ')')
    {
        $quoteTerms = $this->quotedStringExplode($str);
        $terms = array();

        // Number of elements in $terms, i.e. the index of the next new term
        $i = 0;

        foreach ($quoteTerms as $key => $val) {
            if ($key & 1) {
                // A quoted string.
                // If the last term had no ending delimiter, we append the
                // string to the element, otherwise, we create a new element
                // without delimiter
                if ($i > 0 && $terms[$i - 1][1] == '') {
                    $terms[$i - 1][0] .= $val;
                } else {
                    $terms[$i++] = array($val, '', 0);
                }

                continue;
            }

            // Not a quoted string: do the clause explode
            $subterms = $this->clauseExplodeNonQuoted($val, $regexp);

            foreach ($subterms as $subKey => $sub) {
                $subterms[$subKey][2] = substr_count($sub[0], $e1) - substr_count($sub[0], $e2);
            }

            // If the previous term had no delimiter, merge them
            if ($i > 0 && $terms[$i - 1][1] == '') {
                $first = array_shift($subterms);
                $idx = $i - 1;

                $terms[$idx][0] .= $first[0];
                $terms[$idx][1] = $first[1];
                $terms[$idx][2] += $first[2];
            }

            $terms = array_merge($terms, $subterms);
            $i += count($subterms);
        }

        return $terms;
    }

    /**
     * Explodes a string by the given split pattern. This function doesn't
     * respect quoted strings and does not count brackets.
     *
     * The returned array contains an array per term with these elements:
     * [0] = the term itself
     * [1] = the delimiter splitting this term from the next ('' for the last)
     *
     * example:
     *
     * parameters:
     *     $str    = "a (b '(c+d))'"
     *     $regexp = pattern built from array(' ', '+')
     *
     * returns:
     *     array(
     *         array('a', ' '),
     *         array('(b', ' '),
     *         array("'(c", '+'),
     *         array("d))'", '')
     *     );
     *
     * @param string $str    String to be exploded
     * @param string $regexp Split pattern with exactly one capturing group
     *                       around the delimiter alternatives
     *
     * @return array
     */
    private function clauseExplodeNonQuoted($str, $regexp)
    {
        $pieces = preg_split($regexp, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
        $term = array();
        $i = 0;

        foreach ($pieces as $key => $val) {
            // Every odd entry is a delimiter, so add it to the previous term entry
            if ( ! ($key & 1)) {
                $term[$i] = array($val, '');
            } else {
                $term[$i++][1] = $val;
            }
        }

        return $term;
    }

    /**
     * This expects input from clauseExplodeCountBrackets.
     * It will go through the result and merges any bracket terms with
     * unbalanced bracket count.
     * Note that only the third parameter in each term is used to get the
     * bracket overhang. This is needed to be able to handle quoted strings
     * which contain brackets
     *
     * example:
     *
     * parameters:
     *     $terms = array(
     *         array("'a(b'", '+', 0),
     *         array('(2', '+', 1),
     *         array('3)', '-', -1),
     *         array('5', '', 0)
     *     );
     *
     * would return:
     *     array(
     *         array("'a(b'", '+', 0),
     *         array('(2+3)', '-', 0),
     *         array('5', '', 0)
     *     );
     *
     * @param array $terms List of array(term, delimiter, bracket overhang)
     *
     * @return array
     */
    private function mergeBracketTerms(array $terms)
    {
        $res = array();
        $i = 0;

        foreach ($terms as $val) {
            if ( ! isset($res[$i])) {
                $res[$i] = array($val[0], $val[1], $val[2]);
            } else {
                // Still inside an open bracket: glue the term to the pending
                // one, re-inserting the delimiter that separated them
                $res[$i][0] .= $res[$i][1] . $val[0];
                $res[$i][1] = $val[1];
                $res[$i][2] += $val[2];
            }

            // Bracket overhang: only move on once the brackets are balanced
            if ($res[$i][2] == 0) {
                $i++;
            }
        }

        return $res;
    }

    /**
     * Explodes the given string by quoted strings (single or double quotes).
     *
     * Backslash-escaped quotes and doubled quotes do not terminate a quoted
     * string.
     *
     * example:
     *
     * parameters:
     *     $str = "'a' AND name = 'John O\'Connor'"
     *
     * returns
     *     array("", "'a'", " AND name = ", "'John O\'Connor'", "")
     *
     * Note the leading and trailing empty strings. In the result, all elements
     * with an odd index are quoted strings (quotes included) and all elements
     * with an even index are the text between them.
     *
     * @param string $str the string to split
     *
     * @return array
     */
    public function quotedStringExplode($str)
    {
        // Split by all possible incarnations of a quote
        $quotes = array_map('preg_quote', array("\\'", "''", "'", "\\\"", "\"\"", "\""));
        $pattern = '#(' . implode('|', $quotes) . ')#';
        $pieces = preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        $parts = array();

        // Mode is either ' or " if the loop is inside a string quoted with ' or "
        $mode = false;
        $i = 0;

        foreach ($pieces as $key => $val) {
            // Odd entries are some kind of quote
            if ($key & 1) {
                if ($mode === false) {
                    // Only a plain quote opens a string; escaped or doubled
                    // quotes outside a string are kept as ordinary text
                    if ($val === "'" || $val === "\"") {
                        $mode = $val;
                        $i++;
                    }
                } else if ($mode === $val) {
                    // Matching closing quote: finish the quoted part
                    if ( ! isset($parts[$i])) {
                        $parts[$i] = $val;
                    } else {
                        $parts[$i] .= $val;
                    }

                    $mode = false;
                    $i++;

                    continue;
                }
            }

            if ( ! isset($parts[$i])) {
                $parts[$i] = $val;
            } else {
                $parts[$i] .= $val;
            }
        }

        return $parts;
    }
}