* @copyright 2010-2014 Justin Swanhart and André Rothe
 * @license http://www.debian.org/misc/bsd.license BSD License (3 Clause)
 * @version SVN: $Id: PHPSQLLexer.php 842 2013-12-30 08:57:53Z phosco@gmx.de $
 *
 */

require_once dirname(__FILE__) . '/LexerSplitter.php';
require_once dirname(__FILE__) . '/../exceptions/InvalidParameterException.php';

/**
 * This class splits the SQL string into little parts, which the parser can
 * use to build the result array.
 *
 * The raw split is done character-wise with the help of the splitter list;
 * afterwards several passes re-combine tokens which belong together
 * (escape sequences, quoted strings, column references, parenthesised
 * expressions, comments and user-defined variables).
 *
 * @author André Rothe
 * @license http://www.debian.org/misc/bsd.license BSD License (3 Clause)
 *
 */
class PHPSQLLexer {

    /**
     * The splitter registry; the lexer asks it for the maximum splitter
     * length and whether a given substring is a splitter.
     */
    protected $splitters;

    /**
     * Constructor.
     *
     * It initializes some fields.
     */
    public function __construct() {
        $this->splitters = new LexerSplitter();
    }

    /**
     * Ends the given string $haystack with the string $needle?
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return boolean true, if the parameter $haystack ends with the character sequences $needle, false otherwise
     */
    protected function endsWith($haystack, $needle) {
        $length = strlen($needle);
        if ($length === 0) {
            // every string ends with the empty string
            return true;
        }
        return (substr($haystack, -$length) === $needle);
    }

    /**
     * Splits the given SQL statement into a list of tokens.
     *
     * The statement is scanned from left to right. At every position the
     * longest substring which is a known splitter wins and becomes a token
     * of its own; all characters between two splitters form one token.
     * The raw token list is then post-processed by the concat/balance
     * passes of this class, in a fixed order.
     *
     * @param string $sql the SQL statement
     *
     * @return array the list of tokens (strings), indexed from 0
     *
     * @throws InvalidParameterException if $sql is not a string
     */
    public function split($sql) {
        if (!is_string($sql)) {
            throw new InvalidParameterException($sql);
        }

        $tokens = array();
        $token = "";

        $splitLen = $this->splitters->getMaxLengthOfSplitter();
        $len = strlen($sql);
        $pos = 0;

        while ($pos < $len) {

            // Never probe beyond the end of the statement: substr() would
            // only return the same truncated candidate again and again.
            $maxLen = min($splitLen, $len - $pos);

            for ($i = $maxLen; $i > 0; $i--) {
                $substr = substr($sql, $pos, $i);
                if ($this->splitters->isSplitter($substr)) {

                    // flush the characters collected since the last splitter
                    if ($token !== "") {
                        $tokens[] = $token;
                    }

                    $tokens[] = $substr;
                    $pos += $i;
                    $token = "";

                    continue 2;
                }
            }

            // no splitter starts at this position, collect the character
            $token .= $sql[$pos];
            $pos++;
        }

        if ($token !== "") {
            $tokens[] = $token;
        }

        // the order of the following passes is significant
        $tokens = $this->concatEscapeSequences($tokens);
        $tokens = $this->balanceBackticks($tokens);
        $tokens = $this->concatColReferences($tokens);
        $tokens = $this->balanceParenthesis($tokens);
        $tokens = $this->concatComments($tokens);
        $tokens = $this->concatUserDefinedVariables($tokens);
        return $tokens;
    }

    /**
     * Merges an "@" token with the token that follows it.
     *
     * Consecutive "@" tokens are all appended to the first one; the merge
     * stops after the first token which is not "@" has been appended.
     *
     * @param array $tokens the list of tokens
     *
     * @return array the re-indexed list of tokens
     */
    protected function concatUserDefinedVariables($tokens) {
        $i = 0;
        $cnt = count($tokens);
        $userdef = false; // index of the token we currently append to, or false

        while ($i < $cnt) {

            if (!isset($tokens[$i])) {
                $i++;
                continue;
            }

            $token = $tokens[$i];

            if ($userdef !== false) {
                $tokens[$userdef] .= $token;
                unset($tokens[$i]);
                if ($token !== "@") {
                    $userdef = false;
                }
            }

            if ($userdef === false && $token === "@") {
                $userdef = $i;
            }

            $i++;
        }

        return array_values($tokens);
    }

    /**
     * Merges all tokens of a comment into the token which starts the comment.
     *
     * An inline comment starts with the token "--" and ends before the next
     * "\n" or "\r\n" token; the line break itself stays a separate token.
     * A block comment starts with the token "/*" and ends with its
     * terminator token, which is still appended to the comment.
     *
     * @param array $tokens the list of tokens
     *
     * @return array the re-indexed list of tokens
     */
    protected function concatComments($tokens) {
        $i = 0;
        $cnt = count($tokens);
        $comment = false; // index of the token which started the comment, or false
        $inline = false;  // only meaningful while $comment is not false

        while ($i < $cnt) {

            if (!isset($tokens[$i])) {
                $i++;
                continue;
            }

            $token = $tokens[$i];

            if ($comment !== false) {
                if ($inline === true && ($token === "\n" || $token === "\r\n")) {
                    // the line break terminates an inline comment and is kept
                    $comment = false;
                } else {
                    unset($tokens[$i]);
                    $tokens[$comment] .= $token;
                }
                if ($inline === false && ($token === "*/")) {
                    $comment = false;
                }
            }

            if (($comment === false) && ($token === "--")) {
                $comment = $i;
                $inline = true;
            }

            if (($comment === false) && ($token === "/*")) {
                $comment = $i;
                $inline = false;
            }

            $i++;
        }

        return array_values($tokens);
    }

    /**
     * Is the given token one of the quote characters handled by
     * balanceBackticks()?
     *
     * @param string $token
     *
     * @return boolean true, if $token is a single quote, a double quote or a backtick
     */
    protected function isBacktick($token) {
        return ($token === "'" || $token === "\"" || $token === "`");
    }

    /**
     * Re-combines the tokens of quoted parts of the statement.
     *
     * Every token which is a quote character (see isBacktick()) is merged
     * with the tokens that follow it by balanceCharacter().
     *
     * @param array $tokens the list of tokens
     *
     * @return array the list of tokens
     */
    protected function balanceBackticks($tokens) {
        $i = 0;
        // balanceCharacter() shrinks and re-indexes the list, so $cnt is only
        // an upper bound; the isset() check skips the indexes beyond the end
        $cnt = count($tokens);
        while ($i < $cnt) {

            if (!isset($tokens[$i])) {
                $i++;
                continue;
            }

            $token = $tokens[$i];

            if ($this->isBacktick($token)) {
                $tokens = $this->balanceCharacter($tokens, $i, $token);
            }

            $i++;
        }

        return $tokens;
    }

    /**
     * Appends the tokens after position $idx to the token at $idx, up to
     * and including the first token which is identical to $char. If there
     * is no such token, all remaining tokens are appended.
     *
     * backticks are not balanced within one token, so we have
     * to re-combine some tokens
     *
     * @param array   $tokens the list of tokens
     * @param integer $idx    the index of the opening quote character
     * @param string  $char   the quote character which closes the sequence
     *
     * @return array the re-indexed list of tokens
     */
    protected function balanceCharacter($tokens, $idx, $char) {

        $token_count = count($tokens);
        $i = $idx + 1;

        while ($i < $token_count) {

            if (!isset($tokens[$i])) {
                $i++;
                continue;
            }

            $token = $tokens[$i];
            $tokens[$idx] .= $token;
            unset($tokens[$i]);

            if ($token === $char) {
                break;
            }

            $i++;
        }
        return array_values($tokens);
    }

    /**
     * This function concats some tokens to a column reference.
     * There are two different cases:
     *
     * 1. If the current token ends with a dot, we will add the next token
     * 2. If the next token starts with a dot, we will add it to the previous token
     *
     * @param array $tokens the list of tokens
     *
     * @return array the re-indexed list of tokens
     */
    protected function concatColReferences($tokens) {

        $cnt = count($tokens);
        $i = 0;
        while ($i < $cnt) {

            if (!isset($tokens[$i])) {
                $i++;
                continue;
            }

            // the empty-string check avoids an invalid string offset access
            if ($tokens[$i] !== "" && $tokens[$i][0] === ".") {

                // concat the previous tokens, till the token has been changed
                $k = $i - 1;
                $len = strlen($tokens[$i]);
                while (($k >= 0) && ($len == strlen($tokens[$i]))) {
                    if (!isset($tokens[$k])) { // FIXME: this can be wrong if we have schema . table . column
                        $k--;
                        continue;
                    }
                    $tokens[$i] = $tokens[$k] . $tokens[$i];
                    unset($tokens[$k]);
                    $k--;
                }
            }

            // a numeric token with a trailing dot is a number, not a reference
            if ($this->endsWith($tokens[$i], '.') && !is_numeric($tokens[$i])) {

                // concat the next tokens, till the token has been changed
                $k = $i + 1;
                $len = strlen($tokens[$i]);
                while (($k < $cnt) && ($len == strlen($tokens[$i]))) {
                    if (!isset($tokens[$k])) {
                        $k++;
                        continue;
                    }
                    $tokens[$i] .= $tokens[$k];
                    unset($tokens[$k]);
                    $k++;
                }
            }

            $i++;
        }

        return array_values($tokens);
    }

    /**
     * Appends to every token which ends with a backslash the token that
     * follows it. The appended token is not inspected again, so it cannot
     * start a further merge.
     *
     * @param array $tokens the list of tokens, indexed from 0 without gaps
     *
     * @return array the re-indexed list of tokens
     */
    protected function concatEscapeSequences($tokens) {
        $tokenCount = count($tokens);
        $i = 0;
        while ($i < $tokenCount) {

            if ($this->endsWith($tokens[$i], "\\")) {
                $i++;
                if (isset($tokens[$i])) {
                    $tokens[$i - 1] .= $tokens[$i];
                    unset($tokens[$i]);
                }
            }
            $i++;
        }
        return array_values($tokens);
    }

    /**
     * Merges every top-level parenthesised expression into a single token.
     *
     * Starting at a "(" token all following tokens are appended to it until
     * the matching ")" token has been appended; nested parentheses are
     * counted. If the parentheses are not balanced, all remaining tokens
     * are appended.
     *
     * @param array $tokens the list of tokens, indexed from 0 without gaps
     *
     * @return array the re-indexed list of tokens
     */
    protected function balanceParenthesis($tokens) {
        $token_count = count($tokens);
        $i = 0;
        while ($i < $token_count) {
            if ($tokens[$i] !== '(') {
                $i++;
                continue;
            }

            $count = 1; // nesting depth, the opening parenthesis is counted already
            for ($n = $i + 1; $n < $token_count; $n++) {
                $token = $tokens[$n];
                if ($token === '(') {
                    $count++;
                }
                if ($token === ')') {
                    $count--;
                }
                $tokens[$i] .= $token;
                unset($tokens[$n]);
                if ($count === 0) {
                    $n++;
                    break;
                }
            }

            // continue behind the tokens which have been merged
            $i = $n;
        }
        return array_values($tokens);
    }
}
?>