flux/library/Zend/Pdf/StringParser.php
2014-09-16 08:00:32 +00:00

732 lines
22 KiB
PHP

<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Pdf
* @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id$
*/
/** Internally used classes */
require_once 'Zend/Pdf/Element/Array.php';
require_once 'Zend/Pdf/Element/String/Binary.php';
require_once 'Zend/Pdf/Element/Boolean.php';
require_once 'Zend/Pdf/Element/Dictionary.php';
require_once 'Zend/Pdf/Element/Name.php';
require_once 'Zend/Pdf/Element/Null.php';
require_once 'Zend/Pdf/Element/Numeric.php';
require_once 'Zend/Pdf/Element/Object.php';
require_once 'Zend/Pdf/Element/Object/Stream.php';
require_once 'Zend/Pdf/Element/Reference.php';
require_once 'Zend/Pdf/Element/String.php';
/**
* PDF string parser
*
* @package Zend_Pdf
* @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Pdf_StringParser
{
/**
* Source PDF
*
* @var string
*/
public $data = '';
/**
* Current position in a data
*
* @var integer
*/
public $offset = 0;
/**
* Current reference context
*
* @var Zend_Pdf_Element_Reference_Context
*/
private $_context = null;
/**
* Array of elements of the currently parsed object/trailer
*
* @var array
*/
private $_elements = array();
/**
* PDF objects factory.
*
* @var Zend_Pdf_ElementFactory_Interface
*/
private $_objFactory = null;
/**
* Clean up resources.
*
* Clear current state to remove cyclic object references
*/
public function cleanUp()
{
$this->_context = null;
$this->_elements = array();
$this->_objFactory = null;
}
/**
* Character with code $chCode is white space
*
* @param integer $chCode
* @return boolean
*/
public static function isWhiteSpace($chCode)
{
if ($chCode == 0x00 || // null character
$chCode == 0x09 || // Tab
$chCode == 0x0A || // Line feed
$chCode == 0x0C || // Form Feed
$chCode == 0x0D || // Carriage return
$chCode == 0x20 // Space
) {
return true;
} else {
return false;
}
}
/**
* Character with code $chCode is a delimiter character
*
* @param integer $chCode
* @return boolean
*/
public static function isDelimiter($chCode )
{
if ($chCode == 0x28 || // '('
$chCode == 0x29 || // ')'
$chCode == 0x3C || // '<'
$chCode == 0x3E || // '>'
$chCode == 0x5B || // '['
$chCode == 0x5D || // ']'
$chCode == 0x7B || // '{'
$chCode == 0x7D || // '}'
$chCode == 0x2F || // '/'
$chCode == 0x25 // '%'
) {
return true;
} else {
return false;
}
}
/**
* Skip white space
*
* @param boolean $skipComment
*/
public function skipWhiteSpace($skipComment = true)
{
if ($skipComment) {
while (true) {
$this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
if ($this->offset < strlen($this->data) && $this->data[$this->offset] == '%') {
// Skip comment
$this->offset += strcspn($this->data, "\r\n", $this->offset);
} else {
// Non white space character not equal to '%' is found
return;
}
}
} else {
$this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
}
// /** Original (non-optimized) implementation. */
//
// while ($this->offset < strlen($this->data)) {
// if (strpos("\x00\t\n\f\r ", $this->data[$this->offset]) !== false) {
// $this->offset++;
// } else if (ord($this->data[$this->offset]) == 0x25 && $skipComment) { // '%'
// $this->skipComment();
// } else {
// return;
// }
// }
}
/**
* Skip comment
*/
public function skipComment()
{
while ($this->offset < strlen($this->data))
{
if (ord($this->data[$this->offset]) != 0x0A || // Line feed
ord($this->data[$this->offset]) != 0x0d // Carriage return
) {
$this->offset++;
} else {
return;
}
}
}
/**
* Read comment line
*
* @return string
*/
public function readComment()
{
$this->skipWhiteSpace(false);
/** Check if it's a comment line */
if ($this->data[$this->offset] != '%') {
return '';
}
for ($start = $this->offset;
$this->offset < strlen($this->data);
$this->offset++) {
if (ord($this->data[$this->offset]) == 0x0A || // Line feed
ord($this->data[$this->offset]) == 0x0d // Carriage return
) {
break;
}
}
return substr($this->data, $start, $this->offset-$start);
}
/**
* Returns next lexeme from a pdf stream
*
* @return string
*/
public function readLexeme()
{
// $this->skipWhiteSpace();
while (true) {
$this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
if ($this->offset < strlen($this->data) && $this->data[$this->offset] == '%') {
$this->offset += strcspn($this->data, "\r\n", $this->offset);
} else {
break;
}
}
if ($this->offset >= strlen($this->data)) {
return '';
}
if ( /* self::isDelimiter( ord($this->data[$start]) ) */
strpos('()<>[]{}/%', $this->data[$this->offset]) !== false ) {
switch (substr($this->data, $this->offset, 2)) {
case '<<':
$this->offset += 2;
return '<<';
break;
case '>>':
$this->offset += 2;
return '>>';
break;
default:
return $this->data[$this->offset++];
break;
}
} else {
$start = $this->offset;
$compare = '';
if( version_compare( phpversion(), '5.2.5' ) >= 0) {
$compare = "()<>[]{}/%\x00\t\n\f\r ";
} else {
$compare = "()<>[]{}/%\x00\t\n\r ";
}
$this->offset += strcspn($this->data, $compare, $this->offset);
return substr($this->data, $start, $this->offset - $start);
}
}
/**
* Read elemental object from a PDF stream
*
* @return Zend_Pdf_Element
* @throws Zend_Pdf_Exception
*/
public function readElement($nextLexeme = null)
{
if ($nextLexeme === null) {
$nextLexeme = $this->readLexeme();
}
/**
* Note: readElement() method is a public method and could be invoked from other classes.
* If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
* about _elements member management.
*/
switch ($nextLexeme) {
case '(':
return ($this->_elements[] = $this->_readString());
case '<':
return ($this->_elements[] = $this->_readBinaryString());
case '/':
return ($this->_elements[] = new Zend_Pdf_Element_Name(
Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
));
case '[':
return ($this->_elements[] = $this->_readArray());
case '<<':
return ($this->_elements[] = $this->_readDictionary());
case ')':
// fall through to next case
case '>':
// fall through to next case
case ']':
// fall through to next case
case '>>':
// fall through to next case
case '{':
// fall through to next case
case '}':
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
$this->offset));
default:
if (strcasecmp($nextLexeme, 'true') == 0) {
return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
} else if (strcasecmp($nextLexeme, 'false') == 0) {
return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
} else if (strcasecmp($nextLexeme, 'null') == 0) {
return ($this->_elements[] = new Zend_Pdf_Element_Null());
}
$ref = $this->_readReference($nextLexeme);
if ($ref !== null) {
return ($this->_elements[] = $ref);
}
return ($this->_elements[] = $this->_readNumeric($nextLexeme));
}
}
/**
* Read string PDF object
* Also reads trailing ')' from a pdf stream
*
* @return Zend_Pdf_Element_String
* @throws Zend_Pdf_Exception
*/
private function _readString()
{
$start = $this->offset;
$openedBrackets = 1;
$this->offset += strcspn($this->data, '()\\', $this->offset);
while ($this->offset < strlen($this->data)) {
switch (ord( $this->data[$this->offset] )) {
case 0x28: // '(' - opened bracket in the string, needs balanced pair.
$this->offset++;
$openedBrackets++;
break;
case 0x29: // ')' - pair to the opened bracket
$this->offset++;
$openedBrackets--;
break;
case 0x5C: // '\\' - escape sequence, skip next char from a check
$this->offset += 2;
}
if ($openedBrackets == 0) {
break; // end of string
}
$this->offset += strcspn($this->data, '()\\', $this->offset);
}
if ($openedBrackets != 0) {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
}
return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
$start,
$this->offset - $start - 1) ));
}
/**
* Read binary string PDF object
* Also reads trailing '>' from a pdf stream
*
* @return Zend_Pdf_Element_String_Binary
* @throws Zend_Pdf_Exception
*/
private function _readBinaryString()
{
$start = $this->offset;
$this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);
if ($this->offset >= strlen($this->data) - 1) {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
}
if ($this->data[$this->offset++] != '>') {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
}
return new Zend_Pdf_Element_String_Binary(
Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
$start,
$this->offset - $start - 1) ));
}
/**
* Read array PDF object
* Also reads trailing ']' from a pdf stream
*
* @return Zend_Pdf_Element_Array
* @throws Zend_Pdf_Exception
*/
private function _readArray()
{
$elements = array();
while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
if ($nextLexeme != ']') {
$elements[] = $this->readElement($nextLexeme);
} else {
return new Zend_Pdf_Element_Array($elements);
}
}
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
}
/**
* Read dictionary PDF object
* Also reads trailing '>>' from a pdf stream
*
* @return Zend_Pdf_Element_Dictionary
* @throws Zend_Pdf_Exception
*/
private function _readDictionary()
{
$dictionary = new Zend_Pdf_Element_Dictionary();
while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
if ($nextLexeme != '>>') {
$nameStart = $this->offset - strlen($nextLexeme);
$name = $this->readElement($nextLexeme);
$value = $this->readElement();
if (!$name instanceof Zend_Pdf_Element_Name) {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
}
$dictionary->add($name, $value);
} else {
return $dictionary;
}
}
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
}
/**
* Read reference PDF object
*
* @param string $nextLexeme
* @return Zend_Pdf_Element_Reference
*/
private function _readReference($nextLexeme = null)
{
$start = $this->offset;
if ($nextLexeme === null) {
$objNum = $this->readLexeme();
} else {
$objNum = $nextLexeme;
}
if (!ctype_digit($objNum)) { // it's not a reference
$this->offset = $start;
return null;
}
$genNum = $this->readLexeme();
if (!ctype_digit($genNum)) { // it's not a reference
$this->offset = $start;
return null;
}
$rMark = $this->readLexeme();
if ($rMark != 'R') { // it's not a reference
$this->offset = $start;
return null;
}
$ref = new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
return $ref;
}
/**
* Read numeric PDF object
*
* @param string $nextLexeme
* @return Zend_Pdf_Element_Numeric
*/
private function _readNumeric($nextLexeme = null)
{
if ($nextLexeme === null) {
$nextLexeme = $this->readLexeme();
}
return new Zend_Pdf_Element_Numeric($nextLexeme);
}
/**
* Read inderect object from a PDF stream
*
* @param integer $offset
* @param Zend_Pdf_Element_Reference_Context $context
* @return Zend_Pdf_Element_Object
*/
public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
{
if ($offset === null ) {
return new Zend_Pdf_Element_Null();
}
// Save current offset to make getObject() reentrant
$offsetSave = $this->offset;
$this->offset = $offset;
$this->_context = $context;
$this->_elements = array();
$objNum = $this->readLexeme();
if (!ctype_digit($objNum)) {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
}
$genNum = $this->readLexeme();
if (!ctype_digit($genNum)) {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
}
$objKeyword = $this->readLexeme();
if ($objKeyword != 'obj') {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
}
$objValue = $this->readElement();
$nextLexeme = $this->readLexeme();
if( $nextLexeme == 'endobj' ) {
/**
* Object is not generated by factory (thus it's not marked as modified object).
* But factory is assigned to the obect.
*/
$obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
foreach ($this->_elements as $element) {
$element->setParentObject($obj);
}
// Restore offset value
$this->offset = $offsetSave;
return $obj;
}
/**
* It's a stream object
*/
if ($nextLexeme != 'stream') {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
}
if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
}
/**
* References are automatically dereferenced at this moment.
*/
$streamLength = $objValue->Length->value;
/**
* 'stream' keyword must be followed by either cr-lf sequence or lf character only.
* This restriction gives the possibility to recognize all cases exactly
*/
if ($this->data[$this->offset] == "\r" &&
$this->data[$this->offset + 1] == "\n" ) {
$this->offset += 2;
} else if ($this->data[$this->offset] == "\n" ) {
$this->offset++;
} else {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
}
$dataOffset = $this->offset;
$this->offset += $streamLength;
$nextLexeme = $this->readLexeme();
if ($nextLexeme != 'endstream') {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
}
$nextLexeme = $this->readLexeme();
if ($nextLexeme != 'endobj') {
require_once 'Zend/Pdf/Exception.php';
throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
}
$obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
$dataOffset,
$streamLength),
(int)$objNum,
(int)$genNum,
$this->_objFactory->resolve(),
$objValue);
foreach ($this->_elements as $element) {
$element->setParentObject($obj);
}
// Restore offset value
$this->offset = $offsetSave;
return $obj;
}
/**
* Get length of source string
*
* @return integer
*/
public function getLength()
{
return strlen($this->data);
}
/**
* Get source string
*
* @return string
*/
public function getString()
{
return $this->data;
}
/**
* Parse integer value from a binary stream
*
* @param string $stream
* @param integer $offset
* @param integer $size
* @return integer
*/
public static function parseIntFromStream($stream, $offset, $size)
{
$value = 0;
for ($count = 0; $count < $size; $count++) {
$value *= 256;
$value += ord($stream[$offset + $count]);
}
return $value;
}
/**
* Set current context
*
* @param Zend_Pdf_Element_Reference_Context $context
*/
public function setContext(Zend_Pdf_Element_Reference_Context $context)
{
$this->_context = $context;
}
/**
* Object constructor
*
* Note: PHP duplicates string, which is sent by value, only of it's updated.
* Thus we don't need to care about overhead
*
* @param string $pdfString
* @param Zend_Pdf_ElementFactory_Interface $factory
*/
public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
{
$this->data = $source;
$this->_objFactory = $factory;
}
}