diff --git a/modules/translation/library/Translation/Catalog/CatalogParser.php b/modules/translation/library/Translation/Catalog/CatalogParser.php new file mode 100644 index 000000000..f8a385819 --- /dev/null +++ b/modules/translation/library/Translation/Catalog/CatalogParser.php @@ -0,0 +1,417 @@ + "\n", + '\"' => '"', + '\t' => "\t", + '\r' => "\r", + '\\' => "\\" + ); + + /** + * The path of the file being parsed + * + * @var string + */ + protected $catalogPath; + + /** + * The File being parsed + * + * @var File + */ + protected $catalogFile = null; + + /** + * The unparsed remainder of the line that is being parsed + * + * @var string + */ + protected $stack = ''; + + /** + * Number of current line + * + * @var int + */ + protected $lineNumber = 0; + + /** + * Position in the current line, advanced as characters are consumed from the stack + * + * @var int + */ + protected $position = 0; + + /** + * Create a new CatalogParser + * + * @param string $catalogPath The path to the catalog file to parse + */ + public function __construct($catalogPath) + { + $this->catalogPath = $catalogPath; + $this->catalogFile = new File($catalogPath); + $this->catalogFile->setFlags(SplFileObject::DROP_NEW_LINE); + } + + /** + * Parse the given catalog file and return its entries + * + * @param string $catalogPath The path to the catalog file to parse + * + * @return array The entries produced by parse() for that file + */ + public static function parsePath($catalogPath) + { + Benchmark::measure('CatalogParser::parsePath()'); + $parser = new static($catalogPath); + return $parser->parse(); + } + + /** + * Parse the catalog file and return its entries + * + * Entries are separated by blank lines. Each returned entry carries a boolean 'obsolete' key next to + * the values collected by processParsedValues(). + * + * @return array + * + * @throws CatalogParserException + */ + public function parse() + { + $parsedData = array(); + $currentEntry = array(); + $lastType = null; + $lastNumber = 0; + while ($this->checkStack()) { + $returnedValue = $this->handleStack(); + if (isset($returnedValue['type']) && $returnedValue['type'] === 'newline') { + if (! 
empty($currentEntry)) { + $parsedData[] = $currentEntry; + $currentEntry = array(); + $lastType = null; + $lastNumber = 0; + } + } else { + if (isset($returnedValue['type'])) { + $lastType = $returnedValue['type']; + } + if (isset($returnedValue['number'])) { + $lastNumber = $returnedValue['number']; + } + if (! isset($currentEntry['obsolete']) || isset($returnedValue['obsolete'])) { + $currentEntry['obsolete'] = isset($returnedValue['obsolete']); + } + if (isset($returnedValue['value'])) { + $currentEntry = $this->processParsedValues( + $currentEntry, + $returnedValue['value'], + $lastType, + $lastNumber + ); + } + } + } + + if (! empty($currentEntry)) { + $parsedData[] = $currentEntry; + } + + return $parsedData; + } + + /** + * Process values parsed by method parse + * + * Escape sequences listed in static::$escapedChars are resolved first. msgstr values are stored per + * plural number; for any other type, strings are concatenated and arrays are merged with what the + * entry already holds. + * + * @param array $currentEntry The current entry of method parse + * @param array|string $returnedValue The value returned by handleStack + * @param string $lastType The type the current value belongs to + * @param int $lastNumber The key the value belongs to if msgstr_plural + * + * @return array + * + * @throws CatalogParserException + */ + protected function processParsedValues($currentEntry, $returnedValue, $lastType, $lastNumber) + { + if ($lastType === null) { + throw new CatalogParserException( + $this->catalogPath, + $this->lineNumber, + $this->position - strlen($returnedValue) - 1, + "Missing type before \"$returnedValue\"" + ); + } + + $escapedChars = static::$escapedChars; + if (is_array($returnedValue)) { + $returnedValue = array_map( + function ($value) use ($escapedChars) { return strtr($value, $escapedChars); }, + $returnedValue + ); + } else { + $returnedValue = strtr($returnedValue, $escapedChars); + } + + if ($lastType === 'msgstr') { + if (isset($currentEntry['msgstr'][$lastNumber])) { + $currentEntry['msgstr'][$lastNumber] .= $returnedValue; + } else { + $currentEntry['msgstr'][$lastNumber] = $returnedValue; + } + } else { + if (isset($currentEntry[$lastType])) { + if 
(is_array($currentEntry[$lastType])) { + $currentEntry[$lastType] = array_merge($currentEntry[$lastType], $returnedValue); + } else { + $currentEntry[$lastType] .= $returnedValue; + } + } else { + $currentEntry[$lastType] = $returnedValue; + } + } + + return $currentEntry; + } + + /** + * Return whether there is still data available on the stack + * + * Reads the next line of the catalog file into the stack if the stack is empty. + * + * NOTE(review): emptiness is tested via PHP truthiness, so a stack of "0" counts as empty - confirm + * this is intended. + * + * @return bool + */ + protected function checkStack() + { + if (! $this->stack) { + if (! $this->catalogFile->eof()) { + $line = $this->catalogFile->fgets(); + $this->lineNumber++; + $this->stack = $line; + $this->position = 0; + return true; + } else { + return false; + } + } else { + return true; + } + } + + /** + * Parse the current main expression and return the result + * + * An empty stack yields the type 'newline', a hash is passed to handleHash(), a double quote yields the + * quoted string as value and anything else is read up to the next space and passed to handleKeyword(). + * + * @return array + */ + protected function handleStack() + { + $this->trimStackLeft(); + if (! $this->stack) { + return array('type' => 'newline'); + } + + switch ($char = $this->pullCharFromStack()) + { + case '#': + return $this->handleHash(); + case '"': + return array('value' => $this->readUntil('"')); + default: + $this->putCharInStack($char); + return $this->handleKeyword($this->readUntil(' ')); + } + } + + /** + * Return first character from stack and remove it + * + * @return string|null The character, or null if the stack is empty + */ + protected function pullCharFromStack() + { + if (! 
$this->stack) { + return null; + } + + $char = $this->stack[0]; + $this->stack = substr($this->stack, 1) ?: ''; + $this->position++; + + return $char; + } + + /** + * Parse the current hash expression and return the result + * + * The character following the hash selects the result: a space or nothing yields translator comments, + * '.' extracted comments, ':' paths, ',' flags, '|' a previous_* type and '~' the obsolete marker. + * + * @return array + * + * @throws CatalogParserException + */ + protected function handleHash() + { + switch ($char = $this->pullCharFromStack()) { + case ' ': + return array( + 'type' => 'translator_comments', + 'value' => array($this->getStackAndClear()) + ); + case '.': + return array( + 'type' => 'extracted_comments', + 'value' => array(ltrim($this->getStackAndClear())) + ); + case ':': + return array( + 'type' => 'paths', + 'value' => preg_split('/:\d+\K\s+(?=\S+)/', trim($this->getStackAndClear())) + ); + case ',': + return array( + 'type' => 'flags', + 'value' => array_map('trim', explode(',', $this->getStackAndClear())) + ); + case '|': + return $this->handlePrevious(); + case '~': + return array( + 'obsolete' => true + ); + case null: + return array( + 'type' => 'translator_comments', + 'value' => array('') + ); + default: + throw new CatalogParserException( + $this->catalogPath, + $this->lineNumber, + $this->position, + "Unexpected char \"$char\" after #" + ); + } + } + + /** + * Return stack content and clear it afterwards + * + * @return string + */ + protected function getStackAndClear() + { + $stack = $this->stack; + $this->stack = ''; + + return $stack; + } + + /** + * Handle stack if first two chars were #| + * + * Only the type is returned here; the quoted value that follows stays on the stack and is returned by a + * later handleStack() call. + * + * @return array Contains the key value if successful + */ + protected function handlePrevious() + { + $this->trimStackLeft(); + $result = $this->handleKeyword($this->readUntil(' ')); + + return array('type' => 'previous_' . 
$result['type']); + } + + /** + * Trim whitespaces on the left of the stack + */ + protected function trimStackLeft() + { + $oldStack = $this->stack; + $this->stack = ltrim($this->stack); + + if ($this->stack !== $oldStack) { + $this->position += strlen($oldStack) - strlen($this->stack); + } + } + + /** + * Read until given char comes up + * + * On success the position is advanced by the length of the returned string plus one. + * + * NOTE(review): the pattern assignment and the opening of the try block look truncated in this copy - + * restore them from the upstream file before relying on this method. + * + * @param string $endPoint Char to search for + * + * @return string + * + * @throws CatalogParserException In case the given char cannot be found + */ + protected function readUntil($endPoint) + { + $pattern = '/(?stack) = preg_split($pattern, $this->stack, 2); + } catch (Exception $_) { + throw new CatalogParserException( + $this->catalogPath, + $this->lineNumber, + $this->position + strlen($this->stack) + 1, + "Missing \"$endPoint\"" + ); + } + + $this->position += strlen($string) + 1; + return $string; + } + + /** + * Check if keyword is correct + * + * NOTE(review): the msgstr[N] pattern accepts a single digit only, and the boolean case label is compared + * loosely against $keyword - confirm that keywords such as "0" cannot reach it. + * + * @param string $keyword The keyword to check + * + * @return array Returns array with key type if correct + * + * @throws CatalogParserException In case the given keyword is incorrect + */ + protected function handleKeyword($keyword) + { + switch ($keyword) + { + case 'msgctxt': + case 'msgid': + case 'msgid_plural': + case 'msgstr': + return array('type' => $keyword); + case (preg_match('/^(?:msgstr\[([0-9])\])$/', $keyword, $matches) ? true : false): + return array('type' => 'msgstr', 'number' => $matches[1]); + default: + throw new CatalogParserException( + $this->catalogPath, + $this->lineNumber, + $this->position - strlen($keyword), + "\"$keyword\" is not a valid keyword" + ); + } + } + + /** + * Put char in front of the current stack + * + * @param string $char Char to be put in front + */ + protected function putCharInStack($char) + { + $this->position--; + $this->stack = $char . 
$this->stack; + } +} diff --git a/modules/translation/library/Translation/Exception/CatalogParserException.php b/modules/translation/library/Translation/Exception/CatalogParserException.php new file mode 100644 index 000000000..7eed91ea3 --- /dev/null +++ b/modules/translation/library/Translation/Exception/CatalogParserException.php @@ -0,0 +1,88 @@ +path = $path; + $this->lineNumber = $lineNumber; + $this->position = $position; + + parent::__construct( + 'Syntax error in file %s on line %s and position %s: %s', + $path, + $lineNumber, + $position, + $message + ); + } + + /** + * Return path in which the exception appears + * + * @return string + */ + public function getPath() + { + return $this->path; + } + + /** + * Return line in which the exception appears + * + * @return int + */ + public function getLineNumber() + { + return $this->lineNumber; + } + + /** + * Return position in which the exception appears + * + * @return int + */ + public function getPosition() + { + return $this->position; + } +} \ No newline at end of file diff --git a/modules/translation/test/php/library/Translation/Catalog/CatalogParserTest.php b/modules/translation/test/php/library/Translation/Catalog/CatalogParserTest.php new file mode 100644 index 000000000..3e630feed --- /dev/null +++ b/modules/translation/test/php/library/Translation/Catalog/CatalogParserTest.php @@ -0,0 +1,579 @@ +tmpFilePath = tempnam(sys_get_temp_dir(), 'CatalogParserTest_TestFile'); + } + + public function tearDown() + { + unlink($this->tmpFilePath); + } + + protected function parseString($string) + { + file_put_contents($this->tmpFilePath, $string); + return CatalogParser::parsePath($this->tmpFilePath); + } + + public function testWhetherAMessageContextIsParsedCorrectly() + { + $parserResult = $this->parseString('msgctxt "context of the message"'); + + $this->assertEquals( + 'context of the message', + $parserResult[0]['msgctxt'], + 'CatalogParser does not parse a msgctxt correctly' + ); + } + + public function 
testWhetherAnObsoleteMessageContextIsParsedCorrectly() + { + $parserResult = $this->parseString('#~ msgctxt "context of the message"'); + + $this->assertEquals( + 'context of the message', + $parserResult[0]['msgctxt'], + 'CatalogParser does not parse a obsolete msgctxt correctly' + ); + } + + public function testWhetherAPreviousMessageContextIsParsedCorrectly() + { + $parserResult = $this->parseString('#| msgctxt "previous context of the message"'); + + $this->assertEquals( + 'previous context of the message', + $parserResult[0]['previous_msgctxt'], + 'CatalogParser does not parse a previous msgctxt correctly' + ); + } + + public function testWhetherAMessageIdIsParsedCorrectly() + { + $parserResult = $this->parseString('msgid "this is a msgid"'); + + $this->assertEquals( + 'this is a msgid', + $parserResult[0]['msgid'], + 'CatalogParser does not parse a msgid correctly' + ); + } + + public function testWhetherAnObsoleteMessageIdIsParsedCorrectly() + { + $parserResult = $this->parseString('#~ msgid "this is a msgid"'); + + $this->assertEquals( + 'this is a msgid', + $parserResult[0]['msgid'], + 'CatalogParser does not parse a obsolete msgid correctly' + ); + } + + public function testWhetherAPreviousMessageIdIsParsedCorrectly() + { + $parserResult = $this->parseString('#| msgid "fuzzy id of the message"'); + + $this->assertEquals( + 'fuzzy id of the message', + $parserResult[0]['previous_msgid'], + 'CatalogParser does not parse a previous msgid correctly' + ); + } + + public function testWhetherAMessageStringIsParsedCorrectly() + { + $parserResult = $this->parseString('msgstr "translation"'); + + $this->assertEquals( + 'translation', + $parserResult[0]['msgstr'][0], + 'CatalogParser does not parse a msgstr correctly' + ); + } + + public function testWhetherAnObsoleteMessageStringIsParsedCorrectly() + { + $parserResult = $this->parseString('#~ msgstr "translation"'); + + $this->assertEquals( + 'translation', + $parserResult[0]['msgstr'][0], + 'CatalogParser does not 
parse a obsolete msgstr correctly' + ); + } + + public function testWhetherAPluralMessageIdIsParsedCorrectly() + { + $parserResult = $this->parseString('msgid_plural "id_plural"'); + + $this->assertEquals( + 'id_plural', + $parserResult[0]['msgid_plural'], + 'CatalogParser does not parse a msgid_plural correctly' + ); + } + + public function testWhetherAnObsoletePluralMessageIdIsParsedCorrectly() + { + $parserResult = $this->parseString('#~ msgid_plural "id_plural"'); + + $this->assertEquals( + 'id_plural', + $parserResult[0]['msgid_plural'], + 'CatalogParser does not parse a obsolete msgid_plural correctly' + ); + } + + public function testWhetherAPreviousPluralMessageIdIsParsedCorrectly() + { + $parserResult = $this->parseString('#| msgid_plural "id_plural"'); + + $this->assertEquals( + 'id_plural', + $parserResult[0]['previous_msgid_plural'], + 'CatalogParser does not parse a previous msgid_plural correctly' + ); + } + + public function testWhetherAPluralMessageStringIsParsedCorrectly() + { + $parserResult = $this->parseString(<<assertEquals( + 'translation0', + $parserResult[0]['msgstr'][0], + 'CatalogParser does not parse a msgstr[0] correctly' + ); + $this->assertEquals( + 'translation1', + $parserResult[0]['msgstr'][1], + 'CatalogParser does not parse a msgstr[1] correctly' + ); + } + + public function testWhetherAnObsoletePluralMessageStringIsParsedCorrectly() + { + $parserResult = $this->parseString(<<assertEquals( + 'translation0', + $parserResult[0]['msgstr'][0], + 'CatalogParser does not parse a obsolete msgstr[0] correctly' + ); + $this->assertEquals( + 'translation1', + $parserResult[0]['msgstr'][1], + 'CatalogParser does not parse a obsolete msgstr[1] correctly' + ); + } + + public function testWhetherAnObsoleteEntryIsCorrectlyIdentified() + { + $parserResult = $this->parseString('#~ msgid "translation"'); + + $this->assertTrue( + $parserResult[0]['obsolete'], + 'CatalogParser does not identify obsolete entries correctly' + ); + } + + public function 
testWhetherATranslatorCommentIsParsedCorrectly() + { + $parserResult = $this->parseString('# this is a translator comment'); + + $this->assertEquals( + 'this is a translator comment', + $parserResult[0]['translator_comments'][0], + 'CatalogParser does not parse a translator comment correctly' + ); + } + + public function testWhetherAExtractedCommentIsParsedCorrectly() + { + $parserResult = $this->parseString('#. this is a extracted comment'); + + $this->assertEquals( + 'this is a extracted comment', + $parserResult[0]['extracted_comments'][0], + 'CatalogParser does not parse a extracted comment correctly' + ); + } + + public function testWhetherASinglePathIsParsedCorrectly() + { + $parserResult = $this->parseString('#: /this/is/a/test/path:999'); + + $this->assertEquals( + '/this/is/a/test/path:999', + $parserResult[0]['paths'][0], + 'CatalogParser does not parse paths correctly' + ); + } + + public function testWhetherMultiplePathsAreParsedCorrectly() + { + $parserResult = $this->parseString(<<assertEquals( + '/this/is/a/test/path:999', + $parserResult[0]['paths'][0], + 'CatalogParser does not parse paths correctly' + ); + $this->assertEquals( + '/this/is/another/test/path:99', + $parserResult[0]['paths'][1], + 'CatalogParser does not parse paths correctly' + ); + $this->assertEquals( + '/this/is/still/another/test/path:9', + $parserResult[0]['paths'][2], + 'CatalogParser does not parse paths correctly' + ); + } + + public function testWhetherASingleFlagIsParsedCorrectly() + { + $parserResult = $this->parseString('#, this-is-a-flag'); + + $this->assertEquals( + 'this-is-a-flag', + $parserResult[0]['flags'][0], + 'CatalogParser does not parse flags correctly' + ); + } + + public function testWhetherMultipleFlagsAreParsedCorrectly() + { + $parserResult = $this->parseString(<<assertEquals( + 'this-is-a-flag', + $parserResult[0]['flags'][0], + 'CatalogParser does not parse flags correctly' + ); + $this->assertEquals( + 'this-is-another-flag', + 
$parserResult[0]['flags'][1], + 'CatalogParser does not parse flags correctly' + ); + $this->assertEquals( + 'this-is-still-another-flag', + $parserResult[0]['flags'][2], + 'CatalogParser does not parse flags correctly' + ); + } + + /** + * @depends testWhetherAExtractedCommentIsParsedCorrectly + * @depends testWhetherAMessageIdIsParsedCorrectly + * @depends testWhetherAMessageContextIsParsedCorrectly + * @depends testWhetherAMessageStringIsParsedCorrectly + * @depends testWhetherAPluralMessageStringIsParsedCorrectly + */ + public function testWhetherEscapedCharactersAreProperlyResolved() + { + $parserResult = $this->parseString(<<assertEquals( + "one line\nanother line", + $parserResult[0]['extracted_comments'][0], + 'CatalogParser does not properly resolve escaped characters in extracted comments' + ); + $this->assertEquals( + "a\nb", + $parserResult[0]['msgid'], + 'CatalogParser does not properly resolve escaped characters in message ids' + ); + $this->assertEquals( + "a\r\nb", + $parserResult[0]['msgctxt'], + 'CatalogParser does not properly resolve escaped characters in message contexts' + ); + $this->assertEquals( + "a\\\\\tb", + $parserResult[0]['msgstr'][0], + 'CatalogParser does not properly resolve escaped characters in message strings' + ); + $this->assertEquals( + 'a"', + $parserResult[0]['msgstr'][1], + 'CatalogParser does not properly resolve escaped characters in plural message strings' + ); + } + + public function testWhetherMissingKeywordsCauseAnError() + { + try { + $this->parseString(' "string with type missing in front"'); + } catch (CatalogParserException $e) { + $this->assertEquals(5, $e->getPosition(), 'CatalogParser reports incorrect error positions'); + return; + } + + $this->fail('CatalogParser does not throw an exception if keyword is missing'); + } + + public function testWhetherInvalidKeywordsCauseAnError() + { + try { + $this->parseString('wrongkeyword "string with invalid type in front"'); + } catch (CatalogParserException $e) { + 
$this->assertEquals(1, $e->getPosition(), 'CatalogParser reports incorrect error positions'); + return; + } + + $this->fail('CatalogParser does not throw an exception if given keyword is wrong'); + } + + public function testWhetherInvalidEmbeddedKeywordsCauseAnError() + { + try { + $this->parseString('#| wrongkeyword "string with type missing in front"'); + } catch (CatalogParserException $e) { + $this->assertEquals(4, $e->getPosition(), 'CatalogParser reports incorrect error positions'); + return; + } + + $this->fail('CatalogParser does not throw an exception if given previous keyword is wrong'); + } + + public function testWhetherSuperfluousQuotesCauseAnError() + { + try { + $this->parseString('#| msgid "string with a superfluous " in it"'); + } catch (CatalogParserException $e) { + $this->assertEquals(42, $e->getPosition(), 'CatalogParser reports incorrect error positions'); + return; + } + + $this->fail('CatalogParser does not throw an exception if superfluous quotes exist'); + } + + public function testWhetherMissingClosingQuotesCauseAnError() + { + try { + $this->parseString('msgstr "string with missing closing quote'); + } catch (CatalogParserException $e) { + $this->assertEquals(47, $e->getPosition(), 'CatalogParser reports incorrect error positions'); + return; + } + + $this->fail('CatalogParser does not throw an exception if closing quote is missing'); + } + + public function testWhetherMissingSpacesAfterAValidKeywordCauseAnError() + { + try { + $this->parseString('msgstr'); + } catch (CatalogParserException $e) { + $this->assertEquals(7, $e->getPosition(), 'CatalogParser reports incorrect error positions'); + return; + } + + $this->fail('CatalogParser does not throw an exception if space is missing after keyword'); + } + + public function testWhetherInvalidHashIdentifiersCauseAnError() + { + try { + $this->parseString('#a'); + } catch (CatalogParserException $e) { + $this->assertEquals(2, $e->getPosition(), 'CatalogParser reports incorrect error 
positions'); + return; + } + + $this->fail('CatalogParser does not throw an exception if char after hash is wrong'); + } + + public function testWhetherParserParsesAWholeFile() + { + $parserResult = $this->parseString(<<assertEquals( + "TranslatorComment is here", + $parserResult[0]['translator_comments'][0], + 'CatalogParser does not properly parse the first line in translator comments in complete file test' + ); + + $this->assertEquals( + "and some more translator comments", + $parserResult[0]['translator_comments'][1], + 'CatalogParser does not properly parse the second line in translator comments in complete file test' + ); + + $this->assertEquals( + "Header: Info\nMore header info: info\nMore header info: info\nMore header info: info\n", + $parserResult[0]['msgstr'][0], + 'CatalogParser does not properly parse the header correctly in complete file test' + ); + + $this->assertEquals( + "This is an extracted comment", + $parserResult[1]['extracted_comments'][0], + 'CatalogParser does not properly parse extracted comments correctly in complete file test' + ); + + $this->assertEquals( + "/this/is/a/path:123", + $parserResult[1]['paths'][0], + 'CatalogParser does not properly parse the first path correctly in complete file test' + ); + + $this->assertEquals( + "/this/is/another/path:456", + $parserResult[1]['paths'][1], + 'CatalogParser does not properly parse the second path correctly in complete file test' + ); + + $this->assertEquals( + "/this/is/yet/another/path:789", + $parserResult[1]['paths'][2], + 'CatalogParser does not properly parse the third path correctly in complete file test' + ); + + $this->assertEquals( + "php-format", + $parserResult[1]['flags'][0], + 'CatalogParser does not properly parse the first flag correctly in complete file test' + ); + + $this->assertEquals( + "fuzzy", + $parserResult[1]['flags'][1], + 'CatalogParser does not properly parse the second flag correctly in complete file test' + ); + + $this->assertEquals( + "Message context", 
+ $parserResult[1]['msgctxt'], + 'CatalogParser does not properly parse the message context correctly in complete file test' + ); + + $this->assertEquals( + "To be translated", + $parserResult[1]['previous_msgid'], + 'CatalogParser does not properly parse the previous message id correctly in complete file test' + ); + + $this->assertEquals( + "To translate", + $parserResult[1]['msgid'], + 'CatalogParser does not properly parse the message id correctly in complete file test' + ); + + $this->assertEquals( + "Zu übersetzen", + $parserResult[1]['msgstr'][0], + 'CatalogParser does not properly parse the message correctly in complete file test' + ); + + $this->assertEquals( + "This is a comment for a normal entry", + $parserResult[2]['translator_comments'][0], + 'CatalogParser does not properly parse the translator comments correctly in complete file test' + ); + + $this->assertEquals( + "Translation for plural", + $parserResult[2]['msgid_plural'], + 'CatalogParser does not properly parse message id plural correctly in complete file test' + ); + + $this->assertEquals( + "Übersetzung für plural 1", + $parserResult[2]['msgstr'][0], + 'CatalogParser does not properly parse the first plural message correctly in complete file test' + ); + + $this->assertEquals( + "Übersetzung für plural 2", + $parserResult[2]['msgstr'][1], + 'CatalogParser does not properly parse the second plural message correctly in complete file test' + ); + + $this->assertTrue( + $parserResult[3]['obsolete'], + 'CatalogParser does not set obsolete correctly in complete file test' + ); + + $this->assertEquals( + "Obsolete message id", + $parserResult[3]['msgid'], + 'CatalogParser does not properly parse obsolete message id correctly in complete file test' + ); + + $this->assertEquals( + "Obsolete message string", + $parserResult[3]['msgstr'][0], + 'CatalogParser does not properly parse obsolete message correctly in complete file test' + ); + + } +} \ No newline at end of file