diff options
Diffstat (limited to 'MLEB/Translate/ffs/AndroidXmlFFS.php')
-rw-r--r-- | MLEB/Translate/ffs/AndroidXmlFFS.php | 165 |
1 files changed, 68 insertions, 97 deletions
diff --git a/MLEB/Translate/ffs/AndroidXmlFFS.php b/MLEB/Translate/ffs/AndroidXmlFFS.php index 636cb3b4..2e947c61 100644 --- a/MLEB/Translate/ffs/AndroidXmlFFS.php +++ b/MLEB/Translate/ffs/AndroidXmlFFS.php @@ -4,7 +4,7 @@ * * @file * @author Niklas Laxström - * @license GPL-2.0+ + * @license GPL-2.0-or-later */ /** @@ -13,21 +13,17 @@ * @ingroup FFS */ class AndroidXmlFFS extends SimpleFFS { - protected static $pluralWords = array( - 'zero' => 1, - 'one' => 1, - 'two' => 1, - 'few' => 1, - 'many' => 1, - 'other' => 1, - ); + public function __construct( FileBasedMessageGroup $group ) { + parent::__construct( $group ); + $this->flattener = $this->getFlattener(); + } public function supportsFuzzy() { return 'yes'; } public function getFileExtensions() { - return array( '.xml' ); + return [ '.xml' ]; } /** @@ -37,7 +33,7 @@ class AndroidXmlFFS extends SimpleFFS { public function readFromVariable( $data ) { $reader = new SimpleXMLElement( $data ); - $messages = array(); + $messages = []; $mangler = $this->group->getMangler(); /** @var SimpleXMLElement $element */ @@ -47,11 +43,11 @@ class AndroidXmlFFS extends SimpleFFS { if ( $element->getName() === 'string' ) { $value = $this->readElementContents( $element ); } elseif ( $element->getName() === 'plurals' ) { - $forms = array(); + $forms = []; foreach ( $element as $item ) { $forms[(string)$item['quantity']] = $this->readElementContents( $item ); } - $value = $this->flattenPlural( $forms ); + $value = $this->flattener->flattenCLDRPlurals( $forms ); } else { wfDebug( __METHOD__ . ': Unknown XML element name.' ); continue; @@ -64,10 +60,26 @@ class AndroidXmlFFS extends SimpleFFS { $messages[$key] = $value; } - return array( - 'AUTHORS' => array(), // @todo + return [ + 'AUTHORS' => $this->scrapeAuthors( $data ), 'MESSAGES' => $mangler->mangle( $messages ), - ); + ]; + } + + protected function scrapeAuthors( $string ) { + $match = []; + preg_match( '~<!-- Authors:\n((?:\* .*\n)*)-->~', $string, $match ); + if ( !$match ) { + return []; + } + + $authors = $matches = []; + preg_match_all( '~\* (.*)~', $match[ 1 ], $matches ); + foreach ( $matches[1] as $author ) { + // PHP7: \u{2011} + $authors[] = str_replace( "\xE2\x80\x91\xE2\x80\x91", '--', $author ); + } + return $authors; } protected function readElementContents( $element ) { @@ -82,14 +94,41 @@ class AndroidXmlFFS extends SimpleFFS { // Add backslash to escape it too. $escaped = '\\' . $escaped; } + // All html entities seen would be inserted by translators themselves. + // Treat them as plain text. + $escaped = str_replace( '&', '&', $escaped ); + + // Newlines must be escaped + $escaped = str_replace( "\n", '\n', $escaped ); return $escaped; } + protected function doAuthors( MessageCollection $collection ) { + $authors = $collection->getAuthors(); + $authors = $this->filterAuthors( $authors, $collection->code ); + + if ( !$authors ) { + return ''; + } + + $output = "\n<!-- Authors:\n"; + + foreach ( $authors as $author ) { + // Since -- is not allowed in XML comments, we rewrite them to + // U+2011 (non-breaking hyphen). PHP7: \u{2011} + $author = str_replace( '--', "\xE2\x80\x91\xE2\x80\x91", $author ); + $output .= "* $author\n"; + } + + $output .= "-->\n"; + + return $output; + } + protected function writeReal( MessageCollection $collection ) { - $template = <<<XML -<?xml version="1.0" encoding="utf-8"?> -<resources></resources> -XML; + $template = '<?xml version="1.0" encoding="utf-8"?>'; + $template .= $this->doAuthors( $collection ); + $template .= '<resources></resources>'; $writer = new SimpleXMLElement( $template ); $mangler = $this->group->getMangler(); @@ -108,13 +147,13 @@ XML; $value = $m->translation(); $value = str_replace( TRANSLATE_FUZZY, '', $value ); - // Handle plurals - if ( strpos( $value, '{{PLURAL' ) === false ) { + $plurals = $this->flattener->unflattenCLDRPlurals( '', $value ); + + if ( $plurals === false ) { $element = $writer->addChild( 'string', $this->formatElementContents( $value ) ); } else { $element = $writer->addChild( 'plurals' ); - $forms = $this->unflattenPlural( $value ); - foreach ( $forms as $quantity => $content ) { + foreach ( $plurals as $quantity => $content ) { $item = $element->addChild( 'item', $this->formatElementContents( $content ) ); $item->addAttribute( 'quantity', $quantity ); } @@ -135,80 +174,12 @@ XML; return $dom->saveXML(); } - /** - * Flattens array of plurals into string. - * - * @param array $forms array - * @return string - */ - protected function flattenPlural( array $forms ) { - $pls = '{{PLURAL'; - foreach ( $forms as $key => $value ) { - $pls .= "|$key=$value"; - } - - $pls .= "}}"; - return $pls; + protected function getFlattener() { + $flattener = new ArrayFlattener( '', true ); + return $flattener; } - /** - * Converts the flattened plural into messages - * - * @param string $message - * @return array - */ - protected function unflattenPlural( $message ) { - $regex = '~\{\{PLURAL\|(.*?)}}~s'; - $matches = array(); - $match = array(); - - while ( preg_match( $regex, $message, $match ) ) { - $uniqkey = TranslateUtils::getPlaceholder(); - $matches[$uniqkey] = $match; - $message = preg_replace( $regex, $uniqkey, $message, 1 ); - } - - // No plurals, should not happen. - if ( !count( $matches ) ) { - return array(); - } - - // The final array of alternative plurals forms. - $alts = array(); - - /* - * Then loop trough each plural block and replacing the placeholders - * to construct the alternatives. Produces invalid output if there is - * multiple plural bocks which don't have the same set of keys. - */ - $pluralChoice = implode( '|', array_keys( self::$pluralWords ) ); - $regex = "~($pluralChoice)\s*=\s*(.+)~s"; - foreach ( $matches as $ph => $plu ) { - $forms = explode( '|', $plu[1] ); - - foreach ( $forms as $form ) { - if ( $form === '' ) { - continue; - } - - $match = array(); - if ( !preg_match( $regex, $form, $match ) ) { - // No quantity key was provided - continue; - } - - $formWord = $match[1]; - $value = $match[2]; - if ( !isset( $alts[$formWord] ) ) { - $alts[$formWord] = $message; - } - - $string = $alts[$formWord]; - - $alts[$formWord] = str_replace( $ph, $value, $string ); - } - } - - return $alts; + public function isContentEqual( $a, $b ) { + return $this->flattener->compareContent( $a, $b ); } } |