diff options
Diffstat (limited to 'MLEB/Translate/src')
49 files changed, 3947 insertions, 0 deletions
/**
 * Base class for Translate jobs. Adds logging helpers that prefix every
 * message with the job type, the request id and the job title.
 * @since 2019.08
 */
abstract class GenericTranslateJob extends \Job {
	/**
	 * Logger instance, created on the first call to getLogger()
	 * @var LoggerInterface
	 */
	protected $logger;

	/**
	 * Channel name to be used during logging
	 * @var string
	 */
	private const CHANNEL_NAME = 'Translate.Jobs';

	/**
	 * Returns the logger for the job channel. The instance is created once
	 * and then reused for the lifetime of the job object.
	 * @return LoggerInterface
	 */
	protected function getLogger() {
		if ( !$this->logger ) {
			$this->logger = LoggerFactory::getInstance( self::CHANNEL_NAME );
		}

		return $this->logger;
	}

	/**
	 * Builds the prefix that is prepended to every log message.
	 * @return string
	 */
	protected function getLogPrefix() {
		$prefix = '[Job: ' . $this->getType();
		$prefix .= '][Request ID: ' . $this->getRequestId();
		$prefix .= '][Title: ' . $this->title->getPrefixedText() . '] ';

		return $prefix;
	}

	/**
	 * Logs a prefixed message at info level.
	 * @param string $msg
	 * @param array $context
	 */
	protected function logInfo( $msg, $context = [] ) {
		$prefixedMsg = $this->getLogPrefix() . $msg;
		$this->getLogger()->info( $prefixedMsg, $context );
	}

	/**
	 * Logs a prefixed message at debug level.
	 * @param string $msg
	 * @param array $context
	 */
	protected function logDebug( $msg, $context = [] ) {
		$prefixedMsg = $this->getLogPrefix() . $msg;
		$this->getLogger()->debug( $prefixedMsg, $context );
	}

	/**
	 * Logs a prefixed message at error level.
	 * @param string $msg
	 * @param array $context
	 */
	protected function logError( $msg, $context = [] ) {
		$prefixedMsg = $this->getLogPrefix() . $msg;
		$this->getLogger()->error( $prefixedMsg, $context );
	}

	/**
	 * Logs a prefixed message at warning level.
	 * @param string $msg
	 * @param array $context
	 */
	protected function logWarning( $msg, $context = [] ) {
		$prefixedMsg = $this->getLogPrefix() . $msg;
		$this->getLogger()->warning( $prefixedMsg, $context );
	}
}
/**
 * Class is used to track the changes made when importing messages from the remote sources
 * using processMessageChanges. Also provides an interface to query these changes, and
 * update them.
 *
 * Changes are stored per language and per modification type. Additions, changes and
 * deletions are stored as lists of [ 'key' => ..., 'content' => ... ] entries; renames
 * are stored as a map indexed by message key.
 * @since 2019.10
 */
class MessageSourceChange {
	/**
	 * @var array[][][]
	 * @codingStandardsIgnoreStart
	 * @phan-var array<string,array<string,array<string|int,array{key:string,content:string,similarity?:float,matched_to?:string,previous_state?:string}>>>
	 * @codingStandardsIgnoreEnd
	 */
	protected $changes = [];

	public const ADDITION = 'addition';
	public const CHANGE = 'change';
	public const DELETION = 'deletion';
	public const RENAME = 'rename';
	public const NONE = 'none';

	private const SIMILARITY_THRESHOLD = 0.9;

	/**
	 * Contains a mapping of message type, and the corresponding addition function
	 * @var callable[]
	 */
	protected $addFunctionMap;

	/**
	 * Contains a mapping of message type, and the corresponding removal function
	 * @var callable[]
	 */
	protected $removeFunctionMap;

	/**
	 * @param array[][][] $changes
	 */
	public function __construct( $changes = [] ) {
		$this->changes = $changes;
		$this->addFunctionMap = [
			self::ADDITION => [ $this, 'addAddition' ],
			self::DELETION => [ $this, 'addDeletion' ],
			self::CHANGE => [ $this, 'addChange' ]
		];

		$this->removeFunctionMap = [
			self::ADDITION => [ $this, 'removeAdditions' ],
			self::DELETION => [ $this, 'removeDeletions' ],
			self::CHANGE => [ $this, 'removeChanges' ]
		];
	}

	/**
	 * Add a change under a message group for a specific language
	 * @param string $language
	 * @param string $key
	 * @param string $content
	 */
	public function addChange( $language, $key, $content ) {
		$this->addModification( $language, self::CHANGE, $key, $content );
	}

	/**
	 * Add an addition under a message group for a specific language
	 * @param string $language
	 * @param string $key
	 * @param string $content
	 */
	public function addAddition( $language, $key, $content ) {
		$this->addModification( $language, self::ADDITION, $key, $content );
	}

	/**
	 * Adds a deletion under a message group for a specific language
	 * @param string $language
	 * @param string $key
	 * @param string $content
	 */
	public function addDeletion( $language, $key, $content ) {
		$this->addModification( $language, self::DELETION, $key, $content );
	}

	/**
	 * Adds a rename under a message group for a specific language. Two entries are
	 * stored, one per message key, each pointing at the other through 'matched_to'.
	 * @param string $language
	 * @param string[] $addedMessage Must contain 'key' and 'content'
	 * @param string[] $deletedMessage Must contain 'key' and 'content'
	 * @param float $similarity
	 */
	public function addRename( $language, $addedMessage, $deletedMessage, $similarity = 0 ) {
		$this->changes[$language][self::RENAME][$addedMessage['key']] = [
			'content' => $addedMessage['content'],
			'similarity' => $similarity,
			'matched_to' => $deletedMessage['key'],
			'previous_state' => self::ADDITION,
			'key' => $addedMessage['key']
		];

		$this->changes[$language][self::RENAME][$deletedMessage['key']] = [
			'content' => $deletedMessage['content'],
			'similarity' => $similarity,
			'matched_to' => $addedMessage['key'],
			'previous_state' => self::DELETION,
			'key' => $deletedMessage['key']
		];
	}

	/**
	 * Updates the previous state recorded for a renamed message. Does nothing if
	 * the message is not tracked as a rename for the language.
	 * @param string $language
	 * @param string $msgKey
	 * @param string $state One of ADDITION, CHANGE, DELETION, NONE, RENAME
	 * @throws InvalidArgumentException If the state is not one of the known states
	 */
	public function setRenameState( $language, $msgKey, $state ) {
		$possibleStates = [ self::ADDITION, self::CHANGE, self::DELETION,
			self::NONE, self::RENAME ];
		if ( !in_array( $state, $possibleStates, true ) ) {
			throw new InvalidArgumentException(
				"Invalid state passed - '$state'. Possible states - "
				. implode( ', ', $possibleStates )
			);
		}

		if ( isset( $this->changes[$language][self::RENAME][$msgKey] ) ) {
			$this->changes[$language][self::RENAME][$msgKey]['previous_state'] = $state;
		}
	}

	/**
	 * @param string $language
	 * @param string $type
	 * @param string $key
	 * @param string $content
	 */
	protected function addModification( $language, $type, $key, $content ) {
		$this->changes[$language][$type][] = [
			'key' => $key,
			'content' => $content,
		];
	}

	/**
	 * Fetch changes for a message group under a language
	 * @param string $language
	 * @return array[]
	 */
	public function getChanges( $language ) {
		return $this->getModification( $language, self::CHANGE );
	}

	/**
	 * Fetch deletions for a message group under a language
	 * @param string $language
	 * @return array[]
	 */
	public function getDeletions( $language ) {
		return $this->getModification( $language, self::DELETION );
	}

	/**
	 * Fetch additions for a message group under a language
	 * @param string $language
	 * @return array[]
	 */
	public function getAdditions( $language ) {
		return $this->getModification( $language, self::ADDITION );
	}

	/**
	 * Finds a message with the given key across different types of modifications.
	 * @param string $language
	 * @param string $key
	 * @param string[] $possibleStates Modification types to search; empty means all types
	 * @param string|null &$modificationType Set to the type the message was found under,
	 *  or null if it was not found
	 * @return array|null
	 */
	public function findMessage( $language, $key, $possibleStates = [], &$modificationType = null ) {
		$allChanges = [];
		$allChanges[self::ADDITION] = $this->getAdditions( $language );
		$allChanges[self::DELETION] = $this->getDeletions( $language );
		$allChanges[self::CHANGE] = $this->getChanges( $language );
		$allChanges[self::RENAME] = $this->getRenames( $language );

		if ( $possibleStates === [] ) {
			$possibleStates = [ self::ADDITION, self::CHANGE, self::DELETION, self::RENAME ];
		}

		foreach ( $allChanges as $type => $modifications ) {
			if ( !in_array( $type, $possibleStates, true ) ) {
				continue;
			}

			// Renames are indexed by message key, so a direct lookup is enough
			if ( $type === self::RENAME ) {
				if ( isset( $modifications[$key] ) ) {
					$modificationType = $type;
					return $modifications[$key];
				}
				continue;
			}

			foreach ( $modifications as $modification ) {
				if ( $modification['key'] === $key ) {
					$modificationType = $type;
					return $modification;
				}
			}
		}

		$modificationType = null;
		return null;
	}

	/**
	 * Break renames, and put messages back into their previous state.
	 * @param string $languageCode
	 * @param string $msgKey
	 * @return string|null previous state of the message
	 */
	public function breakRename( $languageCode, $msgKey ) {
		$msg = $this->findMessage( $languageCode, $msgKey, [ self::RENAME ] );
		if ( $msg === null ) {
			return null;
		}
		$matchedMsg = $this->getMatchedMessage( $languageCode, $msg['key'] );
		if ( $matchedMsg === null ) {
			return null;
		}

		// Remove them from the renames array
		$this->removeRenames( $languageCode, [ $matchedMsg['key'], $msg['key'] ] );

		$matchedMsgState = $matchedMsg[ 'previous_state' ];
		$msgState = $msg[ 'previous_state' ];

		// Add them to the changes under the appropriate state
		if ( $matchedMsgState !== self::NONE ) {
			if ( $matchedMsgState === self::CHANGE ) {
				$matchedMsg['key'] = $msg['key'];
			}
			call_user_func(
				$this->addFunctionMap[ $matchedMsgState ],
				$languageCode,
				$matchedMsg['key'],
				$matchedMsg['content']
			);
		}

		if ( $msgState !== self::NONE ) {
			if ( $msgState === self::CHANGE ) {
				$msg['key'] = $matchedMsg['key'];
			}
			call_user_func(
				$this->addFunctionMap[ $msgState ],
				$languageCode,
				$msg['key'],
				$msg['content']
			);
		}

		return $msgState;
	}

	/**
	 * Fetch renames for a message group under a language. Every returned entry
	 * has its 'key' field set to the key it is indexed by.
	 * @param string $language
	 * @return array[]
	 */
	public function getRenames( $language ) {
		$renames = $this->getModification( $language, self::RENAME );
		// Iterate over the keys instead of by reference to avoid a dangling reference
		foreach ( array_keys( $renames ) as $key ) {
			$renames[$key]['key'] = $key;
		}

		return $renames;
	}

	/**
	 * @param string $language
	 * @param string $type
	 * @return array[]
	 */
	protected function getModification( $language, $type ) {
		return $this->changes[$language][$type] ?? [];
	}

	/**
	 * Remove additions for a language under the group.
	 * @param string $language
	 * @param array|null $keysToRemove Null removes all additions
	 */
	public function removeAdditions( $language, $keysToRemove ) {
		$this->removeModification( $language, self::ADDITION, $keysToRemove );
	}

	/**
	 * Remove deletions for a language under the group.
	 * @param string $language
	 * @param array|null $keysToRemove Null removes all deletions
	 */
	public function removeDeletions( $language, $keysToRemove ) {
		$this->removeModification( $language, self::DELETION, $keysToRemove );
	}

	/**
	 * Remove changes for a language under the group.
	 * @param string $language
	 * @param array|null $keysToRemove Null removes all changes
	 */
	public function removeChanges( $language, $keysToRemove ) {
		$this->removeModification( $language, self::CHANGE, $keysToRemove );
	}

	/**
	 * Remove renames for a language under the group.
	 * @param string $language
	 * @param array|null $keysToRemove Null removes all renames
	 */
	public function removeRenames( $language, $keysToRemove ) {
		$this->removeModification( $language, self::RENAME, $keysToRemove );
	}

	/**
	 * Remove modifications based on the type. Avoids usage of ugly if / switch
	 * statement.
	 * @param string $language
	 * @param array $keysToRemove
	 * @param string $type - One of ADDITION, CHANGE, DELETION
	 * @throws InvalidArgumentException If the type has no removal function
	 */
	public function removeBasedOnType( $language, $keysToRemove, $type ) {
		$callable = $this->removeFunctionMap[ $type ] ?? null;

		if ( $callable === null ) {
			throw new InvalidArgumentException( 'Type should be one of ' .
				implode( ', ', [ self::ADDITION, self::CHANGE, self::DELETION ] ) .
				". Invalid type $type passed."
			);
		}

		call_user_func( $callable, $language, $keysToRemove );
	}

	/**
	 * Remove all language related changes for a group.
	 * @param string $language
	 */
	public function removeChangesForLanguage( $language ) {
		unset( $this->changes[ $language ] );
	}

	/**
	 * Removes modifications of a type for a language.
	 * @param string $language
	 * @param string $type
	 * @param array|null $keysToRemove Message keys to remove. Null removes every
	 *  modification of the type; an empty array removes nothing.
	 */
	protected function removeModification( $language, $type, $keysToRemove = null ) {
		if ( !isset( $this->changes[$language][$type] ) || $keysToRemove === [] ) {
			return;
		}

		if ( $keysToRemove === null ) {
			unset( $this->changes[$language][$type] );
			// Nothing left to filter; continuing would pass null to array_flip()
			// / operate on the index that was just unset.
			return;
		}

		if ( $type === self::RENAME ) {
			// Renames are indexed by message key
			$this->changes[$language][$type] =
				array_diff_key( $this->changes[$language][$type], array_flip( $keysToRemove ) );
		} else {
			$this->changes[$language][$type] = array_filter(
				$this->changes[$language][$type],
				static function ( $change ) use ( $keysToRemove ) {
					return !in_array( $change['key'], $keysToRemove, true );
				}
			);
		}
	}

	/**
	 * Return all modifications for the group.
	 * @return array[][][]
	 */
	public function getAllModifications() {
		return $this->changes;
	}

	/**
	 * Get all modifications for a language under the group.
	 * @param string $language
	 * @return array[][]
	 */
	public function getModificationsForLanguage( $language ) {
		return $this->changes[$language] ?? [];
	}

	/**
	 * Loads the changes, and returns an instance of the class.
	 * @param array $changesData
	 * @return self
	 */
	public static function loadModifications( $changesData ) {
		return new self( $changesData );
	}

	/**
	 * Get all language keys with modifications under the group
	 * @return string[]
	 */
	public function getLanguages() {
		return array_keys( $this->changes );
	}

	/**
	 * Determines if the group has only a certain type of change under a language.
	 * Note that this is also true when the language has no modifications at all,
	 * because only the presence of the other types is checked.
	 *
	 * @param string $language
	 * @param string $type One of ADDITION, CHANGE, DELETION, RENAME
	 * @throws InvalidArgumentException If the type is not known
	 * @return bool
	 */
	public function hasOnly( $language, $type ) {
		$modificationsByType = [
			self::DELETION => $this->getDeletions( $language ),
			self::ADDITION => $this->getAdditions( $language ),
			self::RENAME => $this->getRenames( $language ),
			self::CHANGE => $this->getChanges( $language ),
		];

		if ( !array_key_exists( $type, $modificationsByType ) ) {
			throw new InvalidArgumentException( "Unknown $type passed." );
		}

		foreach ( $modificationsByType as $otherType => $modifications ) {
			if ( $otherType !== $type && $modifications ) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Checks if the previous state of a renamed message matches a given value
	 * @param string $languageCode
	 * @param string $key
	 * @param string[] $types
	 * @return bool
	 */
	public function isPreviousState( $languageCode, $key, array $types ) {
		$msg = $this->findMessage( $languageCode, $key, [ self::RENAME ] );

		return isset( $msg['previous_state'] ) && in_array( $msg['previous_state'], $types, true );
	}

	/**
	 * Get matched rename message for a given key
	 * @param string $languageCode
	 * @param string $key
	 * @return array|null Matched message if found, else null
	 */
	public function getMatchedMessage( $languageCode, $key ) {
		$matchedKey = $this->getMatchedKey( $languageCode, $key );
		// Compare against null explicitly: a truthiness check would treat the
		// message key '0' as missing.
		if ( $matchedKey === null ) {
			return null;
		}

		return $this->changes[ $languageCode ][ self::RENAME ][ $matchedKey ] ?? null;
	}

	/**
	 * Get matched rename key for a given key
	 * @param string $languageCode
	 * @param string $key
	 * @return string|null Matched key if found, else null
	 */
	public function getMatchedKey( $languageCode, $key ) {
		return $this->changes[ $languageCode ][ self::RENAME ][ $key ][ 'matched_to' ] ?? null;
	}

	/**
	 * Returns the calculated similarity for a rename
	 * @param string $languageCode
	 * @param string $key
	 * @return float|null
	 */
	public function getSimilarity( $languageCode, $key ) {
		$msg = $this->findMessage( $languageCode, $key, [ self::RENAME ] );

		return $msg[ 'similarity' ] ?? null;
	}

	/**
	 * Checks if a given key is equal to matched rename message
	 * @param string $languageCode
	 * @param string $key
	 * @return bool
	 */
	public function isEqual( $languageCode, $key ) {
		$msg = $this->findMessage( $languageCode, $key, [ self::RENAME ] );
		return $msg && $this->areStringsEqual( $msg[ 'similarity' ] );
	}

	/**
	 * Checks if a given key is similar to matched rename message
	 *
	 * @param string $languageCode
	 * @param string $key
	 * @return bool
	 */
	public function isSimilar( $languageCode, $key ) {
		$msg = $this->findMessage( $languageCode, $key, [ self::RENAME ] );
		return $msg && $this->areStringsSimilar( $msg[ 'similarity' ] );
	}

	/**
	 * Checks if the similarity percent passed passes the min threshold
	 * @param float $similarity
	 * @return bool
	 */
	public function areStringsSimilar( $similarity ) {
		return $similarity >= self::SIMILARITY_THRESHOLD;
	}

	/**
	 * Checks if the similarity passed represents an exact match
	 * @param float $similarity
	 * @return bool
	 */
	public function areStringsEqual( $similarity ) {
		// Similarity is a float; a strict comparison with the integer 1 is never
		// true for 1.0, so normalise to float before comparing.
		return (float)$similarity === 1.0;
	}
}
/**
 * Container for validation issues returned by MessageValidator.
 * Holds errors and warnings separately and can render them as parsed messages.
 *
 * @author Abijeet Patro
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 * @since 2020.06 (originally 2019.06)
 */
class ValidationResult {
	/** @var ValidationIssues */
	protected $errors;

	/** @var ValidationIssues */
	protected $warnings;

	public function __construct( ValidationIssues $errors, ValidationIssues $warnings ) {
		$this->errors = $errors;
		$this->warnings = $warnings;
	}

	/** @return bool True if there is at least one warning or error */
	public function hasIssues(): bool {
		if ( $this->hasWarnings() ) {
			return true;
		}

		return $this->hasErrors();
	}

	/** @return ValidationIssues New collection with the errors merged first, then the warnings */
	public function getIssues(): ValidationIssues {
		$combined = new ValidationIssues();
		$combined->merge( $this->errors );
		$combined->merge( $this->warnings );

		return $combined;
	}

	public function hasWarnings(): bool {
		return $this->warnings->hasIssues();
	}

	public function hasErrors(): bool {
		return $this->errors->hasIssues();
	}

	public function getWarnings(): ValidationIssues {
		return $this->warnings;
	}

	public function getErrors(): ValidationIssues {
		return $this->errors;
	}

	/**
	 * @param IContextSource $context
	 * @return string[] Parsed warning messages
	 */
	public function getDescriptiveWarnings( IContextSource $context ): array {
		return $this->expandMessages( $context, $this->warnings );
	}

	/**
	 * @param IContextSource $context
	 * @return string[] Parsed error messages
	 */
	public function getDescriptiveErrors( IContextSource $context ): array {
		return $this->expandMessages( $context, $this->errors );
	}

	/**
	 * Turns every issue into its parsed message, keeping the keys produced by
	 * iterating over the issues.
	 */
	private function expandMessages( IContextSource $context, ValidationIssues $issues ): array {
		$expanded = [];
		foreach ( iterator_to_array( $issues ) as $index => $issue ) {
			$expanded[$index] = $this->expandMessage( $context, $issue );
		}

		return $expanded;
	}

	/** Parses the message of a single issue using the given context. */
	private function expandMessage( IContextSource $context, ValidationIssue $issue ): string {
		$messageParams = $this->fixMessageParams( $context, $issue->messageParams() );

		return $context->msg( $issue->messageKey() )->params( $messageParams )->parse();
	}

	/**
	 * Converts typed parameters ( [ type, value ] pairs ) into values that can be
	 * passed to a message. Non-array parameters are passed through unchanged.
	 *
	 * @throws InvalidArgumentException If a typed parameter has an unknown type
	 */
	private function fixMessageParams( IContextSource $context, array $params ): array {
		$language = $context->getLanguage();
		$fixed = [];

		foreach ( $params as $param ) {
			if ( !is_array( $param ) ) {
				$fixed[] = $param;
				continue;
			}

			[ $type, $value ] = $param;

			if ( $type === 'COUNT' ) {
				$fixed[] = $language->formatNum( $value );
				continue;
			}

			if ( $type === 'PARAMS' ) {
				$fixed[] = $language->commaList( $value );
				continue;
			}

			if ( $type === 'PLAIN-PARAMS' ) {
				$escapedValues = array_map( 'wfEscapeWikiText', $value );
				$fixed[] = $language->commaList( $escapedValues );
				continue;
			}

			if ( $type === 'PLAIN' ) {
				$fixed[] = wfEscapeWikiText( $value );
				continue;
			}

			if ( $type === 'MESSAGE' ) {
				// The first element is the message key, the rest are its parameters
				$nestedKey = array_shift( $value );
				$nestedParams = $this->fixMessageParams( $context, $value );
				$fixed[] = $context->msg( $nestedKey )->params( $nestedParams );
				continue;
			}

			throw new InvalidArgumentException( "Unknown type $type" );
		}

		return $fixed;
	}
}
/**
 * Interface built to be implemented by validators.
 *
 * NOTE(review): instances of this interface appear to be treated as legacy
 * validators elsewhere (wrapped in an adapter by the factory) — confirm before
 * adding new implementations.
 * @since 2019.06
 * @deprecated since 2020.06
 */
interface Validator {
	/**
	 * Validates a message.
	 *
	 * @param TMessage $message Message to validate
	 * @param string $code presumably the target language code — TODO confirm
	 * @param array &$notices Passed by reference; presumably implementations append
	 *  the problems they find to it — verify against implementations
	 */
	public function validate( TMessage $message, $code, array &$notices );
}
/**
 * Factory that creates instances of the bundled validators by id, or of any
 * validator class by class name.
 *
 * @author Abijeet Patro
 * @license GPL-2.0-or-later
 * @since 2019.06
 */
class ValidatorFactory {
	/** @var string[] Map of validator id to validator class name */
	protected static $validators = [
		'BraceBalance' => BraceBalanceValidator::class,
		'EscapeCharacter' => EscapeCharacterValidator::class,
		'GettextNewline' => GettextNewlineValidator::class,
		'GettextPlural' => GettextPluralValidator::class,
		'InsertableRegex' => InsertableRegexValidator::class,
		'InsertableRubyVariable' => InsertableRubyVariableValidator::class,
		'IosVariable' => IosVariableValidator::class,
		'MatchSet' => MatchSetValidator::class,
		'MediaWikiLink' => MediaWikiLinkValidator::class,
		'MediaWikiPageName' => MediaWikiPageNameValidator::class,
		'MediaWikiParameter' => MediaWikiParameterValidator::class,
		'MediaWikiPlural' => MediaWikiPluralValidator::class,
		'MediaWikiTimeList' => MediaWikiTimeListValidator::class,
		'Newline' => NewlineValidator::class,
		'NumericalParameter' => NumericalParameterValidator::class,
		'Printf' => PrintfValidator::class,
		'PythonInterpolation' => PythonInterpolationValidator::class,
		'Replacement' => ReplacementValidator::class,
		'SmartFormatPlural' => SmartFormatPluralValidator::class,
		'UnicodePlural' => UnicodePluralValidator::class,
		// BC: remove when unused
		'WikiLink' => MediaWikiLinkValidator::class,
		// BC: remove when unused
		'WikiParameter' => MediaWikiParameterValidator::class,
	];

	/**
	 * Creates the validator registered under the given id.
	 *
	 * @param string $id Id of the pre-defined validator class
	 * @param mixed|null $params Passed to the validator constructor
	 * @throws InvalidArgumentException If no validator is registered under the id
	 * @return MessageValidator
	 */
	public static function get( $id, $params = null ) {
		$class = self::$validators[ $id ] ?? null;
		if ( $class === null ) {
			throw new InvalidArgumentException( "Could not find validator with id - '$id'. " );
		}

		return self::loadInstance( $class, $params );
	}

	/**
	 * Creates an instance of the given validator class. Instances implementing
	 * the Validator interface are wrapped in a LegacyValidatorAdapter.
	 *
	 * @param string $class Custom validator class name
	 * @param mixed|null $params Passed to the validator constructor
	 * @throws InvalidArgumentException If the class does not exist
	 * @return MessageValidator
	 */
	public static function loadInstance( $class, $params = null ): MessageValidator {
		if ( !class_exists( $class ) ) {
			throw new InvalidArgumentException( "Could not find validator class - '$class'. " );
		}

		$instance = new $class( $params );

		return $instance instanceof Validator
			? new LegacyValidatorAdapter( $instance )
			: $instance;
	}

	/**
	 * Adds / Updates available list of validators
	 * @param string $id Id of the validator
	 * @param string $validator Validator class name
	 * @param string $ns Namespace prefix prepended to the class name
	 * @throws RuntimeException If the resulting class does not exist
	 */
	public static function set( $id, $validator, $ns = '\\' ) {
		$qualifiedClass = $ns . $validator;
		if ( !class_exists( $qualifiedClass ) ) {
			throw new RuntimeException( 'Could not find validator class - ' . $qualifiedClass );
		}

		self::$validators[ $id ] = $qualifiedClass;
	}
}
/**
 * Reports translations whose braces, brackets or parentheses are unbalanced
 * while the same kind of brace is balanced in the definition.
 * @author Abijeet Patro
 * @license GPL-2.0-or-later
 * @since 2019.06
 */
class BraceBalanceValidator implements MessageValidator {
	public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
		$definition = $message->definition();
		$translation = $message->translation();

		// Opening brace => closing brace
		$bracePairs = [
			'{' => '}',
			'[' => ']',
			'(' => ')',
		];

		$unbalanced = [];
		foreach ( $bracePairs as $open => $close ) {
			$definitionBalance = $this->getBalance( $definition, $open, $close );
			$translationBalance = $this->getBalance( $translation, $open, $close );

			// Only complain when the definition itself is balanced
			if ( $definitionBalance !== 0 || $translationBalance === 0 ) {
				continue;
			}

			$unbalanced[] = "$open$close: $translationBalance";
		}

		$issues = new ValidationIssues();
		if ( !$unbalanced ) {
			return $issues;
		}

		// Create an issue if braces are unbalanced in translation, but balanced in the definition
		$issues->add(
			new ValidationIssue(
				'balance',
				'brace',
				'translate-checks-balance',
				[
					[ 'PARAMS', $unbalanced ],
					[ 'COUNT', count( $unbalanced ) ],
				]
			)
		);

		return $issues;
	}

	/**
	 * Returns the number of occurrences of $str1 minus the number of occurrences
	 * of $str2 in the source string.
	 */
	private function getBalance( string $source, string $str1, string $str2 ): int {
		$opened = substr_count( $source, $str1 );
		$closed = substr_count( $source, $str2 );

		return $opened - $closed;
	}
}
/**
 * Ensures that only the specified escape characters are present.
 * @license GPL-2.0-or-later
 * @since 2020.01
 */
class EscapeCharacterValidator implements MessageValidator {
	/** @var string[] Escape sequences allowed in translations, as configured */
	protected $allowedCharacters;

	/** @var string Pattern (without delimiters) matching escapes that are not allowed */
	protected $regex;

	/** List of valid escape characters recognized. */
	private const VALID_CHARS = [ '\t', '\n', '\\\'', '\"', '\f', '\r', '\a', '\b', '\\\\' ];

	/**
	 * @param array $params Configuration; 'values' must be a non-empty list of
	 *  escape sequences taken from VALID_CHARS
	 * @throws \InvalidArgumentException If no values are given or a value is not
	 *  a recognized escape sequence
	 */
	public function __construct( array $params ) {
		$this->allowedCharacters = $params['values'] ?? [];

		if ( $this->allowedCharacters === [] || !is_array( $this->allowedCharacters ) ) {
			throw new \InvalidArgumentException(
				'No values provided for EscapeCharacter validator.'
			);
		}

		$this->regex = $this->buildRegex( $this->allowedCharacters );
	}

	public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
		$issues = new ValidationIssues();
		$translation = $message->translation();
		preg_match_all( "/$this->regex/U", $translation, $transVars );

		// Everything matched is an escape sequence that is not in the allowed list
		$params = $transVars[0];
		if ( count( $params ) ) {
			$messageParams = [
				[ 'PARAMS', $params ],
				[ 'COUNT', count( $params ) ],
				[ 'PARAMS', $this->allowedCharacters ],
				[ 'COUNT', count( $this->allowedCharacters ) ]
			];

			$issue =
				new ValidationIssue(
					'escape', 'invalid', 'translate-checks-escape', $messageParams
				);
			$issues->add( $issue );
		}

		return $issues;
	}

	/**
	 * Builds a pattern matching a backslash followed by any character that is
	 * not part of the allowed escape sequences.
	 *
	 * @param array $allowedCharacters
	 * @throws \InvalidArgumentException If a value is not listed in VALID_CHARS
	 * @return string
	 */
	private function buildRegex( array $allowedCharacters ): string {
		$regex = '\\\\[^';
		$prefix = '';
		foreach ( $allowedCharacters as $character ) {
			// Strict check so that non-string configuration values are rejected
			if ( !in_array( $character, self::VALID_CHARS, true ) ) {
				throw new \InvalidArgumentException(
					"Invalid escape character encountered: $character during configuration. " .
					'Valid escape characters include: ' . implode( ', ', self::VALID_CHARS )
				);
			}

			// NOTE(review): every entry of VALID_CHARS is two characters long, so this
			// comparison with a single backslash looks like it is always true — confirm
			// whether a comparison with the escaped backslash sequence was intended.
			if ( $character !== '\\' ) {
				$character = stripslashes( $character );
				// negative look behind, to avoid "\\ " being treated as an accidental escape
				$prefix = '(?<!\\\\)';
			}

			// This is done because in the regex we need slashes for some characters such as
			// \", \', but not for others such as \n, \t etc
			$normalizedChar = addslashes( $character );
			$regex .= $normalizedChar;
		}
		$regex .= ']';

		return $prefix . $regex;
	}
}
/**
 * Ensures that the translation has the same number of newlines as the source
 * message at the beginning and end of the string. This works specifically
 * for GettextFFS: a single trailing backslash is removed from both strings
 * before the newlines are counted.
 * @author Abijeet Patro
 * @license GPL-2.0-or-later
 * @since 2019.09
 */
class GettextNewlineValidator extends NewlineValidator {
	public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
		$rawTranslation = $message->translation();
		$rawDefinition = $message->definition();

		// ending newlines in GetText are bounded by a "\"
		$definition = $this->removeTrailingSlash( $rawDefinition );
		$translation = $this->removeTrailingSlash( $rawTranslation );

		$definitionStart = $this->getStartingNewLinesCount( $definition );
		$definitionEnd = $this->getEndingNewLineCount( $definition );

		$translationStart = $this->getStartingNewLinesCount( $translation );
		$translationEnd = $this->getEndingNewLineCount( $translation );

		$startChecks = $this->validateStartingNewline( $definitionStart, $translationStart );
		$endChecks = $this->validateEndingNewline( $definitionEnd, $translationEnd );

		return $this->createIssues( array_merge( $startChecks, $endChecks ) );
	}

	/** Removes one trailing backslash from the string, if there is one. */
	private function removeTrailingSlash( string $str ): string {
		return substr( $str, -1 ) === '\\' ? substr( $str, 0, -1 ) : $str;
	}
}
/**
 * Validates Gettext plural markup of a translation against its definition:
 * plural present where it should be, absent where it is not supported, and
 * with the number of forms the target language expects.
 * @license GPL-2.0-or-later
 * @since 2019.09
 */
class GettextPluralValidator implements MessageValidator {
	public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
		$issues = new ValidationIssues();

		$rule = GettextPlural::getPluralRule( $targetLanguage );
		if ( !$rule ) {
			// Skip validation for languages for which we do not know the plural rule
			return $issues;
		}

		$definition = $message->definition();
		$translation = $message->translation();
		$expectedPluralCount = GettextPlural::getPluralCount( $rule );

		$presence = $this->pluralPresenceCheck(
			GettextPlural::hasPlural( $definition ),
			GettextPlural::hasPlural( $translation ),
			$expectedPluralCount
		);

		// Presence problems map directly to an issue without parameters
		$presenceMessages = [
			'missing' => 'translate-checks-gettext-plural-missing',
			'unsupported' => 'translate-checks-gettext-plural-unsupported',
		];
		if ( isset( $presenceMessages[$presence] ) ) {
			$issues->add( new ValidationIssue( 'plural', $presence, $presenceMessages[$presence] ) );
			return $issues;
		}

		if ( $presence !== 'ok' ) {
			// not-applicable: Plural is not present in translation, but that is fine
			return $issues;
		}

		[ $msgcode, $data ] = $this->pluralFormCountCheck( $translation, $expectedPluralCount );
		if ( $msgcode === 'invalid-count' ) {
			$issues->add(
				new ValidationIssue(
					'plural',
					'forms',
					'translate-checks-gettext-plural-count',
					[
						[ 'COUNT', $expectedPluralCount ],
						[ 'COUNT', $data[ 'count' ] ],
					]
				)
			);
		}

		return $issues;
	}

	/**
	 * Classifies the combination of plural usage in definition and translation.
	 *
	 * @return string One of 'ok', 'missing', 'unsupported' or 'not-applicable'
	 */
	private function pluralPresenceCheck(
		$definitionHasPlural,
		$translationHasPlural,
		$expectedPluralCount
	) {
		if ( $translationHasPlural ) {
			// Plural in the translation is only acceptable when the definition has it too
			return $definitionHasPlural ? 'ok' : 'unsupported';
		}

		// It's okay to omit plural completely for languages without variance
		if ( $definitionHasPlural && $expectedPluralCount > 1 ) {
			return 'missing';
		}

		return 'not-applicable';
	}

	/**
	 * Compares the number of forms of each plural instance with the expected number.
	 *
	 * @return array [ 'ok', [] ], or [ 'invalid-count', [ 'count' => int ] ] for the
	 *  first instance whose number of forms differs
	 */
	private function pluralFormCountCheck( $text, $expectedPluralCount ) {
		[ , $instanceMap ] = GettextPlural::parsePluralForms( $text );

		foreach ( $instanceMap as $forms ) {
			$actual = count( $forms );
			if ( $actual === $expectedPluralCount ) {
				continue;
			}

			return [ 'invalid-count', [ 'count' => $actual ] ];
		}

		return [ 'ok', [] ];
	}
}
/**
 * A generic regex validator and insertable that can be reused by other classes.
 *
 * Everything the regular expression matches is treated as a variable. Variables of
 * the definition that the translation lacks, and variables of the translation that
 * the definition lacks, are reported.
 * @author Abijeet Patro
 * @license GPL-2.0-or-later
 * @since 2019.06
 */
class InsertableRegexValidator extends RegexInsertablesSuggester implements MessageValidator {
	/** @var string */
	private $validationRegex;

	/**
	 * @param string|array $params Either the regular expression itself, or an array
	 *  carrying it under the key 'regex'
	 * @throws InvalidArgumentException When no regular expression can be found
	 */
	public function __construct( $params ) {
		parent::__construct( $params );

		$regex = null;
		if ( is_string( $params ) ) {
			$regex = $params;
		} elseif ( is_array( $params ) ) {
			$regex = $params['regex'] ?? null;
		}

		if ( $regex === null ) {
			throw new InvalidArgumentException( 'The configuration for InsertableRegexValidator does not ' .
				'specify a regular expression.' );
		}

		$this->validationRegex = $regex;
	}

	public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
		$issues = new ValidationIssues();

		preg_match_all( $this->validationRegex, $message->definition(), $inDefinition );
		preg_match_all( $this->validationRegex, $message->translation(), $inTranslation );
		$sourceVariables = $inDefinition[0];
		$targetVariables = $inTranslation[0];

		// Sub type, message key and the offending variables, in reporting order
		$checks = [
			[ 'missing', 'translate-checks-parameters', array_diff( $sourceVariables, $targetVariables ) ],
			[ 'unknown', 'translate-checks-parameters-unknown', array_diff( $targetVariables, $sourceVariables ) ],
		];

		foreach ( $checks as [ $subType, $messageKey, $variables ] ) {
			if ( !$variables ) {
				continue;
			}

			$issues->add(
				new ValidationIssue(
					'variable',
					$subType,
					$messageKey,
					[
						[ 'PLAIN-PARAMS', $variables ],
						[ 'COUNT', count( $variables ) ]
					]
				)
			);
		}

		return $issues;
	}
}
// phpcs:disable Generic.Files.LineLength.TooLong
/**
 * An insertable IOS variable validator.
 * See: https://github.com/dcordero/Rubustrings/blob/61d477bffbb318ca3ffed9c2afc49ec301931d93/lib/rubustrings/action.rb#L91
 * @author Abijeet Patro
 * @license GPL-2.0-or-later
 * @since 2020.03
 */
class IosVariableValidator extends InsertableRegexValidator {
	public function __construct() {
		// The pattern must be single-quoted. In a double-quoted PHP string "\$" is the
		// escape sequence for a plain "$", so the regex engine would see an end-of-subject
		// assertion instead of a literal dollar sign, and positional specifiers such as
		// %1$@ would never match.
		parent::__construct(
			'/%(?:([1-9]\d*)\$|\(([^\)]+)\))?(\+)?(0|\'[^$])?' .
			'(-)?(\d+)?(?:\.(\d+))?(hh|ll|[hlLzjt])?([b-fiosuxX@])/'
		);
	}
}
/**
 * Reports an issue when the translation is not one of the configured values.
 *
 * The comparison is made against $this->normalizedValues. The constructor lower-cases
 * those values only when matching is case-insensitive, so the translation is
 * lower-cased under that same condition. In case-sensitive mode both sides are
 * compared untouched.
 *
 * @param TMessage $message Message whose translation is checked
 * @param string $targetLanguage Not used by this validator
 * @return ValidationIssues Empty when the translation is an accepted value
 */
public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
	$issues = new ValidationIssues();

	$translation = $message->translation();
	if ( !$this->caseSensitive ) {
		$translation = strtolower( $translation );
	}

	if ( !in_array( $translation, $this->normalizedValues, true ) ) {
		$issue = new ValidationIssue(
			'value-not-present',
			'invalid',
			'translate-checks-value-not-present',
			[
				// Show the values as configured, not their normalized form
				[ 'PLAIN-PARAMS', $this->possibleValues ],
				[ 'COUNT', count( $this->possibleValues ) ]
			]
		);

		$issues->add( $issue );
	}

	return $issues;
}
/**
 * Checks if the translation uses links that are discouraged. Valid links are those that link
 * to Special: or {{ns:special}}: or project pages through MediaWiki messages like
 * {{MediaWiki:helppage-url}}:. Also links in the definition are allowed.
 * @license GPL-2.0-or-later
 * @since 2020.02
 */
class MediaWikiLinkValidator implements MessageValidator {
	public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
		$issues = new ValidationIssues();

		$definition = $message->definition();
		$translation = $message->translation();

		// Sub type, message key, text to take links from, text that must contain them
		$checks = [
			[ 'missing', 'translate-checks-links-missing', $definition, $translation ],
			[ 'extra', 'translate-checks-links', $translation, $definition ],
		];

		foreach ( $checks as [ $subType, $messageKey, $from, $in ] ) {
			$links = $this->getLinksMissingInTarget( $from, $in );
			if ( $links === [] ) {
				continue;
			}

			$issues->add(
				new ValidationIssue(
					'links',
					$subType,
					$messageKey,
					[
						[ 'PARAMS', $links ],
						[ 'COUNT', count( $links ) ],
					]
				)
			);
		}

		return $issues;
	}

	/**
	 * Collects the wiki links of $source whose link target does not start any
	 * link in $target.
	 *
	 * @return string[] The links rebuilt as "[[target|label]]"; the label part is
	 *  only present when the source link had one
	 */
	private function getLinksMissingInTarget( string $source, string $target ): array {
		$tc = Title::legalChars() . '#%{}';
		$matches = [];
		$missing = [];

		preg_match_all( "/\[\[([{$tc}]+)(\\|(.+?))?]]/sDu", $source, $matches );
		$total = count( $matches[0] );
		for ( $i = 0; $i < $total; $i++ ) {
			$linkTarget = $matches[1][$i];
			$backMatch = preg_quote( $linkTarget, '/' );
			if ( preg_match( "/\[\[$backMatch/", $target ) === 1 ) {
				// Same link target is used on the other side
				continue;
			}

			$missing[] = '[[' . $linkTarget . $matches[2][$i] . ']]';
		}

		return $missing;
	}
}
/**
 * Ensures that translations do not translate namespaces.
 *
 * Applies only to definitions that consist of one of the known namespace prefixes
 * followed by a page name. The translation must then start with the same prefix,
 * spelled exactly as in the definition.
 * @author Abijeet Patro
 * @license GPL-2.0-or-later
 * @since 2020.02
 */
class MediaWikiPageNameValidator implements MessageValidator {
	public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
		$issues = new ValidationIssues();

		$definition = $message->definition();
		$translation = $message->translation();

		$namespaces = 'help|project|\{\{ns:project}}|mediawiki';
		$matches = [];
		if ( !preg_match( "/^($namespaces):[\w\s]+$/ui", $definition, $matches ) ) {
			// The definition is not a plain "namespace:page name" title
			return $issues;
		}

		// The captured prefix is literal text and can contain braces ({{ns:project}}),
		// so quote it before embedding it in another pattern
		$prefix = preg_quote( $matches[1], '/' );
		if ( !preg_match( "/^{$prefix}:.+$/u", $translation ) ) {
			$issue = new ValidationIssue(
				'pagename',
				'namespace',
				'translate-checks-pagename'
			);
			$issues->add( $issue );
		}

		return $issues;
	}
}
/**
 * Handles plural validation for MediaWiki inline plural syntax.
 * @author Abijeet Patro
 * @license GPL-2.0-or-later
 * @since 2019.06
 */
class MediaWikiPluralValidator implements MessageValidator {
	public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
		$issues = new ValidationIssues();

		$this->pluralCheck( $message, $issues );
		$this->pluralFormsCheck( $message, $targetLanguage, $issues );

		return $issues;
	}

	/** Adds an issue when the definition uses {{plural:}} but the translation does not. */
	private function pluralCheck( TMessage $message, ValidationIssues $issues ): void {
		$definitionHasPlural = stripos( $message->definition(), '{{plural:' ) !== false;
		$translationHasPlural = stripos( $message->translation(), '{{plural:' ) !== false;

		if ( $definitionHasPlural && !$translationHasPlural ) {
			$issues->add( new ValidationIssue( 'plural', 'missing', 'translate-checks-plural' ) );
		}
	}

	/**
	 * Adds an issue for each plural invocation of the translation that has more forms
	 * than the language allows, and for each one whose last two forms are identical.
	 */
	protected function pluralFormsCheck(
		TMessage $message, string $code, ValidationIssues $issues
	): void {
		$translation = $message->translation();
		// Are there any plural forms for this language in this message?
		if ( stripos( $translation, '{{plural:' ) === false ) {
			return;
		}

		$invocations = self::getPluralForms( $translation );
		$allowed = self::getPluralFormCount( $code );

		foreach ( $invocations as $rawForms ) {
			$forms = self::removeExplicitPluralForms( $rawForms );
			$provided = count( $forms );

			if ( $provided > $allowed ) {
				$issues->add(
					new ValidationIssue(
						'plural',
						'forms',
						'translate-checks-plural-forms',
						[
							[ 'COUNT', $provided ],
							[ 'COUNT', $allowed ],
						]
					)
				);
			}

			// Are the last two forms identical?
			$lastTwoMatch = $provided > 1 && $forms[$provided - 1] === $forms[$provided - 2];
			if ( $lastTwoMatch ) {
				$issues->add( new ValidationIssue( 'plural', 'dupe', 'translate-checks-plural-dupe' ) );
			}
		}
	}

	/** Returns the number of plural forms %MediaWiki supports for a language. */
	public static function getPluralFormCount( string $code ): int {
		// +1 for the 'other' form
		return count( Language::factory( $code )->getPluralRules() ) + 1;
	}

	/**
	 * Ugly home made probably awfully slow looping parser that parses {{PLURAL}} instances from
	 * a message and returns array of invocations having array of forms.
	 *
	 * @return array[]
	 */
	public static function getPluralForms( string $translation ): array {
		// Filled by the callback below, one entry per plural invocation
		$collected = [];

		$collector = function ( $parser, $frame, $args ) use ( &$collected ) {
			$forms = [];

			foreach ( $args as $index => $form ) {
				if ( $index === 0 ) {
					// The first arg is the number, we skip it
					continue;
				}

				// Collect the raw text
				$forms[] = $frame->expand( $form, PPFrame::RECOVER_ORIG );
				// Expand the text to process embedded plurals
				$frame->expand( $form );
			}
			$collected[] = $forms;

			return '';
		};

		// Setup parser
		$parser = MediaWikiServices::getInstance()->getParserFactory()->create();
		// Load the default magic words etc now.
		$parser->firstCallInit();
		// So that they don't override our own callback
		$parser->setFunctionHook( 'plural', $collector, Parser::SFH_NO_HASH | Parser::SFH_OBJECT_ARGS );

		// Setup things needed for preprocess
		$title = null;
		$options = new ParserOptions( new User(), Language::factory( 'en' ) );

		$parser->preprocess( $translation, $title, $options );

		return $collected;
	}

	/** Remove forms that start with an explicit number. */
	public static function removeExplicitPluralForms( array $forms ): array {
		// Handle explicit 0= and 1= forms
		$implicit = array_filter(
			$forms,
			static function ( $form ) {
				return !preg_match( '/^[0-9]+=/', $form );
			}
		);

		return array_values( $implicit );
	}
}
); + + if ( count( $traItems ) !== 2 ) { + $issue = new ValidationIssue( + 'miscmw', + 'timelist-format', + 'translate-checks-format', + [ [ 'MESSAGE', [ 'translate-checks-malformed', $traArray[$i] ] ] ] + ); + + $issues->add( $issue ); + continue; + } + + if ( $traItems[1] !== $defItems[1] ) { + $issue = new ValidationIssue( + 'miscmw', + 'timelist-format-value', + 'translate-checks-format', + // FIXME: i18n missing. + [ "<samp><nowiki>$traItems[1] !== $defItems[1]</nowiki></samp>" ] + ); + + $issues->add( $issue ); + continue; + } + } + + return $issues; + } +} diff --git a/MLEB/Translate/src/MessageValidator/Validators/NewlineValidator.php b/MLEB/Translate/src/MessageValidator/Validators/NewlineValidator.php new file mode 100644 index 00000000..645a09c6 --- /dev/null +++ b/MLEB/Translate/src/MessageValidator/Validators/NewlineValidator.php @@ -0,0 +1,100 @@ +<?php +declare( strict_types = 1 ); + +namespace MediaWiki\Extensions\Translate\MessageValidator\Validators; + +use MediaWiki\Extensions\Translate\Validation\MessageValidator; +use MediaWiki\Extensions\Translate\Validation\ValidationIssue; +use MediaWiki\Extensions\Translate\Validation\ValidationIssues; +use TMessage; + +/** + * Ensures that the translation has the same number of newlines as the source + * message at the beginning of the string. 
/**
 * Ensures that the translation has the same number of newlines as the source
 * message at the beginning of the string.
 *
 * The helpers for trailing newlines are not used by this class itself; they are
 * protected so that subclasses can also compare the end of the string.
 * @author Abijeet Patro
 * @license GPL-2.0-or-later
 * @since 2019.09
 */
class NewlineValidator implements MessageValidator {
	public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
		$translation = $message->translation();
		$definition = $message->definition();

		$definitionStartNewline = $this->getStartingNewLinesCount( $definition );
		$translationStartNewline = $this->getStartingNewLinesCount( $translation );

		$failingChecks = $this->validateStartingNewline(
			$definitionStartNewline, $translationStartNewline
		);

		return $this->createIssues( $failingChecks );
	}

	/** Number of consecutive "\n" characters at the start of the string. */
	protected function getStartingNewLinesCount( string $str ): int {
		return strspn( $str, "\n" );
	}

	/** Number of consecutive "\n" characters at the end of the string. */
	protected function getEndingNewLineCount( string $str ): int {
		return strspn( strrev( $str ), "\n" );
	}

	/**
	 * Compares the leading newline counts.
	 *
	 * @return array[] Empty when the counts are equal, otherwise one
	 *  [ sub type, difference ] pair with sub type 'extra-start' or 'missing-start'
	 */
	protected function validateStartingNewline(
		int $definitionStartNewline,
		int $translationStartNewline
	): array {
		$failingChecks = [];
		if ( $definitionStartNewline < $translationStartNewline ) {
			// Extra whitespace at beginning
			$failingChecks[] = [
				'extra-start',
				$translationStartNewline - $definitionStartNewline
			];
		} elseif ( $definitionStartNewline > $translationStartNewline ) {
			// Missing whitespace at beginning
			$failingChecks[] = [
				'missing-start',
				$definitionStartNewline - $translationStartNewline
			];
		}

		return $failingChecks;
	}

	/**
	 * Compares the trailing newline counts.
	 *
	 * @return array[] Empty when the counts are equal, otherwise one
	 *  [ sub type, difference ] pair with sub type 'extra-end' or 'missing-end'
	 */
	protected function validateEndingNewline(
		int $definitionEndNewline,
		int $translationEndNewline
	): array {
		$failingChecks = [];
		if ( $definitionEndNewline < $translationEndNewline ) {
			// Extra whitespace at end
			$failingChecks[] = [
				'extra-end',
				$translationEndNewline - $definitionEndNewline
			];
		} elseif ( $definitionEndNewline > $translationEndNewline ) {
			// Missing whitespace at end
			$failingChecks[] = [
				'missing-end',
				$definitionEndNewline - $translationEndNewline
			];
		}

		return $failingChecks;
	}

	/**
	 * Turns [ sub type, count ] pairs into validation issues.
	 *
	 * @param array[] $failingChecks Pairs as returned by the validate* methods
	 * @return ValidationIssues
	 */
	protected function createIssues( array $failingChecks ): ValidationIssues {
		$issues = new ValidationIssues();
		foreach ( $failingChecks as [ $subType, $count ] ) {
			$issue = new ValidationIssue(
				'newline',
				$subType,
				"translate-checks-newline-$subType",
				// Message parameters are a list of [ TYPE, value ] pairs, as in the other
				// validators; a flat [ 'COUNT', $count ] would read as two separate parameters.
				// NOTE(review): inferred from the sibling validators, confirm against the formatter.
				[
					[ 'COUNT', $count ],
				]
			);

			$issues->add( $issue );
		}

		return $issues;
	}
}
/**
 * A validator that checks for missing and unknown printf formatting characters
 * in translations. Can also be used as an Insertable suggester
 * @license GPL-2.0-or-later
 * @since 2019.12
 */
class PrintfValidator extends InsertableRegexValidator {
	/**
	 * A percent sign, an optional "<digits>$" position, an optional ".<digits>"
	 * precision and one of the conversion characters s, d, u or f.
	 */
	private const PRINTF_PATTERN = '/%(\d+\$)?(\.\d+)?[sduf]/U';

	public function __construct() {
		parent::__construct( self::PRINTF_PATTERN );
	}
}
/**
 * Validates SmartFormat plural markup: reports plural variables the definition does
 * not have, plural variables missing from the translation, and plural instances with
 * an unexpected number of forms. Also suggests a plural insertable for each plural
 * variable found in a text.
 * @license GPL-2.0-or-later
 * @since 2019.11
 */
class SmartFormatPluralValidator implements MessageValidator, InsertablesSuggester {
	public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues {
		$issues = new ValidationIssues();

		$expectedKeywords = UnicodePlural::getPluralKeywords( $targetLanguage );
		if ( $expectedKeywords === null ) {
			// Skip validation for languages for which we do not know the plural rule
			return $issues;
		}

		$definition = $message->definition();
		$translation = $message->translation();
		$expectedPluralCount = count( $expectedKeywords );
		$definitionPlurals = SmartFormatPlural::getPluralInstances( $definition );
		$translationPlurals = SmartFormatPlural::getPluralInstances( $translation );

		$sourceVariables = array_keys( $definitionPlurals );
		$targetVariables = array_keys( $translationPlurals );

		// Plural used on a variable that has no plural in the definition
		foreach ( array_diff( $targetVariables, $sourceVariables ) as $variable ) {
			$issues->add(
				new ValidationIssue(
					'plural',
					'unsupported',
					'translate-checks-smartformat-plural-unsupported',
					[
						[ 'PLAIN', '{' . $variable . '}' ],
					]
				)
			);
		}

		// Leaving plural out is only reported when the language has more than one form
		if ( $expectedPluralCount > 1 ) {
			foreach ( array_diff( $sourceVariables, $targetVariables ) as $variable ) {
				$issues->add(
					new ValidationIssue(
						'plural',
						'missing',
						'translate-checks-smartformat-plural-missing',
						[
							[ 'PLAIN', '{' . $variable . '}' ],
						]
					)
				);
			}
		}

		// This returns only translation plurals for variables that exists in source
		$sharedPlurals = array_intersect_key( $translationPlurals, $definitionPlurals );
		foreach ( $sharedPlurals as $instances ) {
			foreach ( $instances as $instance ) {
				$actualPluralCount = count( $instance[ 'forms' ] );
				if ( $actualPluralCount === $expectedPluralCount ) {
					continue;
				}

				$issues->add(
					new ValidationIssue(
						'plural',
						'forms',
						'translate-checks-smartformat-plural-count',
						[
							[ 'COUNT', $expectedPluralCount ],
							[ 'COUNT', $actualPluralCount ],
							[ 'PLAIN', $instance[ 'original' ] ],
						]
					)
				);
			}
		}

		return $issues;
	}

	public function getInsertables( $text ) : array {
		$insertables = [];

		// This could be more language specific if we were given more information, but
		// we only have text.
		foreach ( array_keys( SmartFormatPlural::getPluralInstances( $text ) ) as $variable ) {
			$opening = '{' . "$variable:";
			$closing = '|}';
			$insertables[] = new Insertable( $opening . $closing, $opening, $closing );
		}

		return $insertables;
	}
}
+ * + * It requires all forms to be present and in correct order. Whitespace around keywords + * and values is trimmed. The keyword `other` is left out, though it is allowed in input. + * @since 2019.09 + * @license GPL-2.0-or-later + */ +class UnicodePluralValidator implements MessageValidator { + public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues { + $issues = new ValidationIssues(); + + $expectedKeywords = UnicodePlural::getPluralKeywords( $targetLanguage ); + // Skip validation for languages for which we do not know the plural rule + if ( $expectedKeywords === null ) { + return $issues; + } + + $definition = $message->definition(); + $translation = $message->translation(); + $definitionHasPlural = UnicodePlural::hasPlural( $definition ); + $translationHasPlural = UnicodePlural::hasPlural( $translation ); + + $presence = $this->pluralPresenceCheck( + $definitionHasPlural, + $translationHasPlural + ); + + // Using same check keys as MediaWikiPluralValidator + if ( $presence === 'missing' ) { + $issue = new ValidationIssue( 'plural', 'missing', 'translate-checks-unicode-plural-missing' ); + $issues->add( $issue ); + } elseif ( $presence === 'unsupported' ) { + $issue = new ValidationIssue( 'plural', 'unsupported', 'translate-checks-unicode-plural-unsupported' ); + $issues->add( $issue ); + } elseif ( $presence === 'ok' ) { + [ $msgcode, $actualKeywords ] = + $this->pluralFormCheck( $translation, $expectedKeywords ); + if ( $msgcode === 'invalid' ) { + $expectedExample = UnicodePlural::flattenList( + array_map( [ $this, 'createFormExample' ], $expectedKeywords ) + ); + $actualExample = UnicodePlural::flattenList( + array_map( [ $this, 'createFormExample' ], $actualKeywords ) + ); + + $issue = new ValidationIssue( + 'plural', + 'forms', + 'translate-checks-unicode-plural-invalid', + [ + [ 'PLAIN', $expectedExample ], + [ 'PLAIN', $actualExample ], + ] + ); + $issues->add( $issue ); + } + } // else: not-applicable + + return 
/**
 * Classify how plural usage in the translation relates to the definition.
 *
 * @param bool $definitionHasPlural Whether the source text uses plural markup
 * @param bool $translationHasPlural Whether the translation uses plural markup
 * @return string One of 'ok' (both use plural), 'missing' (only the definition
 *  does), 'unsupported' (only the translation does) or 'not-applicable' (neither)
 */
private function pluralPresenceCheck(
	bool $definitionHasPlural,
	bool $translationHasPlural
): string {
	if ( $definitionHasPlural ) {
		return $translationHasPlural ? 'ok' : 'missing';
	}

	return $translationHasPlural ? 'unsupported' : 'not-applicable';
}
// Maps each service name to the closure that constructs it.
return [
	// Synchronization cache built on the object cache obtained for CACHE_DB.
	'Translate:GroupSynchronizationCache' => function (): GroupSynchronizationCache {
		return new GroupSynchronizationCache( ObjectCache::getInstance( CACHE_DB ) );
	},

	// Takes no dependencies.
	'Translate:ParsingPlaceholderFactory' => function (): ParsingPlaceholderFactory {
		return new ParsingPlaceholderFactory();
	},

	// Translator activity statistics: combines the query object, the main object
	// stash, the job queue group and a language tag validator.
	'Translate:TranslatorActivity' => function ( MediaWikiServices $services ): TranslatorActivity {
		$query = new TranslatorActivityQuery(
			$services->getMainConfig(),
			$services->getDBLoadBalancer()
		);

		// Passed as a callable so that TranslatorActivity does not have to call
		// the static Language method itself.
		$languageValidator = function ( string $language ): bool {
			return Language::isKnownLanguageTag( $language );
		};

		return new TranslatorActivity(
			$services->getMainObjectStash(),
			$query,
			JobQueueGroup::singleton(),
			$languageValidator
		);
	},

];
/**
 * Minimal service container.
 *
 * Wraps the MediaWiki service container and exposes type-hinted getters for
 * the services defined by this extension.
 *
 * @since 2020.04
 */
class Services implements ContainerInterface {
	/** @var self|null Shared instance, created on first use */
	private static $instance;

	/** @var MediaWikiServices */
	private $container;

	private function __construct( MediaWikiServices $container ) {
		$this->container = $container;
	}

	/**
	 * Return the shared instance, creating it on the first call.
	 */
	public static function getInstance(): Services {
		if ( self::$instance === null ) {
			self::$instance = new self( MediaWikiServices::getInstance() );
		}

		return self::$instance;
	}

	/** @inheritDoc */
	public function get( $id ) {
		// Can be changed to using ::get once we drop support for MW 1.33
		return $this->container->getService( $id );
	}

	/** @inheritDoc */
	public function has( $id ) {
		// Can be changed to using ::has once we drop support for MW 1.33
		return $this->container->hasService( $id );
	}

	public function getGroupSynchronizationCache(): GroupSynchronizationCache {
		return $this->get( 'Translate:GroupSynchronizationCache' );
	}

	/** @since 2020.07 */
	public function getParsingPlaceholderFactory(): ParsingPlaceholderFactory {
		return $this->get( 'Translate:ParsingPlaceholderFactory' );
	}

	public function getTranslatorActivity(): TranslatorActivity {
		return $this->get( 'Translate:TranslatorActivity' );
	}

}
+++ b/MLEB/Translate/src/Statistics/TranslatorActivity.php @@ -0,0 +1,155 @@ +<?php +/** + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +namespace MediaWiki\Extensions\Translate\Statistics; + +use BagOStuff; +use InvalidArgumentException; +use JobQueueGroup; +use Language; +use PoolCounterWorkViaCallback; +use Wikimedia\Timestamp\ConvertibleTimestamp; + +/** + * Handles caching of translator activity. + * + * @since 2020.04 + */ +class TranslatorActivity { + public const CACHE_TIME = 3 * 24 * 3600; + // 25 hours so that it's easy to configure the maintenance script run daily + public const CACHE_STALE = 25 * 3600; + private $cache; + private $query; + private $jobQueue; + private $languageValidator; + + public function __construct( + BagOStuff $cache, + TranslatorActivityQuery $query, + JobQueueGroup $jobQueue, + callable $languageValidator + ) { + $this->cache = $cache; + $this->query = $query; + $this->jobQueue = $jobQueue; + // FIXME: use LanguageNameUtils once 1.33 is no longer supported + $this->languageValidator = $languageValidator; + } + + /** + * Get translations activity for a given language. + * + * @param string $language Language tag. + * @return array Array with keys users and asOfTime + * @throws StatisticsUnavailable If loading statistics is temporarily not possible. 
/**
 * Get translator activity for a given language.
 *
 * A cached value is returned when one exists; if it is stale, a refresh job
 * is queued and the stale value is still returned. Without a cached value the
 * statistics are queried and cached before returning.
 *
 * @param string $language Language tag.
 * @return array Array with keys users and asOfTime
 * @throws InvalidArgumentException If the language tag is rejected by the validator.
 * @throws StatisticsUnavailable If loading statistics is temporarily not possible.
 */
public function inLanguage( string $language ): array {
	if ( !$this->isValidLanguage( $language ) ) {
		throw new InvalidArgumentException( "Invalid language tag '$language'" );
	}

	$cached = $this->getFromCache( $language );

	if ( !is_array( $cached ) ) {
		// Nothing usable in the cache: query now
		$fresh = $this->doQueryAndCache( $language );
		if ( $fresh ) {
			return $fresh;
		}

		throw new StatisticsUnavailable( "Unable to load stats" );
	}

	if ( $this->isStale( $cached ) ) {
		$this->queueCacheRefresh( $language );
	}

	return $cached;
}
/**
 * Update cache for all languages, even if not stale.
 *
 * Languages rejected by the injected language validator are skipped, which
 * matches the validation applied by inLanguage() and updateLanguage().
 */
public function updateAllLanguages(): void {
	$now = ConvertibleTimestamp::now( TS_UNIX );

	foreach ( $this->query->inAllLanguages() as $language => $users ) {
		// PHP turns numeric-looking array keys into integers; the validator and
		// the cache key builder both expect a string.
		$language = (string)$language;

		if ( !$this->isValidLanguage( $language ) ) {
			continue;
		}

		$data = [ 'users' => $users, 'asOfTime' => $now ];
		$this->addToCache( $data, $language );
	}
}
+ * + * @since 2020.04 + */ +class TranslatorActivityQuery { + public const USER_TRANSLATIONS = 0; + public const USER_LAST_ACTIVITY = 1; + private $options; + private $loadBalancer; + + /** + * @param Config|ServiceOptions $options + * @param ILoadBalancer $loadBalancer + */ + public function __construct( $options, ILoadBalancer $loadBalancer ) { + $this->options = $options; + $this->loadBalancer = $loadBalancer; + } + + /** + * Fetch the translators for a language + * + * @param string $code Language tag + * @return array<string,array<int|string>> Map of user name to translation stats + */ + public function inLanguage( string $code ): array { + $dbr = $this->loadBalancer->getConnection( DB_REPLICA, 'vslow' ); + + $actorQuery = ActorMigration::newMigration()->getJoin( 'rev_user' ); + + $tables = [ 'page', 'revision' ] + $actorQuery['tables']; + $fields = [ + 'rev_user_text' => $actorQuery['fields']['rev_user_text'], + 'MAX(rev_timestamp) as lastedit', + 'count(page_id) as count', + ]; + $conds = [ + 'page_title' . $dbr->buildLike( $dbr->anyString(), '/', $code ), + 'page_namespace' => $this->options->get( 'TranslateMessageNamespaces' ), + ]; + $options = [ + 'GROUP BY' => $actorQuery['fields']['rev_user_text'], + 'ORDER BY' => 'NULL', + ]; + $joins = [ + 'revision' => [ 'JOIN', 'page_id=rev_page' ], + ] + $actorQuery['joins']; + + $res = $dbr->select( $tables, $fields, $conds, __METHOD__, $options, $joins ); + + $data = []; + foreach ( $res as $row ) { + $data[$row->rev_user_text] = [ + self::USER_TRANSLATIONS => $row->count, + self::USER_LAST_ACTIVITY => $row->lastedit, + ]; + } + + return $data; + } + + /** + * Fetch the translators for all languages. + * + * This is faster than doing each language separately. 
/**
 * Fetch the translators for all languages.
 *
 * This is faster than doing each language separately.
 *
 * @return array<string,array<string,array<int|string>>> Map of language tags to user name to
 * translation stats
 */
public function inAllLanguages(): array {
	// Replica connection, requested with the 'vslow' query group
	$dbr = $this->loadBalancer->getConnection( DB_REPLICA, 'vslow' );

	$actorQuery = ActorMigration::newMigration()->getJoin( 'rev_user' );

	// Array union: on a key collision the left-hand entry wins.
	// NOTE(review): presumably the actor tables are keyed by alias, not by 0/1 - confirm
	$tables = [ 'page', 'revision' ] + $actorQuery['tables'];
	$fields = [
		'rev_user_text' => $actorQuery['fields']['rev_user_text'],
		// The language is whatever follows the last slash of the page title.
		// NOTE(review): substring_index looks MySQL/MariaDB specific - confirm
		// behaviour on other database backends
		'substring_index(page_title, \'/\', -1) as lang',
		'MAX(rev_timestamp) as lastedit',
		'count(page_id) as count',
	];
	$conds = [
		// Only titles that contain a slash
		'page_title' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() ),
		'page_namespace' => $this->options->get( 'TranslateMessageNamespaces' ),
	];
	$options = [
		// One result row per language and user
		'GROUP BY' => [ 'lang', $actorQuery['fields']['rev_user_text'] ],
		'ORDER BY' => 'NULL',
	];

	$joins = [
		'revision' => [ 'JOIN', 'page_id=rev_page' ],
	] + $actorQuery['joins'];

	$res = $dbr->select( $tables, $fields, $conds, __METHOD__, $options, $joins );

	// Nest the rows as language => user name => stats
	$data = [];
	foreach ( $res as $row ) {
		$data[$row->lang][$row->rev_user_text] = [
			self::USER_TRANSLATIONS => $row->count,
			self::USER_LAST_ACTIVITY => $row->lastedit,
		];
	}

	return $data;
}
/**
 * Refresh the cached translator activity for the language given in the job
 * parameters.
 *
 * @return bool False when the statistics were unavailable, true otherwise
 */
public function run() {
	$translatorActivity = Services::getInstance()->getTranslatorActivity();
	$language = $this->getParams()['language'];

	try {
		$translatorActivity->updateLanguage( $language );
		return true;
	} catch ( StatisticsUnavailable $unavailable ) {
		$this->logInfo( $unavailable->getMessage() );
	}

	// The job will be retried according to JobQueue configuration
	return false;
}
/**
 * Check every group that is marked as being synchronized and finish the
 * synchronization for groups that are done or have timed out.
 *
 * When no group remains in progress afterwards, the message index is rebuilt.
 */
public function execute() {
	$logger = LoggerFactory::getInstance( 'Translate.GroupSynchronization' );
	$groupSyncCache = Services::getInstance()->getGroupSynchronizationCache();
	$groupsInSync = $groupSyncCache->getGroupsInSync();
	if ( !$groupsInSync ) {
		$logger->info( 'All message groups are in sync' );
		return;
	}

	$logger->info( 'Group synchronization is in progress' );

	$groupsInProgress = [];
	foreach ( $groupsInSync as $groupId ) {
		$groupResponse = $groupSyncCache->getSynchronizationStatus( $groupId );

		if ( $groupResponse->isDone() ) {
			$groupSyncCache->endSync( $groupId );
			continue;
		}

		if ( $groupResponse->hasTimedOut() ) {
			$remainingMessageKeys = $groupResponse->getRemainingMessages();
			$logger->warning(
				'MessageUpdateJobs timed out for group - {groupId}; ' .
				'Messages - {messages}; ' .
				'Jobs remaining - {jobRemaining}',
				[
					'groupId' => $groupId,
					'jobRemaining' => count( $remainingMessageKeys ),
					'messages' => implode( ', ', $remainingMessageKeys )
				]
			);
			wfLogWarning( 'MessageUpdateJob timed out for group - ' . $groupId );

			// A timed out group is given up on rather than kept in progress
			$groupSyncCache->endSync( $groupId );
		} else {
			$groupsInProgress[] = $groupId;
		}
	}

	if ( !$groupsInProgress ) {
		// No groups in progress.
		$logger->info( 'All message groups are now in sync. Starting MessageIndex rebuild' );
		MessageIndex::singleton()->rebuild();
	}

	$logger->info(
		"Script completed successfully. " .
		"{inProgressGroupCount} group synchronization(s) is/are in progress",
		[
			'inProgressGroupCount' => count( $groupsInProgress )
		]
	);
}
/**
 * End synchronization for a group. Removes the cached messages of the group,
 * deletes the group key and the sync time, and removes the groupId from the
 * groups in sync list.
 *
 * Safe to call for a group whose key is no longer in the cache, for example
 * when the synchronization was already ended.
 */
public function endSync( string $groupId ): void {
	// Remove all the messages for the group
	$groupKey = $this->getGroupKey( $groupId );
	$groupMessageKeys = $this->cache->get( $groupKey );
	// The cache returns false when the key is missing; unpacking a non-array
	// with ... is a fatal error, so only unpack an actual list.
	if ( is_array( $groupMessageKeys ) ) {
		$this->removeMessages( ...$groupMessageKeys );
	}

	// Remove the group message list
	$this->cache->delete( $groupKey );

	// Delete the group sync start time
	$this->cache->delete( $this->getSyncTimeKey( $groupId ) );

	// Remove the group from groups in sync list
	$this->modifyGroupsInSync( $groupId, self::OP_DEL );
}
/**
 * Look up several messages in the cache at once.
 *
 * @param string ...$messageKeys
 * @return MessageUpdateParameter[] Map from each given message key to the cached
 * MessageUpdateParameter, or to null when the cache has no entry for that key.
 */
public function getMessages( string ...$messageKeys ): array {
	$cacheKeys = array_map( [ $this, 'getMessageTitleKey' ], $messageKeys );
	$cachedValues = $this->cache->getMulti( $cacheKeys );

	$result = [];
	foreach ( $messageKeys as $position => $messageKey ) {
		$result[$messageKey] = $cachedValues[$cacheKeys[$position]] ?? null;
	}

	return $result;
}
/**
 * Remove messages from the cache. Only the per-message entries are deleted;
 * the list of message keys stored for the group is NOT updated.
 */
public function removeMessages( string ...$messageKeys ): void {
	$cacheKeys = array_map( [ $this, 'getMessageTitleKey' ], $messageKeys );

	$this->cache->deleteMulti( $cacheKeys );
}
/**
 * Add a group to, or remove it from, the list of groups in sync.
 *
 * The list is modified while holding a lock on its cache key. The lock is
 * released on every path, including the early return and exceptions.
 */
private function modifyGroupsInSync( string $groupId, string $op ): void {
	$groupsCacheKey = $this->getGroupsKey();
	$this->cache->lock( $groupsCacheKey );

	try {
		$groupsInSync = $this->getGroupsInSync();
		if ( $groupsInSync === [] && $op === self::OP_DEL ) {
			// Nothing to remove from an empty list
			return;
		}

		$this->modifyArray( $groupsInSync, $groupId, $op );

		$this->cache->set( $groupsCacheKey, $groupsInSync );
	} finally {
		$this->cache->unlock( $groupsCacheKey );
	}
}

/**
 * Add message titles to, or remove them from, the list of messages in sync
 * for a group.
 *
 * The list is modified while holding a lock on the group cache key. The lock
 * is released on every path, including the early return and exceptions.
 *
 * @param string $groupId
 * @param MessageUpdateParameter[] $messageParams
 * @param string $op One of the OP_* constants
 */
private function modifyGroupMessagesInSync(
	string $groupId, array $messageParams, string $op
): void {
	$groupCacheKey = $this->getGroupKey( $groupId );

	$this->cache->lock( $groupCacheKey );

	try {
		$groupMessages = $this->getGroupMessageKeys( $groupId );
		if ( $groupMessages === [] && $op === self::OP_DEL ) {
			// Nothing to remove from an empty list
			return;
		}

		/** @var MessageUpdateParameter $messageParam */
		foreach ( $messageParams as $messageParam ) {
			$messageTitle = $messageParam->getPageName();
			$this->modifyArray( $groupMessages, $messageTitle, $op );
		}

		$this->cache->set( $groupCacheKey, $groupMessages );
	} finally {
		$this->cache->unlock( $groupCacheKey );
	}
}

/**
 * Add a value to a list if it is absent (OP_ADD), or remove it if it is
 * present (OP_DEL). The list is modified in place.
 */
private function modifyArray(
	array &$toModify, string $needle, string $op
): void {
	// Strict search: loose comparison treats numeric-looking strings such as
	// '1e3' and '1000' as equal.
	$needleIndex = array_search( $needle, $toModify, true );
	if ( $op === self::OP_ADD && $needleIndex === false ) {
		$toModify[] = $needle;
	} elseif ( $op === self::OP_DEL && $needleIndex !== false ) {
		array_splice( $toModify, $needleIndex, 1 );
	}
}

// Cache keys related functions start here.
+ + private function getGroupsKey(): string { + return $this->cache->makeKey( self::CACHE_PREFIX ); + } + + private function getSyncTimeKey( string $groupId ): string { + return $this->cache->makeKey( self::CACHE_PREFIX, $groupId, 'time' ); + } + + private function getGroupKey( string $groupId ): string { + return $this->cache->makeKey( self::CACHE_PREFIX, 'group', $groupId ); + } + + private function getMessageTitleKey( string $title ): string { + return $this->cache->makeKey( self::CACHE_PREFIX, 'msg-title', $title ); + } + +} diff --git a/MLEB/Translate/src/Synchronization/GroupSynchronizationResponse.php b/MLEB/Translate/src/Synchronization/GroupSynchronizationResponse.php new file mode 100644 index 00000000..83a58ef6 --- /dev/null +++ b/MLEB/Translate/src/Synchronization/GroupSynchronizationResponse.php @@ -0,0 +1,47 @@ +<?php + +declare( strict_types = 1 ); + +namespace MediaWiki\Extensions\Translate\Synchronization; + +/** + * Class encapsulating the response returned by the GroupSynchronizationCache + * when requested for an update on a group synchronization status. 
/**
 * Value object describing the synchronization state of one message group, as
 * reported by the GroupSynchronizationCache.
 * @author Abijeet Patro
 * @license GPL-2.0-or-later
 * @since 2020.06
 */
class GroupSynchronizationResponse {
	/** @var string */
	private $groupId;

	/** @var array Message keys that are still being processed */
	private $remainingMessageKeys;

	/** @var bool */
	private $timeout;

	public function __construct(
		string $groupId, array $remainingMessageKeys, bool $hasTimedOut
	) {
		$this->timeout = $hasTimedOut;
		$this->remainingMessageKeys = $remainingMessageKeys;
		$this->groupId = $groupId;
	}

	public function getGroupId(): string {
		return $this->groupId;
	}

	public function getRemainingMessages(): array {
		return $this->remainingMessageKeys;
	}

	/** Whether no messages remain to be processed */
	public function isDone(): bool {
		return count( $this->remainingMessageKeys ) === 0;
	}

	public function hasTimedOut(): bool {
		return $this->timeout;
	}
}
/**
 * Restore the properties from the JSON string produced by serialize().
 *
 * @param string $deserialized The serialized (JSON) representation
 * @throws \InvalidArgumentException If the string does not decode to an array
 */
public function unserialize( $deserialized ) {
	$params = FormatJson::decode( $deserialized, true );
	// Anything other than an array here means the input was not valid
	// serialized data; fail with an explicit message instead of a type error
	// from assignPropsFromArray().
	if ( !is_array( $params ) ) {
		throw new \InvalidArgumentException(
			'Unable to decode serialized MessageUpdateParameter'
		);
	}

	$this->assignPropsFromArray( $params );
}
[]; + } + } + + /** Create a new instance of the class from MessageUpdateJob */ + public static function createFromJob( MessageUpdateJob $job ): self { + $jobParams = $job->getParams(); + $jobParams['title'] = $job->getTitle()->getPrefixedDBkey(); + return new self( $jobParams ); + } +} diff --git a/MLEB/Translate/src/SystemUsers/FuzzyBot.php b/MLEB/Translate/src/SystemUsers/FuzzyBot.php new file mode 100644 index 00000000..4e654ade --- /dev/null +++ b/MLEB/Translate/src/SystemUsers/FuzzyBot.php @@ -0,0 +1,25 @@ +<?php +declare( strict_types = 1 ); + +namespace MediaWiki\Extensions\Translate\SystemUsers; + +use User; + +/** + * FuzzyBot - the misunderstood workhorse. + * + * @author Niklas Laxström + * @license GPL-2.0-or-later + * @since 2012-01-02 + */ +class FuzzyBot { + public static function getUser(): User { + return User::newSystemUser( self::getName(), [ 'steal' => true ] ); + } + + public static function getName(): string { + global $wgTranslateFuzzyBotName; + + return $wgTranslateFuzzyBotName; + } +} diff --git a/MLEB/Translate/src/SystemUsers/TranslateUserManager.php b/MLEB/Translate/src/SystemUsers/TranslateUserManager.php new file mode 100644 index 00000000..d008567a --- /dev/null +++ b/MLEB/Translate/src/SystemUsers/TranslateUserManager.php @@ -0,0 +1,25 @@ +<?php +/** + * System account to handle user related modifications + * + * @file + * @author Abijeet Patro + * @license GPL-2.0-or-later + */ + +namespace MediaWiki\Extensions\Translate\SystemUsers; + +/** + * @since 2019.08 + */ +class TranslateUserManager { + public static function getUser() { + return \User::newSystemUser( self::getName(), [ 'steal' => true ] ); + } + + public static function getName() { + global $wgTranslateUserManagerName; + + return $wgTranslateUserManagerName; + } +} diff --git a/MLEB/Translate/src/Utilities/GettextPlural.php b/MLEB/Translate/src/Utilities/GettextPlural.php new file mode 100644 index 00000000..e1ead9e0 --- /dev/null +++ 
b/MLEB/Translate/src/Utilities/GettextPlural.php @@ -0,0 +1,203 @@ +<?php +/** + * @file + * @license GPL-2.0-or-later + */ + +namespace MediaWiki\Extensions\Translate\Utilities; + +use GettextPluralException; +use InvalidArgumentException; +use TranslateUtils; + +/** + * Helpers for Gettext plural rules and for converting between lists of plural forms + * and the {{PLURAL:GETTEXT|...}} markup. + * + * @since 2019.09 + */ +class GettextPlural { + private const PRE = '{{PLURAL:GETTEXT|'; + private const POST = '}}'; + + /** + * Returns Gettext plural rule for given language. + * + * The documentation language ($wgTranslateDocumentationLanguageCode) always gets a + * single-form rule. Other rules are looked up in data/plural-gettext.txt, where each + * non-empty line holds a language code and its rule separated by a tab. + * + * NOTE(review): a non-empty line without a tab and a failure to read the file are not + * handled here. + * + * @param string $code Language tag in MediaWiki internal format. + * @return string Empty string if no plural rule found + */ + public static function getPluralRule( $code ) { + global $wgTranslateDocumentationLanguageCode; + + if ( $code === $wgTranslateDocumentationLanguageCode ) { + return 'nplurals=1; plural=0;'; + } + + $rulefile = __DIR__ . '/../../data/plural-gettext.txt'; + $rules = file_get_contents( $rulefile ); + foreach ( explode( "\n", $rules ) as $line ) { + if ( trim( $line ) === '' ) { + continue; + } + [ $rulecode, $rule ] = explode( "\t", $line ); + if ( $rulecode === $code ) { + return $rule; + } + } + + return ''; + } + + /** + * Returns how many plural forms are expected by a given plural rule. + * + * @param string $rule Gettext style plural rule. 
+ * @return int The number given in the rule's nplurals= part + * @throws InvalidArgumentException If the rule has no "nplurals=<digits>" part followed by a semicolon + */ + public static function getPluralCount( $rule ) { + $m = []; + $ok = preg_match( '/nplurals=([0-9]+).*;/', $rule, $m ); + if ( !$ok ) { + throw new InvalidArgumentException( "Rule $rule is malformed" ); + } + return (int)$m[ 1 ]; + } + + /** + * Quick way to check if the text contains plural syntax. + * + * Only looks for the opening part of the markup. + * + * @param string $text + * @return bool + */ + public static function hasPlural( $text ) { + return strpos( $text, self::PRE ) !== false; + } + + /** + * Format plural forms as single string suitable for translation. + * + * @param string[] $forms + * @return string The forms joined with "|" and wrapped in the plural markup + */ + public static function flatten( array $forms ) { + return self::PRE . implode( '|', $forms ) . 
self::POST; + } + + /** + * Format translation with plural forms as array of forms. + * + * Reverse of flatten. Do note that A may be != flatten( unflatten( A ) ) because + * translators can place part of the text outside the plural markup or use multiple + * instances of the markup. + * + * @param string $text + * @param int $expectedPluralCount + * @return string[] One fully expanded string per form index from 0 to $expectedPluralCount - 1 + * @throws GettextPluralException If parsing the plural markup fails + */ + public static function unflatten( $text, $expectedPluralCount ) { + [ $template, $instanceMap ] = self::parsePluralForms( $text ); + return self::expandTemplate( $template, $instanceMap, $expectedPluralCount ); + } + + /** + * Replaces problematic markup which can confuse our plural syntax markup with placeholders + * + * @param string $text + * @return array [ string $text, array $map ] where $map maps each placeholder back to the + * markup it replaced + */ + private static function armour( $text ) { + // |/| is commonly used in KDE to support inflections. It needs to be escaped + // to avoid it messing up the plural markup. + $replacements = [ + '|/|' => TranslateUtils::getPlaceholder(), + ]; + // {0} is a common variable format + preg_match_all( '/\{\d+\}/', $text, $matches ); + foreach ( $matches[0] as $m ) { + $replacements[$m] = TranslateUtils::getPlaceholder(); + } + + $text = strtr( $text, $replacements ); + $map = array_flip( $replacements ); + + return [ $text, $map ]; + } + + /** + * Reverse of armour. + * + * @param string $text + * @param array $map Map returned by armour. + * @return string + */ + private static function unarmour( $text, array $map ) { + return strtr( $text, $map ); + } + + /** + * Parses plural markup into a structure form. + * + * Each plural markup instance found in the text is replaced with a unique placeholder in + * the returned template, and the instance map maps that placeholder to the list of forms + * of the instance. 
Throws GettextPluralException if the regular expression fails. 
+ * + * @param string $text + * @return array [ string $template, array $instanceMap ] + */ + public static function parsePluralForms( $text ) { + $m = []; + $pre = preg_quote( self::PRE, '/' ); + $post = preg_quote( self::POST, '/' ); + + [ $armouredText, $armourMap ] = self::armour( $text ); + + $ok = preg_match_all( "/$pre(.*)$post/Us", $armouredText, $m ); + if ( $ok === false ) { + throw new GettextPluralException( "Plural regular expression failed for text: $text" ); + } + + $template = $armouredText; + $instanceMap = []; + + foreach ( $m[0] as $instanceIndex => $instanceText ) { + $ph = TranslateUtils::getPlaceholder(); + + // Using preg_replace instead of str_replace because of the limit parameter + $pattern = '/' . preg_quote( $instanceText, '/' ) . '/'; + $template = preg_replace( $pattern, $ph, $template, 1 ); + + $instanceForms = explode( '|', $m[ 1 ][ $instanceIndex ] ); + foreach ( $instanceForms as $i => $v ) { + $instanceForms[ $i ] = self::unarmour( $v, $armourMap ); + } + + $instanceMap[$ph] = $instanceForms; + } + + $template = self::unarmour( $template, $armourMap ); + return [ $template, $instanceMap ]; + } + + /** + * Gives fully expanded forms given a template and parsed plural markup instances. 
+ * + * @param string $template Text in which each plural markup instance is a placeholder + * @param array $instanceMap Map of placeholder => list of forms, as returned by parsePluralForms + * @param int $expectedPluralCount + * @return string[] One string per form index from 0 to $expectedPluralCount - 1 + */ + public static function expandTemplate( $template, array $instanceMap, $expectedPluralCount ) { + $formArray = []; + for ( $formIndex = 0; $formIndex < $expectedPluralCount; $formIndex++ ) { + // Start with the whole string + $form = $template; + + // Loop over each plural markup instance and replace it with the plural form belonging + // to the current index + foreach ( $instanceMap as $ph => $instanceForms ) { + // For missing forms, fall back to empty text. + // Extra forms are excluded because $formIndex < $expectedPluralCount + $replacement = $instanceForms[ $formIndex ] ?? 
''; + $form = str_replace( $ph, $replacement, $form ); + } + + $formArray[ $formIndex ] = $form; + } + + return $formArray; + } +} diff --git a/MLEB/Translate/src/Utilities/ParsingPlaceholderFactory.php b/MLEB/Translate/src/Utilities/ParsingPlaceholderFactory.php new file mode 100644 index 00000000..f4b523a9 --- /dev/null +++ b/MLEB/Translate/src/Utilities/ParsingPlaceholderFactory.php @@ -0,0 +1,23 @@ +<?php +declare( strict_types = 1 ); + +namespace MediaWiki\Extensions\Translate\Utilities; + +/** + * Create unique placeholders that can be used when parsing (wiki)text. + * @author Niklas Laxström + * @license GPL-2.0-or-later + * @since 2020.07 + */ +class ParsingPlaceholderFactory { + /** @var int Counter appended to each placeholder; incremented on every make() call */ private $i = 0; + + /** + * Returns "\x7fUNIQ" followed by two random hexadecimal numbers, a hyphen and the running + * counter of this factory. + * + * The return value therefore only contains characters from [a-zA-Z0-9\x7f-]; note that the + * hyphen is part of the output. + */ + public function make(): string { + return "\x7fUNIQ" . + dechex( mt_rand( 0, 0x7fffffff ) ) . + dechex( mt_rand( 0, 0x7fffffff ) ) . + '-' . + $this->i++; + } +} diff --git a/MLEB/Translate/src/Utilities/SmartFormatPlural.php b/MLEB/Translate/src/Utilities/SmartFormatPlural.php new file mode 100644 index 00000000..3ba8c350 --- /dev/null +++ b/MLEB/Translate/src/Utilities/SmartFormatPlural.php @@ -0,0 +1,64 @@ +<?php +/** + * @file + * @license GPL-2.0-or-later + */ + +namespace MediaWiki\Extensions\Translate\Utilities; + +/** + * Implements partial support for SmartFormat plural syntax parsing. + * @see https://github.com/axuno/SmartFormat/wiki/Pluralization + * @since 2019.11 + */ +class SmartFormatPlural { + /** + * Finds the plural instances in the text and groups them by variable number. + * + * Example input: + * {0} {0:message|messages} older than {1} {1:week|weeks} {0:has|have} been deleted. 
+ * Example output: + * [ + * '0' => [ + * [ + * 'forms' => [ 'message', 'messages' ], + * 'original' => '{0:message|messages}', + * ], + * [ + * 'forms' => [ 'has', 'have' ], + * 'original' => '{0:has|have}', + * ], + * ], + * '1' => [ + * [ + * 'forms' => [ 'week', 'weeks' ], + * 'original' => '{1:week|weeks}', + * ], + * ], + * ] + * + * @param string $text + * @return array[] Variable number => list of [ 'forms' => string[], 'original' => string ] + * entries in order of appearance; empty array if the text has no plural instances + */ + public static function getPluralInstances( string $text ) : array { + // ldns = Large Deeply-Nested Structure + $ldns = []; + + // Named variables seem to be supported by the spec, but we limit ourselves + // only to numbers. Example syntax {0:message|messages} + $regex = '/\{(\d+):([^}]+)\}/Us'; + $matches = []; + preg_match_all( $regex, $text, $matches, PREG_SET_ORDER ); + + foreach ( $matches as $instance ) { + $original = $instance[ 0 ]; + $variable = $instance[ 1 ]; + $forms = explode( '|', $instance[ 2 ] ); + $ldns[ $variable ] = $ldns[ $variable ] ?? []; + $ldns[ $variable ][] = [ + 'forms' => $forms, + 'original' => $original, + ]; + } + + return $ldns; + } +} diff --git a/MLEB/Translate/src/Utilities/StringComparators/SimpleStringComparator.php b/MLEB/Translate/src/Utilities/StringComparators/SimpleStringComparator.php new file mode 100644 index 00000000..1c24d2c8 --- /dev/null +++ b/MLEB/Translate/src/Utilities/StringComparators/SimpleStringComparator.php @@ -0,0 +1,29 @@ +<?php +/** + * Contains a simple string compare class. + * @license GPL-2.0-or-later + */ + +namespace MediaWiki\Extensions\Translate\Utilities\StringComparators; + +/** + * A simple string comparator, that compares two strings and determines if they are an exact match. 
+ * @since 2019.10 + */ +class SimpleStringComparator implements StringComparator { + /** + * @inheritDoc + * + * Returns 1 for identical strings, 0.95 for strings that are equal after lowercasing and + * trimming, and 0 otherwise. + */ + public function getSimilarity( $addedMessage, $deletedMessage ) { + if ( $addedMessage === $deletedMessage ) { + return 1; + } + + if ( trim( mb_strtolower( $addedMessage ) ) === trim( mb_strtolower( $deletedMessage ) ) ) { + // This is an arbitrarily chosen number to differentiate it from an exact match. + return 0.95; + } + + return 0; + } +} diff --git a/MLEB/Translate/src/Utilities/StringComparators/StringComparator.php b/MLEB/Translate/src/Utilities/StringComparators/StringComparator.php new file mode 100644 index 00000000..bbf9e96b --- /dev/null +++ b/MLEB/Translate/src/Utilities/StringComparators/StringComparator.php @@ -0,0 +1,18 @@ +<?php + +namespace MediaWiki\Extensions\Translate\Utilities\StringComparators; + +/** + * An interface to be implemented by comparators that measure how similar + * two strings are. + */ +interface StringComparator { + /** + * Compares the two messages and returns a similarity score + * + * @param string $a + * @param string $b + * @return float 0-1 with 1 being an exact match + */ + public function getSimilarity( $a, $b ); +} diff --git a/MLEB/Translate/src/Utilities/TranslateReplaceTitle.php b/MLEB/Translate/src/Utilities/TranslateReplaceTitle.php new file mode 100644 index 00000000..a498d7a9 --- /dev/null +++ b/MLEB/Translate/src/Utilities/TranslateReplaceTitle.php @@ -0,0 +1,75 @@ +<?php +/** + * Contains a helper class to help replace titles. + * @license GPL-2.0-or-later + */ + +namespace MediaWiki\Extensions\Translate\Utilities; + +use MessageHandle; +use Title; + +/** + * Helper class that contains utility methods to help with identifying and replacing titles. 
+ * @since 2019.10 + */ +class TranslateReplaceTitle { + + /** + * Returns the pages that would be moved/renamed by the current text replacement, + * each paired with the title it would be moved to. + * + * Matching pages whose message key differs from the key of the source handle are skipped. + * This method does not check whether a page can actually be moved. + * @param MessageHandle $sourceMessageHandle + * @param string $replacement + * @return array[] List of [ Title $title, Title $targetTitle ] pairs + */ + public static function getTitlesForMove( + MessageHandle $sourceMessageHandle, $replacement + ) { + $titlesForMove = []; + $namespace = $sourceMessageHandle->getTitle()->getNamespace(); + + $titles = self::getMatchingTitles( $sourceMessageHandle ); + + foreach ( $titles as $title ) { + $handle = new MessageHandle( $title ); + // This takes care of situations where we have two different titles + // foo and foo/bar, both will be matched and fetched but the slash + // does not represent a language separator + if ( $handle->getKey() !== $sourceMessageHandle->getKey() ) { + continue; + } + $targetTitle = Title::makeTitle( + $namespace, + \TranslateUtils::title( $replacement, $handle->getCode(), $namespace ) + ); + $titlesForMove[] = [ $title, $targetTitle ]; + } + + return $titlesForMove; + } + + /** + * Finds the pages in the namespace of the handle whose title starts with the base title + * of the handle followed by a slash. + * + * The query is run on the connection returned by wfGetDB( DB_MASTER ). + * + * @param MessageHandle $handle + * @return \TitleArrayFromResult + */ + private static function getMatchingTitles( MessageHandle $handle ) { + $dbr = wfGetDB( DB_MASTER ); + + $tables = [ 'page' ]; + $vars = [ 'page_title', 'page_namespace', 'page_id' ]; + + $comparisonCond = 'page_title ' . 
$dbr->buildLike( + $handle->getTitleForBase()->getDBkey(), '/', $dbr->anyString() + ); + + $conds = [ + $comparisonCond, + 'page_namespace' => $handle->getTitle()->getNamespace(), + ]; + + $result = $dbr->select( $tables, $vars, $conds, __METHOD__ ); + + return \TitleArray::newFromResult( $result ); + } +} diff --git a/MLEB/Translate/src/Utilities/UnicodePlural.php b/MLEB/Translate/src/Utilities/UnicodePlural.php new file mode 100644 index 00000000..a01c82af --- /dev/null +++ b/MLEB/Translate/src/Utilities/UnicodePlural.php @@ -0,0 +1,193 @@ +<?php +/** + * @file + * @license GPL-2.0-or-later + */ + +namespace MediaWiki\Extensions\Translate\Utilities; + +use RuntimeException; +use TranslateUtils; + +/** + * Helpers for CLDR plural keywords and for converting between plural forms and the + * {{PLURAL|...}} markup. + * + * @since 2019.09 + */ +class UnicodePlural { + private const PRE = '{{PLURAL|'; + private const POST = '}}'; + + /** + * Returns the CLDR plural keywords for given language. + * + * The keywords are the keys of the cardinal plural rules for the language in + * data/plural-cldr.json, with the "pluralRule-count-" prefix removed. + * + * @param string $code Language tag in MediaWiki internal format. + * @return string[]|null List of keywords, or null if no plural rule found + */ + public static function getPluralKeywords( $code ) { + $filePath = __DIR__ . '/../../data/plural-cldr.json'; + $ruleData = json_decode( file_get_contents( $filePath ), true ); + + $ruleSet = $ruleData[ 'supplemental' ][ 'plurals-type-cardinal' ][ $code ] ?? null; + if ( $ruleSet === null ) { + return null; + } + + $keywords = []; + foreach ( array_keys( $ruleSet ) as $name ) { + $keywords[] = str_replace( 'pluralRule-count-', '', $name ); + } + + return $keywords; + } + + /** + * Quick way to check if the text contains plural syntax. + * + * Only looks for the opening part of the markup. 
+ * + * @param string $text + * @return bool + */ + public static function hasPlural( $text ) { + return strpos( $text, self::PRE ) !== false; + } + + /** + * Format plural forms map as single string suitable for translation. + * + * The array is keyed by plural keyword; the values are the form texts. + * + * This does not check validity of forms. Use ::convertFormListToFormMap for that. 
+ * @param string[] $forms + * @return string + */ + public static function flattenMap( array $forms ) { + $list = []; + foreach ( $forms as $keyword => $value ) { + $list[] = [ $keyword, $value ]; + } + + return self::flattenList( $list ); + } + + /** + * Format plural forms list as single string. + * + * This does not check validity of forms. + * @param array[] $formList [ keyword, form ] pairs. + * @return string The formatted forms joined with "|" and wrapped in the plural markup + */ + public static function flattenList( array $formList ) { + $formatted = []; + foreach ( $formList as list( $keyword, $value ) ) { + $formatted[] = self::formatForm( $keyword, $value ); + } + + return self::PRE . implode( '|', $formatted ) . self::POST; + } + + /** Formats one form as "keyword=value"; the value of the 'other' keyword is returned without a prefix. */ private static function formatForm( $keyword, $value ) { + $prefix = $keyword === 'other' ? '' : "$keyword="; + return $prefix . $value; + } + + /** + * Format translation with plural forms as array of forms. + * + * Reverse of flattenMap. Do note that A may be != flattenMap( unflatten( A ) ) because + * translators can place part of the text outside the plural markup or use multiple + * instances of the markup. + * + * @param string $text + * @param string[] $expectedKeywords + * @return string[] Map of expected keyword => fully expanded text + * @throws RuntimeException If parsing the plural markup fails + */ + public static function unflatten( $text, $expectedKeywords ) { + list( $template, $instanceMap ) = self::parsePluralForms( $text ); + return self::expandTemplate( $template, $instanceMap, $expectedKeywords ); + } + + /** + * Parses plural markup into a structure form. + * + * Each plural markup instance found in the text is replaced with a unique placeholder in + * the returned template, and the instance map maps that placeholder to a list of + * [ keyword, form ] pairs. 
A form without a "keyword=" part gets the keyword 'other'. + * Throws RuntimeException if the regular expression fails. + * + * NOTE(review): the keyword pattern is not anchored to the start of a form, so a form + * that contains "word=" later in its text is also split there. Confirm this is intended. 
+ * + * @param string $text + * @return array [ string $template, array $instanceMap ] + */ + public static function parsePluralForms( $text ) { + $m = []; + $pre = preg_quote( self::PRE, '/' ); + $post = preg_quote( self::POST, '/' ); + + $ok = preg_match_all( "/$pre(.*)$post/Us", $text, $m ); + if ( $ok === false ) { + throw new RuntimeException( "Plural regular expression failed for text: $text" ); + } + + $template = $text; + $instanceMap = []; + + foreach ( $m[0] as $instanceIndex => $instanceText ) { + $ph = TranslateUtils::getPlaceholder(); + + // Using preg_replace instead of str_replace because of the limit parameter + $pattern = '/' . preg_quote( $instanceText, '/' ) . '/'; + $template = preg_replace( $pattern, $ph, $template, 1 ); + + $instanceForms = []; + foreach ( explode( '|', $m[ 1 ][ $instanceIndex ] ) as $form ) { + $m2 = []; + $ok = preg_match( "~\s*([a-z]+)\s*=(.+)~s", $form, $m2 ); + $keyword = $ok ? $m2[ 1 ] : 'other'; + $value = $ok ? trim( $m2[ 2 ] ) : $form; + $instanceForms[] = [ $keyword, $value ]; + } + + $instanceMap[$ph] = $instanceForms; + } + + return [ $template, $instanceMap ]; + } + + /** + * Gives fully expanded forms given a template and parsed plural markup instances. 
+ * + * @param string $template Text in which each plural markup instance is a placeholder + * @param array $instanceMap Map of placeholder => list of [ keyword, form ] pairs, as + * returned by parsePluralForms + * @param string[] $expectedKeywords + * @return string[] Map of expected keyword => fully expanded text + */ + public static function expandTemplate( $template, array $instanceMap, $expectedKeywords ) { + $formArray = []; + + // Convert from list of forms to map of forms for easier processing + foreach ( $instanceMap as $ph => $list ) { + $instanceMap[ $ph ] = self::convertFormListToFormMap( $list, $expectedKeywords ); + } + + foreach ( $expectedKeywords as $keyword ) { + // Start with the whole string + $form = $template; + + // Loop over each plural markup instance and replace it with the plural form belonging + // to the current index + foreach ( $instanceMap as $ph => $instanceFormMap ) { + // For missing forms, fall back to empty text. 
+ $replacement = $instanceFormMap[ $keyword ] ?? ''; + $form = str_replace( $ph, $replacement, $form ); + } + + $formArray[ $keyword ] = $form; + } + + return $formArray; + } + + /** Converts a list of [ keyword, form ] pairs to a map keyed by the expected keywords, in their given order. Expected keywords missing from the list map to null, keywords that are not expected are dropped, and the last pair wins when a keyword is repeated. */ public static function convertFormListToFormMap( array $formList, array $expectedKeywords ) { + $formMap = []; + foreach ( $formList as list( $keyword, $value ) ) { + $formMap[ $keyword ] = $value; + } + + $sortedFormMap = []; + foreach ( $expectedKeywords as $keyword ) { + $sortedFormMap[ $keyword ] = $formMap[ $keyword ] ?? null; + } + + return $sortedFormMap; + } +} diff --git a/MLEB/Translate/src/Validation/LegacyValidatorAdapter.php b/MLEB/Translate/src/Validation/LegacyValidatorAdapter.php new file mode 100644 index 00000000..a63699d5 --- /dev/null +++ b/MLEB/Translate/src/Validation/LegacyValidatorAdapter.php @@ -0,0 +1,62 @@ +<?php +/** + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +declare( strict_types = 1 ); + +namespace MediaWiki\Extensions\Translate\Validation; + +use InsertablesSuggester; +use MediaWiki\Extensions\Translate\MessageValidator\Validator; +use TMessage; + +/** + * Object adapter for message validators that implement the deprecated interface. + * + * @since 2020.06 + */ +class LegacyValidatorAdapter implements MessageValidator, InsertablesSuggester { + /** @var Validator The wrapped legacy validator */ + private $validator; + + public function __construct( Validator $validator ) { + $this->validator = $validator; + } + + /** + * @inheritDoc + * + * Runs the wrapped validator and converts the notices it reports for the key of the + * message into validation issues. 
+ */ + public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues { + $notices = []; + $this->validator->validate( $message, $targetLanguage, $notices ); + return $this->convertNoticesToValidationIssues( $notices, $message->key() ); + } + + /** Converts the notices stored under $messageKey to validation issues. Each notice is read as [ [ type, subType ], issue message key, ...issue message params ]; a missing entry for $messageKey gives an empty collection. */ private function convertNoticesToValidationIssues( + array $notices, + string $messageKey + ): ValidationIssues { + $issues = new ValidationIssues(); + foreach ( $notices[$messageKey] ?? 
[] as $notice ) { + $issue = new ValidationIssue( + $notice[0][0], + $notice[0][1], + $notice[1], + array_slice( $notice, 2 ) + ); + $issues->add( $issue ); + } + + return $issues; + } + + /** + * @inheritDoc + * + * Delegates to the wrapped validator when it implements InsertablesSuggester and + * returns an empty array otherwise. + */ + public function getInsertables( $text ) { + if ( $this->validator instanceof InsertablesSuggester ) { + return $this->validator->getInsertables( $text ); + } + + return []; + } +} diff --git a/MLEB/Translate/src/Validation/MessageValidator.php b/MLEB/Translate/src/Validation/MessageValidator.php new file mode 100644 index 00000000..a488a021 --- /dev/null +++ b/MLEB/Translate/src/Validation/MessageValidator.php @@ -0,0 +1,24 @@ +<?php +/** + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +declare( strict_types = 1 ); + +namespace MediaWiki\Extensions\Translate\Validation; + +use TMessage; + +/** + * Interim interface for message validators. + * + * In the future, it is expected that this will be deprecated and replaced with + * a MessageRecordValidator interface. + * + * @since 2020.06 + */ +interface MessageValidator { + public function getIssues( TMessage $message, string $targetLanguage ): ValidationIssues; +} diff --git a/MLEB/Translate/src/Validation/ValidationIssue.php b/MLEB/Translate/src/Validation/ValidationIssue.php new file mode 100644 index 00000000..f47d7000 --- /dev/null +++ b/MLEB/Translate/src/Validation/ValidationIssue.php @@ -0,0 +1,54 @@ +<?php +/** + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +namespace MediaWiki\Extensions\Translate\Validation; + +/** + * Value object. 
+ * + * @newable + * @since 2020.06 + */ +class ValidationIssue { + /** @var string */ + private $type; + /** @var string */ + private $subType; + /** @var string */ + private $messageKey; + /** @var array Empty array when no params were given to the constructor */ + private $messageParams; + + /** @stable for calling */ + public function __construct( + string $type, + string $subType, + string $messageKey, + array $messageParams = [] + ) { + $this->type = $type; + $this->subType = $subType; + $this->messageKey = $messageKey; + $this->messageParams = $messageParams; + } + + /** Returns the type given to the constructor. */ public function type(): string { + return $this->type; + } + + /** Returns the sub type given to the constructor. */ public function subType(): string { + return $this->subType; + } + + /** Returns the message key given to the constructor. */ public function messageKey(): string { + return $this->messageKey; + } + + /** Returns the message params given to the constructor. */ public function messageParams(): array { + return $this->messageParams; + } +} diff --git a/MLEB/Translate/src/Validation/ValidationIssues.php b/MLEB/Translate/src/Validation/ValidationIssues.php new file mode 100644 index 00000000..5ca17cc4 --- /dev/null +++ b/MLEB/Translate/src/Validation/ValidationIssues.php @@ -0,0 +1,53 @@ +<?php +/** + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +namespace MediaWiki\Extensions\Translate\Validation; + +use ArrayIterator; +use Countable; +use IteratorAggregate; +use Traversable; + +/** + * Mutable collection for validation issues. + * + * @newable + * @since 2020.06 + */ +class ValidationIssues implements Countable, IteratorAggregate { + /** @var ValidationIssue[] Issues in the order they were added */ + private $issues = []; + + /** Add a new validation issue to the end of the collection. 
*/ + public function add( ValidationIssue $issue ) { + $this->issues[] = $issue; + } + + /** Append the issues of another collection to this collection; the other collection is not changed. */ + public function merge( ValidationIssues $issues ) { + $this->issues = array_merge( $this->issues, $issues->issues ); + } + + /** + * Check whether this collection is not empty. 
+ * + * @return bool False if empty, true otherwise + */ + public function hasIssues(): bool { + return $this->issues !== []; + } + + /** @return Traversable<ValidationIssue> An ArrayIterator over the issues in the order they were added */ + public function getIterator(): Traversable { + return new ArrayIterator( $this->issues ); + } + + /** + * @inheritDoc + * @return int Number of issues in the collection + */ + public function count(): int { + return count( $this->issues ); + } +} |