summaryrefslogtreecommitdiff
blob: 8e7788ed9ad45ce07208714951a1b2cfd501b7fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
<?php
/**
 * @file
 * @license GPL-2.0-or-later
 */

namespace MediaWiki\Extension\Translate\Utilities;

use RuntimeException;
use TranslateUtils;

/**
 * Helpers for plural markup of the form `{{PLURAL|one=…|few=…|…}}`, where each form is
 * labelled with a plural keyword and the unlabelled form is the `other` form.
 *
 * Provides conversion in both directions between the single-string representation and
 * a map of keyword => fully expanded text.
 *
 * @since 2019.09
 */
class UnicodePlural {
	private const PRE = '{{PLURAL|';
	private const POST = '}}';

	/**
	 * Returns CLDR plural keywords for given language.
	 *
	 * The keywords are the rule names found in data/plural-cldr.json with the
	 * 'pluralRule-count-' prefix removed.
	 *
	 * @param string $code Language tag in MediaWiki internal format.
	 * @return string[]|null List of plural keywords, or null if no plural rules are found
	 *  for the language (or the data file cannot be read or decoded).
	 */
	public static function getPluralKeywords( $code ) {
		// Decoded data file. Kept between calls so that the file is read and parsed only once.
		static $ruleData = null;

		if ( $ruleData === null ) {
			$filePath = __DIR__ . '/../../data/plural-cldr.json';
			$contents = file_get_contents( $filePath );
			$decoded = $contents === false ? null : json_decode( $contents, true );
			if ( !is_array( $decoded ) ) {
				// Without usable data no language has known plural rules. Not cached, so
				// that a later call can try again.
				return null;
			}
			$ruleData = $decoded;
		}

		$ruleSet = $ruleData[ 'supplemental' ][ 'plurals-type-cardinal' ][ $code ] ?? null;
		if ( $ruleSet === null ) {
			return null;
		}

		$keywords = [];
		foreach ( array_keys( $ruleSet ) as $name ) {
			$keywords[] = str_replace( 'pluralRule-count-', '', $name );
		}

		return $keywords;
	}

	/**
	 * Quick way to check if the text contains plural syntax.
	 *
	 * Only looks for the opening marker; it does not verify that the markup is well-formed.
	 *
	 * @param string $text
	 * @return bool
	 */
	public static function hasPlural( $text ) {
		return strpos( $text, self::PRE ) !== false;
	}

	/**
	 * Format plural forms map as single string suitable for translation.
	 *
	 * This does not check validity of forms. Use ::convertFormListToFormMap for that.
	 * @param string[] $forms Map of keyword => form.
	 * @return string
	 */
	public static function flattenMap( array $forms ) {
		$list = [];
		foreach ( $forms as $keyword => $value ) {
			$list[] = [ $keyword, $value ];
		}

		return self::flattenList( $list );
	}

	/**
	 * Format plural forms list as single string.
	 *
	 * This does not check validity of forms.
	 * @param array[] $formList [ keyword, form ] pairs.
	 * @return string
	 */
	public static function flattenList( array $formList ) {
		$formatted = [];
		foreach ( $formList as list( $keyword, $value ) ) {
			$formatted[] = self::formatForm( $keyword, $value );
		}

		return self::PRE . implode( '|', $formatted ) . self::POST;
	}

	/**
	 * Formats a single form for use inside the plural markup.
	 *
	 * @param string $keyword
	 * @param string $value
	 * @return string "keyword=value", or just the value for the 'other' keyword.
	 */
	private static function formatForm( $keyword, $value ) {
		$prefix = $keyword === 'other' ? '' : "$keyword=";
		return $prefix . $value;
	}

	/**
	 * Format translation with plural forms as array of forms.
	 *
	 * Reverse of flatten. Do note that A may be != flatten( unflatten( A ) ) because
	 * translators can place part of the text outside the plural markup or use multiple
	 * instances of the markup.
	 *
	 * @param string $text
	 * @param string[] $expectedKeywords
	 * @return string[] Map of keyword => expanded text, in the order of $expectedKeywords.
	 */
	public static function unflatten( $text, $expectedKeywords ) {
		list( $template, $instanceMap ) = self::parsePluralForms( $text );
		return self::expandTemplate( $template, $instanceMap, $expectedKeywords );
	}

	/**
	 * Parses plural markup into a structure form.
	 *
	 * Each instance of the markup in the text is replaced with a placeholder. The returned
	 * instance map has those placeholders as keys and lists of [ keyword, form ] pairs as
	 * values. Forms without a leading "keyword=" are given the keyword 'other'.
	 *
	 * @param string $text
	 * @return array [ string $template, array $instanceMap ]
	 * @throws RuntimeException If the regular expression matching fails.
	 */
	public static function parsePluralForms( $text ) {
		$m = [];
		$pre = preg_quote( self::PRE, '/' );
		$post = preg_quote( self::POST, '/' );

		// U: an instance ends at the first closing marker. s: forms may span multiple lines.
		$ok = preg_match_all( "/$pre(.*)$post/Us", $text, $m );
		if ( $ok === false ) {
			throw new RuntimeException( "Plural regular expression failed for text: $text" );
		}

		$template = $text;
		$instanceMap = [];

		foreach ( $m[0] as $instanceIndex => $instanceText ) {
			$ph = TranslateUtils::getPlaceholder();

			// Replace only the first remaining occurrence, as identical instances each get
			// their own placeholder. Done with plain string functions so that neither the
			// instance text nor the placeholder is subject to regular expression semantics.
			$pos = strpos( $template, $instanceText );
			if ( $pos !== false ) {
				$template = substr_replace( $template, $ph, $pos, strlen( $instanceText ) );
			}

			$instanceForms = [];
			foreach ( explode( '|', $m[ 1 ][ $instanceIndex ] ) as $form ) {
				$m2 = [];
				// Anchored to the start of the form: an equals sign later in the text (for
				// example in an HTML attribute) must not be mistaken for a keyword separator.
				$ok = preg_match( '~^\s*([a-z]+)\s*=(.+)~s', $form, $m2 );
				$keyword = $ok ? $m2[ 1 ] : 'other';
				$value = $ok ? trim( $m2[ 2 ] ) : $form;
				$instanceForms[] = [ $keyword, $value ];
			}

			$instanceMap[$ph] = $instanceForms;
		}

		return [ $template, $instanceMap ];
	}

	/**
	 * Gives fully expanded forms given a template and parsed plural markup instances.
	 *
	 * @param string $template Text where plural markup instances are replaced with placeholders.
	 * @param array $instanceMap Map of placeholder => list of [ keyword, form ] pairs.
	 * @param string[] $expectedKeywords
	 * @return string[] Map of keyword => expanded text, one entry per expected keyword.
	 */
	public static function expandTemplate( $template, array $instanceMap, $expectedKeywords ) {
		$formArray = [];

		// Convert from list of forms to map of forms for easier processing
		foreach ( $instanceMap as $ph => $list ) {
			$instanceMap[ $ph ] = self::convertFormListToFormMap( $list, $expectedKeywords );
		}

		foreach ( $expectedKeywords as $keyword ) {
			// Start with the whole string
			$form = $template;

			// Loop over each plural markup instance and replace it with the plural form belonging
			// to the current keyword
			foreach ( $instanceMap as $ph => $instanceFormMap ) {
				// For missing forms, fall back to empty text.
				$replacement = $instanceFormMap[ $keyword ] ?? '';
				$form = str_replace( $ph, $replacement, $form );
			}

			$formArray[ $keyword ] = $form;
		}

		return $formArray;
	}

	/**
	 * Converts a list of [ keyword, form ] pairs to a map keyed by the expected keywords.
	 *
	 * The result has exactly the expected keywords as keys, in the given order. Expected
	 * keywords without a form in the list map to null, forms with other keywords are
	 * dropped, and if a keyword is listed multiple times the last form is used.
	 *
	 * @param array[] $formList [ keyword, form ] pairs.
	 * @param string[] $expectedKeywords
	 * @return array Map of keyword => form or null.
	 */
	public static function convertFormListToFormMap( array $formList, array $expectedKeywords ) {
		$formMap = [];
		foreach ( $formList as list( $keyword, $value ) ) {
			$formMap[ $keyword ] = $value;
		}

		$sortedFormMap = [];
		foreach ( $expectedKeywords as $keyword ) {
			$sortedFormMap[ $keyword ] = $formMap[ $keyword ] ?? null;
		}

		return $sortedFormMap;
	}
}