diff options
Diffstat (limited to 'MLEB/LocalisationUpdate/includes/Fetcher')
5 files changed, 245 insertions, 0 deletions
diff --git a/MLEB/LocalisationUpdate/includes/Fetcher/Fetcher.php b/MLEB/LocalisationUpdate/includes/Fetcher/Fetcher.php
new file mode 100644
index 00000000..c9d38155
--- /dev/null
+++ b/MLEB/LocalisationUpdate/includes/Fetcher/Fetcher.php
@@ -0,0 +1,30 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate\Fetcher;
+
+/**
+ * Interface for classes which fetch files over different protocols and ways.
+ */
+interface Fetcher {
+	/**
+	 * Fetches a single resource.
+	 *
+	 * @param string $url Location of the resource.
+	 * @return bool|string Contents of the resource, or false on failure.
+	 */
+	public function fetchFile( $url );
+
+	/**
+	 * Fetch a list of resources. This has the benefit of being able to pick up
+	 * new languages as they appear if languages are stored in separate files.
+	 *
+	 * @param string $pattern Location pattern, usually with * as the wildcard.
+	 * @return array Contents of the fetched resources, keyed by their location.
+	 */
+	public function fetchDirectory( $pattern );
+}
diff --git a/MLEB/LocalisationUpdate/includes/Fetcher/FetcherFactory.php b/MLEB/LocalisationUpdate/includes/Fetcher/FetcherFactory.php
new file mode 100644
index 00000000..4bb05132
--- /dev/null
+++ b/MLEB/LocalisationUpdate/includes/Fetcher/FetcherFactory.php
@@ -0,0 +1,33 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate\Fetcher;
+
+/**
+ * Constructs fetchers based on the repository urls.
+ */
+class FetcherFactory {
+	/**
+	 * Picks the fetcher implementation matching the scheme and host of a path.
+	 *
+	 * @param string $path Repository location: an HTTP(S) url or a file path.
+	 * @return Fetcher
+	 */
+	public function getFetcher( $path ) {
+		// Must be tested before the generic HTTPS case, which it also matches.
+		if ( strpos( $path, 'https://raw.github.com/' ) === 0 ) {
+			return new GitHubFetcher();
+		}
+
+		if ( strpos( $path, 'http://' ) === 0 || strpos( $path, 'https://' ) === 0 ) {
+			return new HttpFetcher();
+		}
+
+		// Anything else, including file:// urls, is read from the local disk.
+		return new FileSystemFetcher();
+	}
+}
diff --git a/MLEB/LocalisationUpdate/includes/Fetcher/FileSystemFetcher.php b/MLEB/LocalisationUpdate/includes/Fetcher/FileSystemFetcher.php
new file mode 100644
index 00000000..17e6362d
--- /dev/null
+++ b/MLEB/LocalisationUpdate/includes/Fetcher/FileSystemFetcher.php
@@ -0,0 +1,61 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate\Fetcher;
+
+/**
+ * Accesses file system directly.
+ */
+class FileSystemFetcher implements Fetcher {
+	/**
+	 * Reads one file from the local file system.
+	 *
+	 * @param string $url File path, optionally prefixed with file://
+	 *
+	 * @return bool|string File contents, or false if the file cannot be read.
+	 */
+	public function fetchFile( $url ) {
+		// Remove the protocol prefix
+		$url = preg_replace( '~^file://~', '', $url );
+
+		// is_readable() is also true for readable directories
+		if ( !is_file( $url ) || !is_readable( $url ) ) {
+			return false;
+		}
+
+		return file_get_contents( $url );
+	}
+
+	/**
+	 * Reads every file matching a glob pattern.
+	 *
+	 * @param string $pattern glob() pattern, optionally prefixed with file://
+	 *
+	 * @return array File contents keyed by the file:// url of each file.
+	 */
+	public function fetchDirectory( $pattern ) {
+		// Remove the protocol prefix
+		$pattern = preg_replace( '~^file://~', '', $pattern );
+
+		// glob() returns false rather than an empty array on error
+		$matches = glob( $pattern ) ?: [];
+
+		$data = [];
+		foreach ( $matches as $file ) {
+			// Skip matched directories and unreadable entries
+			if ( !is_file( $file ) || !is_readable( $file ) ) {
+				continue;
+			}
+
+			$contents = file_get_contents( $file );
+			if ( $contents !== false ) {
+				$data["file://$file"] = $contents;
+			}
+		}
+		return $data;
+	}
+}
diff --git a/MLEB/LocalisationUpdate/includes/Fetcher/GitHubFetcher.php b/MLEB/LocalisationUpdate/includes/Fetcher/GitHubFetcher.php
new file mode 100644
index 00000000..eba89a9e
--- /dev/null
+++ b/MLEB/LocalisationUpdate/includes/Fetcher/GitHubFetcher.php
@@ -0,0 +1,71 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate\Fetcher;
+
+/**
+ * This class uses GitHub API to obtain a list of files present in a directory
+ * to avoid fetching files that don't exist.
+ *
+ * @todo Could use file hashes to 1) avoid fetching files with same hash as
+ * the source. 2) avoid fetching files which haven't changed since last check
+ * if we store them.
+ */
+class GitHubFetcher extends HttpFetcher {
+	/**
+	 * Lists the directory of the pattern through the GitHub API and fetches
+	 * every listed entry from the directory part of the pattern.
+	 *
+	 * @param string $pattern Url of the form
+	 *  https://raw.github.com/ORG/REPO/BRANCH/PATH/FILEPATTERN
+	 *
+	 * @return array File contents keyed by file url.
+	 * @throws \Exception If the pattern cannot be parsed or the directory
+	 *  listing cannot be retrieved.
+	 */
+	public function fetchDirectory( $pattern ) {
+		global $wgLocalisationUpdateHttpRequestOptions;
+
+		$domain = preg_quote( 'https://raw.github.com/', '~' );
+		$p = "~^$domain(?P<org>[^/]+)/(?P<repo>[^/]+)/(?P<branch>[^/]+)/(?P<path>.+)/.+$~";
+		if ( !preg_match( $p, $pattern, $m ) ) {
+			// Without a match there is no org, repo or path to query the API with
+			throw new \Exception( "Unable to parse GitHub url $pattern" );
+		}
+
+		// List the same branch the files are fetched from; without ref the API
+		// lists the default branch of the repository.
+		$apiURL = "https://api.github.com/repos/{$m['org']}/{$m['repo']}/contents/{$m['path']}" .
+			'?ref=' . rawurlencode( $m['branch'] );
+		$json = \Http::get( $apiURL, $wgLocalisationUpdateHttpRequestOptions, __METHOD__ );
+		if ( !$json ) {
+			throw new \Exception( "Unable to get directory listing for {$m['org']}/{$m['repo']}" );
+		}
+
+		$listing = \FormatJson::decode( $json, true );
+		if ( !is_array( $listing ) ) {
+			// The response did not decode to an array, so there is nothing to iterate over
+			throw new \Exception( "Unable to get directory listing for {$m['org']}/{$m['repo']}" );
+		}
+
+		$files = [];
+		$directory = dirname( $pattern );
+		foreach ( $listing as $fileinfo ) {
+			// Skip anything that is not a listing entry with a name
+			if ( !is_array( $fileinfo ) || !isset( $fileinfo['name'] ) ) {
+				continue;
+			}
+
+			$fileurl = $directory . '/' . $fileinfo['name'];
+			$file = $this->fetchFile( $fileurl );
+			if ( $file ) {
+				$files[$fileurl] = $file;
+			}
+		}
+		return $files;
+	}
+}
diff --git a/MLEB/LocalisationUpdate/includes/Fetcher/HttpFetcher.php b/MLEB/LocalisationUpdate/includes/Fetcher/HttpFetcher.php
new file mode 100644
index 00000000..80c1394e
--- /dev/null
+++ b/MLEB/LocalisationUpdate/includes/Fetcher/HttpFetcher.php
@@ -0,0 +1,50 @@
+<?php
+/**
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+namespace LocalisationUpdate\Fetcher;
+
+/**
+ * Fetches files over HTTP(s).
+ */
+class HttpFetcher implements Fetcher {
+	/**
+	 * Fetches one url using the request options configured in
+	 * $wgLocalisationUpdateHttpRequestOptions.
+	 *
+	 * @param string $url
+	 *
+	 * @return bool|string
+	 */
+	public function fetchFile( $url ) {
+		global $wgLocalisationUpdateHttpRequestOptions;
+		return \Http::get( $url, $wgLocalisationUpdateHttpRequestOptions, __METHOD__ );
+	}
+
+	/**
+	 * This is horribly inefficient: one request is made per language code
+	 * returned by Language::fetchLanguageNames. Subclasses have more efficient
+	 * implementations of this.
+	 *
+	 * @param string $pattern Url in which * stands for the language code.
+	 * @return array Response bodies keyed by url; falsy responses are left out.
+	 */
+	public function fetchDirectory( $pattern ) {
+		$files = [];
+
+		$languages = \Language::fetchLanguageNames( null, 'mwfile' );
+
+		foreach ( array_keys( $languages ) as $code ) {
+			$url = str_replace( '*', $code, $pattern );
+			$file = $this->fetchFile( $url );
+			if ( $file ) {
+				$files[$url] = $file;
+			}
+		}
+
+		return $files;
+	}
+}