summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'MLEB/Translate/ttmserver/SolrTTMServer.php')
-rw-r--r--MLEB/Translate/ttmserver/SolrTTMServer.php445
1 files changed, 0 insertions, 445 deletions
diff --git a/MLEB/Translate/ttmserver/SolrTTMServer.php b/MLEB/Translate/ttmserver/SolrTTMServer.php
deleted file mode 100644
index bb6c244c..00000000
--- a/MLEB/Translate/ttmserver/SolrTTMServer.php
+++ /dev/null
@@ -1,445 +0,0 @@
-<?php
-/**
- * TTMServer - The Translate extension translation memory interface
- *
- * @file
- * @author Niklas Laxström
- * @copyright Copyright © 2012-2013, Niklas Laxström
- * @license GPL-2.0-or-later
- * @ingroup TTMServer
- */
-
-/**
- * TTMServer backed based on Solr instance. Depends on Solarium.
- * @since 2012-06-27
- * @ingroup TTMServer
- * @deprecated 1.27. Will be removed in 1.29.
- */
-class SolrTTMServer
- extends TTMServer
- implements ReadableTTMServer, SearchableTTMServer, WritableTTMServer
-{
- /**
- * In case auto-commit is not enabled, or even if it is, tell solr to
- * commit before this time has passed, in milliseconds.
- */
- const COMMIT_WITHIN = 5000;
-
- protected $client;
-
- /**
- * Reference to the maintenance script to relay logging output.
- */
- protected $logger;
-
- public function __construct( $config ) {
- wfDeprecated( __METHOD__, '1.24' );
-
- parent::__construct( $config );
-
- if ( isset( $config['config'] ) ) {
- $this->client = new Solarium_Client( $config['config'] );
- } else {
- $this->client = new Solarium_Client();
- }
- }
-
- public function isLocalSuggestion( array $suggestion ) {
- return $suggestion['wiki'] === wfWikiID();
- }
-
- public function expandLocation( array $suggestion ) {
- return $suggestion['uri'];
- }
-
- public function query( $sourceLanguage, $targetLanguage, $text ) {
- try {
- return $this->doQuery( $sourceLanguage, $targetLanguage, $text );
- } catch ( Solarium_Exception $e ) {
- throw new TranslationHelperException( 'Solarium exception: ' . $e );
- }
- }
-
- /// @see ReadableTTMServer::query
- protected function doQuery( $sourceLanguage, $targetLanguage, $text ) {
- /* Two query system:
- * 1) Find all strings in source language that match text
- * 2) Do another query for translations for those strings
- */
- // For now impose a length limit on query string to avoid doing
- // very slow queries. Magic number.
- if ( strlen( $text ) > 789 ) {
- return [];
- }
-
- $query = $this->client->createSelect();
- $query->setFields( [ 'globalid', 'content', 'score' ] );
-
- /* The interface usually displays three best candidates. These might
- * come from more than three matches, if the translation is the same.
- * This might not find all suggestions, if the top N best matching
- * source texts don't have translations, but worse matches do. We
- * could loop with start parameter to fetch more until we have enough
- * suggestions or the quality drops below the cutoff point. */
- $query->setRows( 25 );
-
- /* Our string can contain all kind of nasty characters, so we need
- * escape them with great pain. */
- $helper = $query->getHelper();
- $dist = $helper->escapePhrase( $text );
- // "edit" could also be ngram of other algorithm
- $dist = "strdist($dist,content,edit)";
- /* Note how we need to escape twice here, first the string for strdist
- * and then the strdist call itself for the query. And of course every-
- * thing will be URL encoded once sent over the line. */
- $query->setQuery( '_val_:%P1%', [ $dist ] );
-
- /* Filter queries are supposed to be efficient as they are separately
- * cached, but I haven't done any benchmarks. */
- $query->createFilterQuery( 'lang' )
- ->setQuery( 'language:%P1%', [ $sourceLanguage ] );
-
- $resultset = $this->client->select( $query );
-
- /* This query is doing two unrelated things:
- * 1) Collect the message contents and scores so that they can
- * be accessed later for the translations we found.
- * 2) Build the query string for the query that fetches the
- * translations.
- * This code is a bit uglier than I'd like it to be, since there
- * there is no field that globally identifies a message (message
- * definition and translations). */
- $contents = $scores = [];
- $queryString = '';
- foreach ( $resultset as $doc ) {
- $sourceId = preg_replace( '~/[^/]+$~', '', $doc->globalid );
- $contents[$sourceId] = $doc->content;
- $scores[$sourceId] = $doc->score;
-
- $globalid = $helper->escapePhrase( "$sourceId/$targetLanguage" );
- $queryString .= "globalid:$globalid ";
- }
-
- // Second query to fetch available translations
- $fetchQuery = $this->client->createSelect();
- $fetchQuery->setFields( [ 'wiki', 'uri', 'content', 'messageid', 'globalid' ] );
- // This come in random order, so have to fetch all and sort
- $fetchQuery->setRows( 25 );
- $fetchQuery->setQuery( $queryString );
- // With AND we would not find anything, obviously.
- $fetchQuery->setQueryDefaultOperator( Solarium_Query_Select::QUERY_OPERATOR_OR );
-
- $translations = $this->client->select( $fetchQuery );
-
- $suggestions = [];
- foreach ( $translations as $doc ) {
- /* Construct the matching source id */
- $sourceId = preg_replace( '~/[^/]+$~', '', $doc->globalid );
-
- /* Unfortunately we cannot do this on the search server,
- * because score is not a real field and thus cannot be
- * used in a filter query. */
- $quality = $scores[$sourceId];
- if ( $quality < $this->config['cutoff'] ) {
- continue;
- }
-
- $suggestions[] = [
- 'source' => $contents[$sourceId],
- 'target' => $doc->content,
- 'context' => $doc->messageid,
- 'quality' => $quality,
- 'wiki' => $doc->wiki,
- 'location' => $doc->messageid . '/' . $targetLanguage,
- 'uri' => $doc->uri,
- ];
- }
-
- /* Like mentioned above, we get results in random order. Sort them
- * now to have best matches first as expected by callers. */
- uasort( $suggestions, function ( $a, $b ) {
- if ( $a['quality'] === $b['quality'] ) {
- return 0;
- }
-
- return ( $a['quality'] < $b['quality'] ) ? 1 : -1;
- } );
-
- return $suggestions;
- }
-
- /* Write functions */
-
- public function update( MessageHandle $handle, $targetText ) {
- if ( $handle->getCode() === '' ) {
- return false;
- }
-
- /* There are various different cases here:
- * [new or updated] [fuzzy|non-fuzzy] [translation|definition]
- * 1) We don't distinguish between new or updated here.
- * 2) Delete old translation, but not definition
- * 3) Insert new translation or definition, if non-fuzzy
- * The definition should never be fuzzied anyway.
- *
- * These only apply to known messages.
- */
-
- $update = $this->client->createUpdate();
- $title = $handle->getTitle();
-
- $doDelete = true;
- $sourceLanguage = '';
- if ( $handle->isValid() ) {
- $sourceLanguage = $handle->getGroup()->getSourceLanguage();
- if ( $handle->getCode() === $sourceLanguage ) {
- $doDelete = false;
- }
- }
-
- if ( $doDelete ) {
- $base = Title::makeTitle( $title->getNamespace(), $handle->getKey() );
- $conds = [
- 'wiki' => wfWikiID(),
- 'language' => $handle->getCode(),
- 'messageid' => $base->getPrefixedText(),
- ];
- foreach ( $conds as $key => &$value ) {
- $value = "$key:" . $update->getHelper()->escapePhrase( $value );
- }
- $update->addDeleteQuery( implode( ' AND ', $conds ) );
- }
-
- if ( $targetText !== null ) {
- if ( $handle->isValid() ) {
- // Of the message definition page
- $targetTitle = $handle->getTitle();
- $sourceTitle = Title::makeTitle(
- $targetTitle->getNamespace(),
- $handle->getKey() . '/' . $sourceLanguage
- );
- $revId = (int)$sourceTitle->getLatestRevID();
- /* Note: in some cases the source page might not exist, in this case
- * we use 0 as message version identifier, to differentiate them from
- * orphan messages */
- } else {
- $revId = 'orphan';
- }
-
- $doc = $this->createDocument( $handle, $targetText, $revId );
- // Add document and commit within X seconds.
- $update->addDocument( $doc, null, self::COMMIT_WITHIN );
- }
-
- try {
- $this->client->update( $update );
- } catch ( Solarium_Exception $e ) {
- error_log( 'SolrTTMServer update-write failed' );
-
- return false;
- }
-
- return true;
- }
-
- /**
- * @see schema.xml
- * @param MessageHandle $handle
- * @param string $text
- * @param int $revId
- * @return Solarium_Document_ReadWrite
- */
- protected function createDocument( MessageHandle $handle, $text, $revId ) {
- $language = $handle->getCode();
- $translationTitle = $handle->getTitle();
-
- $title = Title::makeTitle( $handle->getTitle()->getNamespace(), $handle->getKey() );
- $wiki = wfWikiID();
- $messageid = $title->getPrefixedText();
- $globalid = "$wiki-$messageid-$revId/$language";
-
- $doc = new Solarium_Document_ReadWrite();
- $doc->wiki = $wiki;
- $doc->uri = $translationTitle->getCanonicalURL();
- $doc->messageid = $messageid;
- $doc->globalid = $globalid;
-
- $doc->language = $language;
- $doc->content = $text;
- $doc->setField( 'group', $handle->getGroupIds() );
-
- return $doc;
- }
-
- public function beginBootstrap() {
- $update = $this->client->createUpdate();
- $query = 'wiki:' . $update->getHelper()->escapePhrase( wfWikiID() );
- $update->addDeleteQuery( $query );
- $update->addCommit();
- $this->client->update( $update );
- }
-
- public function beginBatch() {
- // I hate the rule that forbids {}
- }
-
- public function batchInsertDefinitions( array $batch ) {
- $lb = new LinkBatch();
- foreach ( $batch as $data ) {
- $lb->addObj( $data[0]->getTitle() );
- }
- $lb->execute();
-
- $this->batchInsertTranslations( $batch );
- }
-
- public function batchInsertTranslations( array $batch ) {
- $update = $this->client->createUpdate();
- foreach ( $batch as $key => $data ) {
- list( $handle, $sourceLanguage, $text ) = $data;
- $revId = $handle->getTitleForLanguage( $sourceLanguage )->getLatestRevID();
- $doc = $this->createDocument( $handle, $text, $revId );
- // Add document and commit within X seconds.
- $update->addDocument( $doc, null, self::COMMIT_WITHIN );
- }
-
- $retries = 5;
-
- while ( $retries-- > 0 ) {
- try {
- $this->client->update( $update );
- break;
- } catch ( Solarium_Client_HttpException $e ) {
- if ( $retries === 0 ) {
- throw $e;
- } else {
- $c = get_class( $e );
- $msg = $e->getMessage();
- $this->logOutput( "Batch failed ($c: $msg), trying again in 10 seconds" );
- sleep( 10 );
- }
- }
- }
- }
-
- public function endBatch() {
- $update = $this->client->createUpdate();
- $this->client->update( $update );
- }
-
- public function endBootstrap() {
- $update = $this->client->createUpdate();
- $update->addCommit();
- $update->addOptimize();
- $this->client->update( $update );
- }
-
- public function getSolarium() {
- return $this->client;
- }
-
- public function setLogger( $logger ) {
- $this->logger = $logger;
- }
-
- // Can it get any uglier?
- protected function logOutput( $text ) {
- if ( $this->logger ) {
- $this->logger->statusLine( "$text\n" );
- }
- }
-
- /**
- * Search interface
- * @param string $queryString
- * @param array $opts
- * @param array $highlight
- * @return array
- */
- public function search( $queryString, $opts, $highlight ) {
- $client = $this->getSolarium();
-
- $query = $client->createSelect();
- $dismax = $query->getDisMax();
- $dismax->setQueryParser( 'edismax' );
- $query->setQuery( $queryString );
- $query->setRows( $opts['limit'] );
- $query->setStart( $opts['offset'] );
-
- list( $pre, $post ) = $highlight;
- $hl = $query->getHighlighting();
- $hl->setFields( 'text' );
- $hl->setSimplePrefix( $pre );
- $hl->setSimplePostfix( $post );
- $hl->setMaxAnalyzedChars( '5000' );
- $hl->setFragSize( '5000' );
- $hl->setSnippets( 1 );
-
- $languageFilter = $opts['language'];
- if ( $languageFilter !== '' ) {
- $query->createFilterQuery( 'languageFilter' )
- ->setQuery( 'language:%P1%', [ $languageFilter ] )
- ->addTag( 'filter' );
- }
-
- $groupFilter = $opts['group'];
- if ( $groupFilter !== '' ) {
- $query->createFilterQuery( 'groupFilter' )
- ->setQuery( 'group:%P1%', [ $groupFilter ] )
- ->addTag( 'filter' );
- }
-
- $facetSet = $query->getFacetSet();
-
- $language = $facetSet->createFacetField( 'language' );
- $language->setField( 'language' );
- $language->setMinCount( 1 );
- $language->addExclude( 'filter' );
-
- $group = $facetSet->createFacetField( 'group' );
- $group->setField( 'group' );
- $group->setMinCount( 1 );
- $group->setMissing( true );
- $group->addExclude( 'filter' );
-
- try {
- return $client->select( $query );
- } catch ( Solarium_Client_HttpException $e ) {
- throw new TTMServer( $e->getMessage() );
- }
- }
-
- public function getFacets( $resultset ) {
- return [
- 'language' => iterator_to_array( $resultset->getFacetSet()->getFacet( 'language' ) ),
- 'group' => iterator_to_array( $resultset->getFacetSet()->getFacet( 'group' ) ),
- ];
- }
-
- public function getTotalHits( $resultset ) {
- return $resultset->getNumFound();
- }
-
- public function getDocuments( $resultset ) {
- $highlighting = $resultset->getHighlighting();
- $ret = [];
- foreach ( $resultset as $document ) {
- $fields = iterator_to_array( $document );
- // Compatibility mapping
- $fields['localid'] = $fields['messageid'];
-
- $hdoc = $highlighting->getResult( $document->globalid );
- $text = $hdoc->getField( 'text' );
- if ( $text === [] ) {
- $text = $document->text;
- } else {
- $text = $text[0];
- }
-
- $fields['content'] = $text;
- $ret[] = $fields;
- }
-
- return $ret;
- }
-}