cat /opt/.deploy-meza/elastic-rebuild-all.sh
#!/bin/sh
if [ -z "$1" ]; then
    do_wikis="*/"
else
    do_wikis="$1"
fi
wiki_dir="/opt/htdocs/wikis"
cd "$wiki_dir"
for d in $do_wikis; do
    if [ -z "$1" ]; then
        wiki_id=${d%/}
    else
        wiki_id="$d"
    fi
    if [ ! -d "$wiki_dir/$wiki_id" ]; then
        echo "\"$wiki_id\" not a valid wiki ID"
        continue
    fi
    timestamp=$(date +"%F_%T")
    out_log="/opt/data-meza/logs/search-index/$wiki_id.$timestamp.log"
    echo "Rebuilding index for $wiki_id"
    echo " Output log:"
    echo " $out_log"
wiki_id="$wiki_id" bash "/opt/.deploy-meza/elastic-build-index.sh" > "$out_log" 2>&1
endtimestamp=$(date +"%F_%T")
# If the above command had a failing exit code
if [[ $? -ne 0 ]]; then
# FIXME #577 #681: add notification/warning system here
echo "elastic-build-index FAILED for \"$wiki_id\" at $endtimestamp"
#if the above command had a passing exit code (e.g. zero)
else
echo "elastic-build-index completed for \"$wiki_id\" at $endtimestamp"
fi
done
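For context, with no argument the wrapper loops over every directory under /opt/htdocs/wikis; with an argument it rebuilds just that one wiki ID. A minimal sketch of both invocations, assuming a wiki ID of "en" and enough privileges to write the logs under /opt/data-meza:

# Rebuild the search index for every wiki
sudo bash /opt/.deploy-meza/elastic-rebuild-all.sh

# Rebuild a single wiki; the argument is a directory name under /opt/htdocs/wikis
sudo bash /opt/.deploy-meza/elastic-rebuild-all.sh en
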
cat /opt/.deploy-meza/elastic-build-index.sh
echo "******* Generating elasticsearch index *******"
disable_search_file="/opt/.deploy-meza/public/wikis/$wiki_id/postLocalSettings.d/disable-search-update.php"
# Disable search update in wiki-specific settings
echo -e "<?php\n\$wgDisableSearchUpdate = true;\n" > "$disable_search_file"
# Run script to generate elasticsearch index
cd "/opt/htdocs/mediawiki"
WIKI="$wiki_id" php "/opt/htdocs/mediawiki/extensions/CirrusSearch/maintenance/updateSearchIndexConfig.php" --startOver
# Remove search-update disable in wiki-specific settings
rm -f "$disable_search_file"
# Bootstrap the search index
#
# Note that this can take some time
# For large wikis, read "Bootstrapping large wikis" in https://git.wikimedia.org/blob/mediawiki%2Fextensions%2FCirrusSearch.git/REL1_25/README
WIKI="$wiki_id" php "/opt/htdocs/mediawiki/extensions/CirrusSearch/maintenance/forceSearchIndex.php" --skipLinks --indexOnSkip
WIKI="$wiki_id" php "/opt/htdocs/mediawiki/extensions/CirrusSearch/maintenance/forceSearchIndex.php" --skipParse
echo "******* Elastic Search build index complete! *******"
Examples of content not indexed
https://fswiki.qualitybox.us/wiki/en/Special:PrefixIndex?prefix=&namespace=3100
https://fswiki.qualitybox.us/wiki/en/GuidedResearch:Apples?action=cirrusdump
https://fswiki.qualitybox.us/wiki/en/GuidedResearch:Pears?action=cirrusdump
https://fswiki.qualitybox.us/wiki/en/Special:PrefixIndex?prefix=&namespace=3102
https://fswiki.qualitybox.us/wiki/en/AFOG:Contractors?action=cirrusdump
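
Those cirrusdump URLs can also be fetched from a shell to check whether a given page has an index document; as far as I can tell, an indexed page dumps its Elasticsearch document as JSON while an unindexed page dumps an empty array. The curl call below is just a sketch, not something from the logs:

# An unindexed page typically returns an empty JSON array ([]);
# an indexed page returns its Elasticsearch document
curl -s 'https://fswiki.qualitybox.us/wiki/en/GuidedResearch:Apples?action=cirrusdump'
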
Error text
There are no reported errors, but looking at the indexing log, I get this:
[ wiki_en] Indexed 7 pages ending at 28517 at 8/second
[837a724cd8e9691cd1bedabc] [no req] MWException from line 333 of /opt/htdocs/mediawiki/includes/parser/ParserOutput.php: Bad parser output text.
Backtrace:
#0 [internal function]: ParserOutput->{closure}(array)
#1 /opt/htdocs/mediawiki/includes/parser/ParserOutput.php(344): preg_replace_callback(string, Closure, string)
#2 /opt/htdocs/mediawiki/includes/content/WikiTextStructure.php(152): ParserOutput->getText(array)
#3 /opt/htdocs/mediawiki/includes/content/WikiTextStructure.php(221): WikiTextStructure->extractWikitextParts()
#4 /opt/htdocs/mediawiki/includes/content/WikitextContentHandler.php(152): WikiTextStructure->getOpeningText()
#5 /opt/htdocs/mediawiki/extensions/CirrusSearch/includes/Updater.php(351): WikitextContentHandler->getDataForSearchIndex(WikiPage, ParserOutput, CirrusSearch)
#6 /opt/htdocs/mediawiki/extensions/CirrusSearch/includes/Updater.php(407): CirrusSearch\Updater::buildDocument(CirrusSearch, WikiPage, CirrusSearch\Connection, integer, integer, integer)
#7 /opt/htdocs/mediawiki/extensions/CirrusSearch/includes/Updater.php(205): CirrusSearch\Updater->buildDocumentsForPages(array, integer)
#8 /opt/htdocs/mediawiki/extensions/CirrusSearch/maintenance/forceSearchIndex.php(218): CirrusSearch\Updater->updatePages(array, integer)
#9 /opt/htdocs/mediawiki/maintenance/doMaintenance.php(94): CirrusSearch\ForceSearchIndex->execute()
#10 /opt/htdocs/mediawiki/extensions/CirrusSearch/maintenance/forceSearchIndex.php(680): require_once(string)
#11 {main}
[ wiki_en] Indexed 50 pages ending at 100 at 120/second
[snip]
[ wiki_en] Indexed 49 pages ending at 290919 at 134/second
[ wiki_en] Indexed 23 pages ending at 290942 at 134/second
Indexed a total of 191458 pages at 134/second
******* Elastic Search build index complete! *******
Notice how the page count starts over after the exception, and the run eventually completes without further complaint. I don’t know how to find the article that is causing the parse problem.
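
If I read the log right, the exception kills the first forceSearchIndex pass (--skipLinks --indexOnSkip, the one that parses pages), and the count then restarts because the second pass (--skipParse) begins again from the start, which would be why the run still finishes. Since the exception appears right after the batch ending at page ID 28517, one way to narrow it down might be to re-run that first pass over just that neighborhood of IDs with a batch size of 1, so the failing page is the first ID after the last one reported as indexed. I haven't verified these flags against this CirrusSearch version, and the ID range is only a guess from the log:

cd /opt/htdocs/mediawiki
# "en" stands in for whatever wiki ID the rebuild script passes as $wiki_id;
# the exception should show up right after the last successfully indexed ID
WIKI=en php extensions/CirrusSearch/maintenance/forceSearchIndex.php \
    --fromId 28517 --toId 28600 --batch-size 1 --skipLinks --indexOnSkip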