From OrganicDesign Wiki
<?php
/**
*
* File for Parser and related classes
*
* @addtogroup Parser
*/
/**
* PHP Parser - Processes wiki markup (which uses a more user-friendly
* syntax, such as "[[link]]" for making links), and provides a one-way
* transformation of that wiki markup into XHTML output / markup
* (which in turn the browser understands, and can display).
*
* <pre>
* There are five main entry points into the Parser class:
* parse()
* produces HTML output
* preSaveTransform()
* produces altered wiki markup.
* preprocess()
* removes HTML comments and expands templates
* cleanSig()
* Cleans a signature before saving it to preferences
* extractSections()
* Extracts sections from an article for section editing
*
* Globals used:
* objects: $wgLang, $wgContLang
*
* NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
*
* settings:
* $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
* $wgNamespacesWithSubpages, $wgAllowExternalImages*,
* $wgLocaltimezone, $wgAllowSpecialInclusion*,
* $wgMaxArticleSize*
*
* * only within ParserOptions
* </pre>
*
* @addtogroup Parser
*/
class Parser
{
/**
* Update this version number when the ParserOutput format
* changes in an incompatible way, so the parser cache
* can automatically discard old data.
*/
const VERSION = '1.6.4';
# Flags for Parser::setFunctionHook
# Also available as global constants from Defines.php
# SFH_NO_HASH registers a function that is called without the leading hash,
# e.g. {{int:...}} instead of {{#int:...}} (see the registrations in firstCallInit()).
const SFH_NO_HASH = 1;
# NOTE(review): presumably makes the hook receive object-style arguments
# (used for the 'tag' function in firstCallInit()) -- confirm against setFunctionHook().
const SFH_OBJECT_ARGS = 2;
# Constants needed for external link processing
# Everything except bracket, space, or control characters
const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]';
# Matches an http/https URL whose last path component ends in an image
# extension (gif, png, jpg, jpeg). The pattern spans two source lines; the
# embedded newline is ignored because of the /x modifier.
const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';
// State constants for the definition list colon extraction
const COLON_STATE_TEXT = 0;
const COLON_STATE_TAG = 1;
const COLON_STATE_TAGSTART = 2;
const COLON_STATE_CLOSETAG = 3;
const COLON_STATE_TAGSLASH = 4;
const COLON_STATE_COMMENT = 5;
const COLON_STATE_COMMENTDASH = 6;
const COLON_STATE_COMMENTDASHDASH = 7;
// Flags for preprocessToDom
const PTD_FOR_INCLUSION = 1;
// Allowed values for $this->mOutputType
// Parameter to startExternalParse().
// NOTE(review): OT_MSG has the same value as OT_PREPROCESS, so the two cannot
// be told apart by value (setOutputType() only tests OT_PREPROCESS) -- confirm
// that this aliasing is intended.
const OT_HTML = 1;
const OT_WIKI = 2;
const OT_PREPROCESS = 3;
const OT_MSG = 3;
/**#@+
* @private
*/
# Persistent:
# NOTE(review): mPreprocessorClass and mFirstCall (assigned in __construct())
# and mUniqPrefix, mShowToc and mForceTocPosition (assigned in clearState())
# are not among the declarations visible here; they appear to be created
# dynamically on first assignment.
var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
$mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerSuffix, $mMarkerIndex,
$mExtLinkBracketedRegex, $mPreprocessor, $mDefaultStripList, $mVarCache, $mConf;
# Cleared with clearState():
var $mOutput, $mAutonumber, $mDTopen, $mStripState;
var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
var $mInterwikiLinkHolders, $mLinkHolders;
var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
var $mTplExpandCache; // empty-frame expansion cache
var $mTplRedirCache, $mTplDomCache, $mHeadings;
# Temporary
# These are variables reset at least once per parse regardless of $clearState
var $mOptions, // ParserOptions object
$mTitle, // Title context, used for self-link rendering and similar things
$mOutputType, // Output type, one of the OT_xxx constants
$ot, // Shortcut alias, see setOutputType()
$mRevisionId, // ID to display in {{REVISIONID}} tags
$mRevisionTimestamp, // The timestamp of the specified revision ID
$mRevIdForTs; // The revision ID which was used to fetch the timestamp
/**#@-*/
/**
* Constructor
*
* @public
*/
function __construct( $conf = array() ) {
	# Strip-marker bookkeeping: running index plus the fixed tail that
	# every generated marker ends with.
	$this->mMarkerIndex = 0;
	$this->mMarkerSuffix = "-QINU\x7f";

	# All hook registries start out empty; function synonyms are kept in
	# two buckets, keyed 0 and 1.
	$this->mTagHooks = array();
	$this->mTransparentTagHooks = array();
	$this->mFunctionHooks = array();
	$this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
	$this->mVarCache = array();

	# The working strip list begins as a copy of the default one.
	$baseStripList = array( 'nowiki', 'gallery' );
	$this->mStripList = $baseStripList;
	$this->mDefaultStripList = $baseStripList;

	# Bracketed external link: "[" protocol + URL characters, optional
	# spaces, then label text up to the closing "]" on the same line.
	$protocolPattern = wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(\b(' . $protocolPattern . ')'.
		'[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S';

	# Remember the configuration and pick the preprocessor implementation,
	# falling back to the DOM-based one when none is configured.
	$this->mConf = $conf;
	$this->mPreprocessorClass = isset( $conf['preprocessorClass'] )
		? $conf['preprocessorClass']
		: 'Preprocessor_DOM';

	# firstCallInit() has not run yet.
	$this->mFirstCall = true;
}
/**
* Do various kinds of initialisation on the first call of the parser
*/
function firstCallInit() {
	if ( $this->mFirstCall ) {
		# Clear the flag before doing anything else, so any later call
		# returns immediately.
		$this->mFirstCall = false;
		wfProfileIn( __METHOD__ );
		global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions;

		$this->setHook( 'pre', array( $this, 'renderPreTag' ) );

		# Arguments to setFunctionHook():
		#   - name for lookup in the localized magic words array,
		#   - function callback,
		#   - optional SFH_NO_HASH to omit the hash from calls
		#     (e.g. {{int:...}} instead of {{#int:...}})
		# Registration order is kept exactly as it has always been.
		$core = 'CoreParserFunctions';

		# 'int' is the one hash-less function whose handler name differs
		# from its magic word.
		$this->setFunctionHook( 'int', array( $core, 'intFunction' ), SFH_NO_HASH );

		# Hash-less functions whose handler method has the same name as
		# the magic word.
		$sameNamed = array(
			'ns', 'urlencode', 'lcfirst', 'ucfirst', 'lc', 'uc',
			'localurl', 'localurle', 'fullurl', 'fullurle',
			'formatnum', 'grammar', 'plural',
			'numberofpages', 'numberofusers', 'numberofarticles',
			'numberoffiles', 'numberofadmins', 'numberofedits',
			'language', 'padleft', 'padright', 'anchorencode',
		);
		foreach ( $sameNamed as $magicWord ) {
			$this->setFunctionHook( $magicWord, array( $core, $magicWord ), SFH_NO_HASH );
		}

		# 'special' is registered without flags, i.e. with the hash.
		$this->setFunctionHook( 'special', array( $core, 'special' ) );

		foreach ( array( 'defaultsort', 'filepath' ) as $magicWord ) {
			$this->setFunctionHook( $magicWord, array( $core, $magicWord ), SFH_NO_HASH );
		}

		$this->setFunctionHook( 'tag', array( $core, 'tagObj' ), SFH_OBJECT_ARGS );

		# Functions that are only available when enabled by configuration.
		if ( $wgAllowDisplayTitle ) {
			$this->setFunctionHook( 'displaytitle', array( $core, 'displaytitle' ), SFH_NO_HASH );
		}
		if ( $wgAllowSlowParserFunctions ) {
			$this->setFunctionHook( 'pagesinnamespace', array( $core, 'pagesinnamespace' ), SFH_NO_HASH );
		}

		$this->initialiseVariables();
		wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
		wfProfileOut( __METHOD__ );
	}
}
/**
* Clear Parser state
*
* @private
*/
function clearState() {
	wfProfileIn( __METHOD__ );

	# One-time setup is deferred until the parser is first used.
	if ( $this->mFirstCall ) {
		$this->firstCallInit();
	}

	# Fresh output object and per-parse scalar state.
	$this->mOutput = new ParserOutput;
	$this->mAutonumber = 0;
	$this->mLastSection = '';
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mArgStack = false;
	$this->mIncludeCount = array();
	$this->mStripState = new StripState;

	# Empty link-holder tables.
	$interwikiHolders = array();
	$interwikiHolders['texts'] = array();
	$interwikiHolders['titles'] = array();
	$this->mInterwikiLinkHolders = $interwikiHolders;

	$linkHolders = array();
	foreach ( array( 'namespaces', 'dbkeys', 'queries', 'texts', 'titles' ) as $column ) {
		$linkHolders[$column] = array();
	}
	$this->mLinkHolders = $linkHolders;

	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;

	# Prefix for the temporary replacement strings used by the multipass
	# parser. It starts with \x7f (it used to be \x07, which is disallowed
	# in XML; it was changed to allow XML double-parsing -- TS). Having a
	# control character at the front also means the prefix should not match
	# when butted up against identifier-like text.
	#
	# The prefix must not consist solely of title characters, or it would
	# change the behaviour of <nowiki> inside a link.
	$this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString();

	# Template caches are cleared on every parse, bug 4549
	$this->mTplExpandCache = array();
	$this->mTplRedirCache = array();
	$this->mTplDomCache = array();

	$this->mShowToc = true;
	$this->mForceTocPosition = false;

	$sizes = array();
	$sizes['post-expand'] = 0;
	$sizes['arg'] = 0;
	$this->mIncludeSizes = $sizes;

	$this->mPPNodeCount = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = array();

	# Fix cloning: drop a preprocessor that still points at another parser
	# so that getPreprocessor() builds a new one for this instance.
	if ( isset( $this->mPreprocessor ) ) {
		if ( $this->mPreprocessor->parser !== $this ) {
			$this->mPreprocessor = null;
		}
	}

	wfRunHooks( 'ParserClearState', array( &$this ) );
	wfProfileOut( __METHOD__ );
}
/**
 * Record the output type and rebuild the $ot shortcut flags.
 *
 * @param int $ot One of the OT_xxx constants
 */
function setOutputType( $ot ) {
	# Shortcut alias: one boolean per output type, compared loosely.
	$flags = array();
	$flags['html'] = ( $ot == self::OT_HTML );
	$flags['wiki'] = ( $ot == self::OT_WIKI );
	$flags['pre'] = ( $ot == self::OT_PREPROCESS );

	$this->mOutputType = $ot;
	$this->ot = $flags;
}
/**
* Set the context title
*/
function setTitle( $t ) {
	# A missing title or a FakeTitle placeholder is replaced by a dummy title.
	$needsPlaceholder = !$t || $t instanceof FakeTitle;
	if ( $needsPlaceholder ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	$fragment = strval( $t->getFragment() );
	if ( $fragment === '' ) {
		# No fragment: the caller's object is used as is.
		$this->mTitle = $t;
		return;
	}

	# Strip the fragment to avoid various odd effects. This is done on a
	# clone so the caller's title object keeps its fragment.
	$this->mTitle = clone $t;
	$this->mTitle->setFragment( '' );
}
/**
* Accessor for mUniqPrefix.
*
* @public
*/
function uniqPrefix() {
	if ( isset( $this->mUniqPrefix ) ) {
		return $this->mUniqPrefix;
	}
	// @fixme this is probably *horribly wrong*
	// LanguageConverter seems to want $wgParser's uniqPrefix, but on a
	// parser cache hit the parser may never have been initialized, so
	// there is no prefix to hand out. An empty string is returned rather
	// than throwing an MWException for the uninitialized access.
	return '';
}
/**
* Convert wikitext to HTML
* Do not call this function recursively.
*
* @param string $text Text we want to parse
* @param Title &$title A title object
* @param array $options
* @param boolean $linestart
* @param boolean $clearState
* @param int $revid number to pass in {{REVISIONID}}
* @return ParserOutput a ParserOutput
*/
public function parse( $text, &$title, $options, $linestart = true, $clearState = true, $revid = null ) {
	# Overall flow: run the wikitext through internalParse(), apply the
	# typographic and block-level passes, resolve link placeholders, run
	# language conversion, expand transparent tag hooks, then tidy the
	# resulting HTML and store it in the ParserOutput.
	global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang;
	# Two profiling sections: one for the method, one keyed by the caller.
	$fname = 'Parser::parse-' . wfGetCaller();
	wfProfileIn( __METHOD__ );
	wfProfileIn( $fname );
	if ( $clearState ) {
		$this->clearState();
	}
	$this->mOptions = $options;
	$this->setTitle( $title );
	# Remember the revision context so it can be restored before returning.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	if( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionTimestamp = null;
	}
	$this->setOutputType( self::OT_HTML );
	wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
	# No more strip! Both hooks are still fired for extensions that use them.
	wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
	$text = $this->internalParse( $text );
	$text = $this->mStripState->unstripGeneral( $text );
	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$fixtags = array(
		# french spaces, last one Guillemet-left
		# only if there is something before the space.
		# The space is turned into a non-breaking space entity; \\2 is always
		# empty because the look-ahead does not capture.
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&nbsp;\\2',
		# french spaces, Guillemet-right
		'/(\\302\\253) /' => '\\1&nbsp;',
	);
	$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	$this->replaceLinkHolders( $text );
	# the position of the parserConvert() call should not be changed. it
	# assumes that the links are all replaced and the only thing left
	# is the <nowiki> mark.
	# Side-effects: this calls $this->mOutput->setTitleText()
	$text = $wgContLang->parserConvert( $text, $this );
	$text = $this->mStripState->unstripNoWiki( $text );
	wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
	//!JF Move to its own function
	# Transparent tag hooks: pull the registered elements out of the text,
	# run each hook (or keep the raw tag when no hook is registered for it),
	# and put the results back via the general strip state.
	$uniq_prefix = $this->mUniqPrefix;
	$matches = array();
	$elements = array_keys( $this->mTransparentTagHooks );
	$text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
	foreach( $matches as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$tagName = strtolower( $element );
		if( isset( $this->mTransparentTagHooks[$tagName] ) ) {
			$output = call_user_func_array( $this->mTransparentTagHooks[$tagName],
				array( $content, $params, $this ) );
		} else {
			$output = $tag;
		}
		$this->mStripState->general->setPair( $marker, $output );
	}
	$text = $this->mStripState->unstripGeneral( $text );
	$text = Sanitizer::normalizeCharReferences( $text );
	if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) {
		$text = Parser::tidy($text);
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
			'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (bug 3695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
			'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
			'\\1\\3<div\\5>\\6</div>\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		);
		$text = preg_replace(
			array_keys( $tidyregs ),
			array_values( $tidyregs ),
			$text );
	}
	wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();
		$limitReport =
			"NewPP limit report\n" .
			"Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
			"Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
			"Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n";
		wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
		# The report is appended as an HTML comment.
		$text .= "\n<!-- \n$limitReport-->\n";
	}
	$this->mOutput->setText( $text );
	# Restore the revision context saved at the start.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	wfProfileOut( $fname );
	wfProfileOut( __METHOD__ );
	return $this->mOutput;
}
/**
* Recursive parser entry point that can be called from an extension tag
* hook.
*/
function recursiveTagParse( $text ) {
	wfProfileIn( __METHOD__ );

	# Nothing happens between the two strip hooks here; they are fired
	# back to back, in this order.
	foreach ( array( 'ParserBeforeStrip', 'ParserAfterStrip' ) as $hookName ) {
		wfRunHooks( $hookName, array( &$this, &$text, &$this->mStripState ) );
	}

	$parsed = $this->internalParse( $text );

	wfProfileOut( __METHOD__ );
	return $parsed;
}
/**
* Expand templates and variables in the text, producing valid, static wikitext.
* Also removes comments.
*/
function preprocess( $text, $title, $options, $revid = null ) {
	wfProfileIn( __METHOD__ );

	# Start from a clean slate in preprocess mode.
	$this->clearState();
	$this->setOutputType( self::OT_PREPROCESS );
	$this->mOptions = $options;
	$this->setTitle( $title );

	# Only override the revision ID when the caller supplied one.
	if ( null !== $revid ) {
		$this->mRevisionId = $revid;
	}

	# Both strip hooks are fired back to back, in this order.
	foreach ( array( 'ParserBeforeStrip', 'ParserAfterStrip' ) as $hookName ) {
		wfRunHooks( $hookName, array( &$this, &$text, &$this->mStripState ) );
	}

	# Expand templates/variables, then put stripped content back.
	$expanded = $this->replaceVariables( $text );
	$result = $this->mStripState->unstripBoth( $expanded );

	wfProfileOut( __METHOD__ );
	return $result;
}
/**
* Get a random string
*
* @private
* @static
*/
function getRandomString() {
	# Two independent draws from mt_rand() over [0, 0x7fffffff], each
	# rendered as lowercase hex and joined in draw order.
	$hex = '';
	for ( $draw = 0; $draw < 2; $draw++ ) {
		$hex .= dechex( mt_rand( 0, 0x7fffffff ) );
	}
	return $hex;
}
/**
 * Accessor for the context title set by setTitle().
 * Returned by reference.
 */
function &getTitle() {
	return $this->mTitle;
}
/**
 * Accessor for the options object of the current parse.
 */
function getOptions() {
	$options = $this->mOptions;
	return $options;
}
/**
 * Pick the language object for parser functions: $wgLang when the
 * options say this is an interface message, $wgContLang otherwise.
 */
function getFunctionLang() {
	global $wgLang, $wgContLang;
	if ( $this->mOptions->getInterfaceMessage() ) {
		return $wgLang;
	}
	return $wgContLang;
}
/**
* Get a preprocessor object
*/
function getPreprocessor() {
	# Reuse the instance if one has already been built for this parser.
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}
	# Otherwise instantiate the configured class, handing it this parser.
	$preprocessorClass = $this->mPreprocessorClass;
	$this->mPreprocessor = new $preprocessorClass( $this );
	return $this->mPreprocessor;
}
/**
* Replaces all occurrences of HTML-style comments and the given tags
* in the text with a random marker and returns the new text. The output
* parameter $matches will be an associative array filled with data in
* the form:
* 'UNIQ-xxxxx' => array(
* 'element',
* 'tag content',
* array( 'param' => 'x' ),
* '<element param="x">tag content</element>' ) )
*
* @param $elements list of element names. Comments are always extracted.
* @param $text Source text string.
* @param $uniq_prefix
*
* @public
* @static
*/
function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
// NOTE(review): the marker built below reads $this->mMarkerSuffix, so despite
// the @static tag this needs an object context (parse() calls it as
// Parser::extractTagsAndParams() from inside an instance method).
// Marker counter; being static, it keeps counting across calls.
static $n = 1;
// Text accumulated so far, with every extracted element replaced by its marker.
$stripped = '';
$matches = array();
$taglist = implode( '|', $elements );
// Opening pattern: either one of the requested tags (name, attributes,
// ">" or "/>") or the start of an HTML comment.
$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
while ( '' != $text ) {
// Split at the first opener only; the captured groups are returned
// between the text before the match and the text after it.
$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
$stripped .= $p[0];
if( count( $p ) < 5 ) {
// Fewer than 5 pieces: no opener was found, so we are done.
break;
}
if( count( $p ) > 5 ) {
// comment
// More than 5 pieces: the comment alternative matched; its capture
// sits at index 4 and the remaining text at index 5.
$element = $p[4];
$attributes = '';
$close = '';
$inside = $p[5];
} else {
// tag
$element = $p[1];
$attributes = $p[2];
$close = $p[3];
$inside = $p[4];
}
// Marker layout: prefix, element name, 8-digit uppercase hex counter, suffix.
$marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . $this->mMarkerSuffix;
$stripped .= $marker;
if ( $close === '/>' ) {
// Empty element tag, <tag />
$content = null;
$text = $inside;
$tail = null;
} else {
// Look for the matching terminator: "-->" for comments, otherwise
// a case-insensitive closing tag for the same element.
if( $element == '!--' ) {
$end = '/(-->)/';
} else {
$end = "/(<\\/$element\\s*>)/i";
}
$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
$content = $q[0];
if( count( $q ) < 3 ) {
# No end tag -- let it run out to the end of the text.
$tail = '';
$text = '';
} else {
// $q[1] is the terminator itself, $q[2] the text still to be scanned.
$tail = $q[1];
$text = $q[2];
}
}
// Record: element name, inner content, decoded attributes, and the
// reconstructed original markup for this element.
$matches[$marker] = array( $element,
$content,
Sanitizer::decodeTagAttributes( $attributes ),
"<$element$attributes$close$content$tail" );
}
return $stripped;
}
/**
* Get a list of strippable XML-like elements
*/
function getStripList() {
	global $wgRawHtml;

	# Start from a copy of the registered list; the optional entries are
	# appended to the copy only.
	$tagNames = $this->mStripList;

	# <html> is only strippable when raw HTML is enabled.
	if ( $wgRawHtml ) {
		$tagNames[] = 'html';
	}

	# <math> is only strippable when the options enable TeX.
	$texEnabled = $this->mOptions->getUseTeX();
	if ( $texEnabled ) {
		$tagNames[] = 'math';
	}

	return $tagNames;
}
/**
* @deprecated use replaceVariables
*/
function strip( $text, $state, $stripcomments = false , $dontstrip = array () ) {
	# Kept only for compatibility: every argument except $text is ignored
	# and the text is handed back untouched.
	$unchanged = $text;
	return $unchanged;
}
/**
* Restores pre, math, and other extensions removed by strip()
*
* always call unstripNoWiki() after this one
* @private
* @deprecated use $this->mStripState->unstrip()
*/
function unstrip( $text, $state ) {
	# Thin wrapper: delegate to the supplied strip state.
	$restored = $state->unstripGeneral( $text );
	return $restored;
}
/**
* Always call this after unstrip() to preserve the order
*
* @private
* @deprecated use $this->mStripState->unstrip()
*/
function unstripNoWiki( $text, $state ) {
	# Thin wrapper: delegate to the supplied strip state.
	$restored = $state->unstripNoWiki( $text );
	return $restored;
}
/**
* @deprecated use $this->mStripState->unstripBoth()
*/
function unstripForHTML( $text ) {
	# Thin wrapper around the parser's own strip state.
	$stripState = $this->mStripState;
	return $stripState->unstripBoth( $text );
}
/**
* Add an item to the strip state
* Returns the unique tag which must be inserted into the stripped text
* The tag will be replaced with the original text in unstrip()
*
* @private
*/
function insertStripItem( $text ) {
	# Build the marker from the current index, register it, then advance
	# the index for the next caller.
	$marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-{$this->mMarkerSuffix}";
	$this->mStripState->general->setPair( $marker, $text );
	$this->mMarkerIndex++;
	return $marker;
}
/**
* Interface with html tidy, used if $wgUseTidy = true.
* If tidy isn't able to correct the markup, the original will be
* returned in all its glory with a warning comment appended.
*
* Either the external tidy program or the in-process tidy extension
* will be used depending on availability. Override the default
* $wgTidyInternal setting to disable the internal if it's not working.
*
* @param string $text Hideous HTML input
* @return string Corrected HTML output
* @public
* @static
*/
function tidy( $text ) {
	global $wgTidyInternal;

	# Wrap the fragment in a complete XHTML document before handing it
	# to tidy.
	$docHead = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>';
	$docTail = '</body></html>';
	$document = $docHead . $text . $docTail;

	# In-process extension or external binary, depending on configuration.
	$tidied = $wgTidyInternal
		? Parser::internalTidy( $document )
		: Parser::externalTidy( $document );

	if ( !is_null( $tidied ) ) {
		return $tidied;
	}

	# Tidy gave up: return the original fragment with a warning comment.
	wfDebug( "Tidy error detected!\n" );
	return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
}
/**
* Spawn an external HTML tidy process and get corrected markup back from it.
*
* @private
* @static
*/
function externalTidy( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	$profileSection = 'Parser::externalTidy';
	wfProfileIn( $profileSection );

	$tidied = '';

	# stdin and stdout are pipes; stderr is appended to the null device.
	$stdin = array('pipe', 'r');
	$stdout = array('pipe', 'w');
	$stderr = array('file', wfGetNull(), 'a');
	$descriptors = array( 0 => $stdin, 1 => $stdout, 2 => $stderr );

	$encodingOpt = ' -utf8';
	$command = "$wgTidyBin -config $wgTidyConf $wgTidyOpts$encodingOpt";

	$handles = array();
	$child = proc_open( $command, $descriptors, $handles );
	if ( is_resource( $child ) ) {
		// Theoretically, this style of communication could cause a deadlock
		// here. If the stdout buffer fills up, then writes to stdin could
		// block. This doesn't appear to happen with tidy, because tidy only
		// writes to stdout after it's finished reading from stdin. Search
		// for tidyParseStdin and tidySaveStdout in console/tidy.c
		fwrite( $handles[0], $text );
		fclose( $handles[0] );

		# Drain tidy's output in chunks until end of stream.
		while ( !feof( $handles[1] ) ) {
			$tidied .= fgets( $handles[1], 1024 );
		}
		fclose( $handles[1] );
		proc_close( $child );
	}

	wfProfileOut( $profileSection );

	// Non-empty input that produced no output means some kind of error
	// happened. Signal it with null; the caller falls back to the source
	// text and appends a warning.
	$failed = ( $tidied == '' && $text != '' );
	return $failed ? null : $tidied;
}
/**
* Use the HTML tidy PECL extension to use the tidy library in-process,
* saving the overhead of spawning a new process.
*
* 'pear install tidy' should be able to compile the extension module.
*
* @private
* @static
*/
function internalTidy( $text ) {
	global $wgTidyConf, $wgDebugTidy;
	$fname = 'Parser::internalTidy';
	wfProfileIn( $fname );
	# Parse and repair the document in-process, using the configured
	# tidy settings and UTF-8.
	$tidy = new tidy;
	$tidy->parseString( $text, $wgTidyConf, 'utf8' );
	$tidy->cleanRepair();
	if( $tidy->getStatus() == 2 ) {
		// 2 is magic number for fatal error
		// http://www.php.net/manual/en/function.tidy-get-status.php
		$cleansource = null;
	} else {
		$cleansource = tidy_get_output( $tidy );
	}
	if ( $wgDebugTidy && $tidy->getStatus() > 0 ) {
		# Append tidy's diagnostics as an HTML comment. Any "-->" inside the
		# error buffer is escaped so it cannot close the comment early and
		# leak raw diagnostic text into the page.
		$cleansource .= "<!--\nTidy reports:\n" .
			str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
			"\n-->";
	}
	wfProfileOut( $fname );
	return $cleansource;
}
/**
* parse the wiki syntax used to render tables
*
* @private
*/
function doTableStuff ( $text ) {
$fname = 'Parser::doTableStuff';
wfProfileIn( $fname );
$lines = explode ( "\n" , $text );
$td_history = array (); // Is currently a td tag open?
$last_tag_history = array (); // Save history of last lag activated (td, th or caption)
$tr_history = array (); // Is currently a tr tag open?
$tr_attributes = array (); // history of tr attributes
$has_opened_tr = array(); // Did this table open a <tr> element?
$indent_level = 0; // indent level of the table
foreach ( $lines as $key => $line )
{
$line = trim ( $line );
if( $line == '' ) { // empty line, go to next line
continue;
}
$first_character = $line{0};
$matches = array();
if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) {
// First check if we are starting a new table
$indent_level = strlen( $matches[1] );
$attributes = $this->mStripState->unstripBoth( $matches[2] );
$attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' );
$lines[$key] = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
array_push ( $td_history , false );
array_push ( $last_tag_history , '' );
array_push ( $tr_history , false );
array_push ( $tr_attributes , '' );
array_push ( $has_opened_tr , false );
} else if ( count ( $td_history ) == 0 ) {
// Don't do any of the following
continue;
} else if ( substr ( $line , 0 , 2 ) == '|}' ) {
// We are ending a table
$line = '</table>' . substr ( $line , 2 );
$last_tag = array_pop ( $last_tag_history );
if ( !array_pop ( $has_opened_tr ) ) {
$line = "<tr><td></td></tr>{$line}";
}
if ( array_pop ( $tr_history ) ) {
$line = "</tr>{$line}";
}
if