Personal tools

Parser.php

From OrganicDesign Wiki

Jump to: navigation, search
<?php
 
/**
 *
 * File for Parser and related classes
 *
 * @addtogroup Parser
 */
 
 
/**
 * PHP Parser - Processes wiki markup (which uses a more user-friendly 
 * syntax, such as "[[link]]" for making links), and provides a one-way
 * transformation of that wiki markup it into XHTML output / markup
 * (which in turn the browser understands, and can display).
 *
 * <pre>
 * There are five main entry points into the Parser class:
 * parse()
 *   produces HTML output
 * preSaveTransform().
 *   produces altered wiki markup.
 * preprocess()
 *   removes HTML comments and expands templates
 * cleanSig()
 *   Cleans a signature before saving it to preferences
 * extractSections()
 *   Extracts sections from an article for section editing
 *
 * Globals used:
 *    objects:   $wgLang, $wgContLang
 *
 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
 *
 * settings:
 *  $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
 *  $wgNamespacesWithSubpages, $wgAllowExternalImages*,
 *  $wgLocaltimezone, $wgAllowSpecialInclusion*,
 *  $wgMaxArticleSize*
 *
 *  * only within ParserOptions
 * </pre>
 *
 * @addtogroup Parser
 */
class Parser
{
	/**
	 * Update this version number when the ParserOutput format
	 * changes in an incompatible way, so the parser cache
	 * can automatically discard old data.
	 */
	const VERSION = '1.6.4';
 
	# Flags for Parser::setFunctionHook
	# Also available as global constants from Defines.php
	const SFH_NO_HASH = 1;
	const SFH_OBJECT_ARGS = 2;
 
	# Constants needed for external link processing
	# Everything except bracket, space, or control characters
	const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]';
	const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
		\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';
 
	// State constants for the definition list colon extraction
	const COLON_STATE_TEXT = 0;
	const COLON_STATE_TAG = 1;
	const COLON_STATE_TAGSTART = 2;
	const COLON_STATE_CLOSETAG = 3;
	const COLON_STATE_TAGSLASH = 4;
	const COLON_STATE_COMMENT = 5;
	const COLON_STATE_COMMENTDASH = 6;
	const COLON_STATE_COMMENTDASHDASH = 7;
 
	// Flags for preprocessToDom
	const PTD_FOR_INCLUSION = 1;
 
	// Allowed values for $this->mOutputType
	// Parameter to startExternalParse().
	const OT_HTML = 1;
	const OT_WIKI = 2;
	const OT_PREPROCESS = 3;
	const OT_MSG = 3;
 
	/**#@+
	 * @private
	 */
	# Persistent:
	var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
		$mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerSuffix, $mMarkerIndex,
		$mExtLinkBracketedRegex, $mPreprocessor, $mDefaultStripList, $mVarCache, $mConf;
 
 
	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState;
	var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
	var $mInterwikiLinkHolders, $mLinkHolders;
	var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
	var $mTplExpandCache; // empty-frame expansion cache
	var $mTplRedirCache, $mTplDomCache, $mHeadings;
 
	# Temporary
	# These are variables reset at least once per parse regardless of $clearState
	var $mOptions,      // ParserOptions object
		$mTitle,        // Title context, used for self-link rendering and similar things
		$mOutputType,   // Output type, one of the OT_xxx constants
		$ot,            // Shortcut alias, see setOutputType()
		$mRevisionId,   // ID to display in 81155 tags
		$mRevisionTimestamp, // The timestamp of the specified revision ID
		$mRevIdForTs;   // The revision ID which was used to fetch the timestamp
 
	/**#@-*/
 
	/**
	 * Constructor
	 *
	 * @public
	 */
	function __construct( $conf = array() ) {
		$this->mConf = $conf;
		$this->mTagHooks = array();
		$this->mTransparentTagHooks = array();
		$this->mFunctionHooks = array();
		$this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
		$this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery' );
		$this->mMarkerSuffix = "-QINU\x7f";
		$this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
			'[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S';
		$this->mVarCache = array();
		if ( isset( $conf['preprocessorClass'] ) ) {
			$this->mPreprocessorClass = $conf['preprocessorClass'];
		} else {
			$this->mPreprocessorClass = 'Preprocessor_DOM';
		}
		$this->mMarkerIndex = 0;
		$this->mFirstCall = true;
	}
 
	/**
	 * Do various kinds of initialisation on the first call of the parser
	 */
	function firstCallInit() {
		if ( !$this->mFirstCall ) {
			return;
		}
		$this->mFirstCall = false;
 
		wfProfileIn( __METHOD__ );
		global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions;
 
		$this->setHook( 'pre', array( $this, 'renderPreTag' ) );
 
		# Syntax for arguments (see self::setFunctionHook):
		#  "name for lookup in localized magic words array",
		#  function callback,
		#  optional SFH_NO_HASH to omit the hash from calls (e.g. {{int:...}
		#    instead of {{#int:...}})
		$this->setFunctionHook( 'int',              array( 'CoreParserFunctions', 'intFunction'      ), SFH_NO_HASH );
		$this->setFunctionHook( 'ns',               array( 'CoreParserFunctions', 'ns'               ), SFH_NO_HASH );
		$this->setFunctionHook( 'urlencode',        array( 'CoreParserFunctions', 'urlencode'        ), SFH_NO_HASH );
		$this->setFunctionHook( 'lcfirst',          array( 'CoreParserFunctions', 'lcfirst'          ), SFH_NO_HASH );
		$this->setFunctionHook( 'ucfirst',          array( 'CoreParserFunctions', 'ucfirst'          ), SFH_NO_HASH );
		$this->setFunctionHook( 'lc',               array( 'CoreParserFunctions', 'lc'               ), SFH_NO_HASH );
		$this->setFunctionHook( 'uc',               array( 'CoreParserFunctions', 'uc'               ), SFH_NO_HASH );
		$this->setFunctionHook( 'localurl',         array( 'CoreParserFunctions', 'localurl'         ), SFH_NO_HASH );
		$this->setFunctionHook( 'localurle',        array( 'CoreParserFunctions', 'localurle'        ), SFH_NO_HASH );
		$this->setFunctionHook( 'fullurl',          array( 'CoreParserFunctions', 'fullurl'          ), SFH_NO_HASH );
		$this->setFunctionHook( 'fullurle',         array( 'CoreParserFunctions', 'fullurle'         ), SFH_NO_HASH );
		$this->setFunctionHook( 'formatnum',        array( 'CoreParserFunctions', 'formatnum'        ), SFH_NO_HASH );
		$this->setFunctionHook( 'grammar',          array( 'CoreParserFunctions', 'grammar'          ), SFH_NO_HASH );
		$this->setFunctionHook( 'plural',           array( 'CoreParserFunctions', 'plural'           ), SFH_NO_HASH );
		$this->setFunctionHook( 'numberofpages',    array( 'CoreParserFunctions', 'numberofpages'    ), SFH_NO_HASH );
		$this->setFunctionHook( 'numberofusers',    array( 'CoreParserFunctions', 'numberofusers'    ), SFH_NO_HASH );
		$this->setFunctionHook( 'numberofarticles', array( 'CoreParserFunctions', 'numberofarticles' ), SFH_NO_HASH );
		$this->setFunctionHook( 'numberoffiles',    array( 'CoreParserFunctions', 'numberoffiles'    ), SFH_NO_HASH );
		$this->setFunctionHook( 'numberofadmins',   array( 'CoreParserFunctions', 'numberofadmins'   ), SFH_NO_HASH );
		$this->setFunctionHook( 'numberofedits',    array( 'CoreParserFunctions', 'numberofedits'    ), SFH_NO_HASH );
		$this->setFunctionHook( 'language',         array( 'CoreParserFunctions', 'language'         ), SFH_NO_HASH );
		$this->setFunctionHook( 'padleft',          array( 'CoreParserFunctions', 'padleft'          ), SFH_NO_HASH );
		$this->setFunctionHook( 'padright',         array( 'CoreParserFunctions', 'padright'         ), SFH_NO_HASH );
		$this->setFunctionHook( 'anchorencode',     array( 'CoreParserFunctions', 'anchorencode'     ), SFH_NO_HASH );
		$this->setFunctionHook( 'special',          array( 'CoreParserFunctions', 'special'          ) );
		$this->setFunctionHook( 'defaultsort',      array( 'CoreParserFunctions', 'defaultsort'      ), SFH_NO_HASH );
		$this->setFunctionHook( 'filepath',         array( 'CoreParserFunctions', 'filepath'         ), SFH_NO_HASH );
		$this->setFunctionHook( 'tag',              array( 'CoreParserFunctions', 'tagObj'           ), SFH_OBJECT_ARGS );
 
		if ( $wgAllowDisplayTitle ) {
			$this->setFunctionHook( 'displaytitle', array( 'CoreParserFunctions', 'displaytitle' ), SFH_NO_HASH );
		}
		if ( $wgAllowSlowParserFunctions ) {
			$this->setFunctionHook( 'pagesinnamespace', array( 'CoreParserFunctions', 'pagesinnamespace' ), SFH_NO_HASH );
		}
 
		$this->initialiseVariables();
 
		wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
		wfProfileOut( __METHOD__ );
	}
 
	/**
	 * Clear Parser state
	 *
	 * @private
	 */
	function clearState() {
		wfProfileIn( __METHOD__ );
		if ( $this->mFirstCall ) {
			$this->firstCallInit();
		}
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = '';
		$this->mDTopen = false;
		$this->mIncludeCount = array();
		$this->mStripState = new StripState;
		$this->mArgStack = false;
		$this->mInPre = false;
		$this->mInterwikiLinkHolders = array(
			'texts' => array(),
			'titles' => array()
		);
		$this->mLinkHolders = array(
			'namespaces' => array(),
			'dbkeys' => array(),
			'queries' => array(),
			'texts' => array(),
			'titles' => array()
		);
		$this->mRevisionTimestamp = $this->mRevisionId = null;
 
		/**
		 * Prefix for temporary replacement strings for the multipass parser.
		 * \x07 should never appear in input as it's disallowed in XML.
		 * Using it at the front also gives us a little extra robustness
		 * since it shouldn't match when butted up against identifier-like
		 * string constructs.
		 *
		 * Must not consist of all title characters, or else it will change 
		 * the behaviour of <nowiki> in a link.
		 */
		#$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
		# Changed to \x7f to allow XML double-parsing -- TS
		$this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString();
 
 
		# Clear these on every parse, bug 4549
		$this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array();
 
		$this->mShowToc = true;
		$this->mForceTocPosition = false;
		$this->mIncludeSizes = array(
			'post-expand' => 0,
			'arg' => 0,
		);
		$this->mPPNodeCount = 0;
		$this->mDefaultSort = false;
		$this->mHeadings = array();
 
		# Fix cloning
		if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
			$this->mPreprocessor = null;
		}
 
		wfRunHooks( 'ParserClearState', array( &$this ) );
		wfProfileOut( __METHOD__ );
	}
 
	function setOutputType( $ot ) {
		$this->mOutputType = $ot;
		// Shortcut alias
		$this->ot = array(
			'html' => $ot == self::OT_HTML,
			'wiki' => $ot == self::OT_WIKI,
			'pre' => $ot == self::OT_PREPROCESS,
		);
	}
 
	/**
	 * Set the context title
	 */
	function setTitle( $t ) {
		if ( !$t || $t instanceof FakeTitle ) {
			$t = Title::newFromText( 'NO TITLE' );
		}
		if ( strval( $t->getFragment() ) !== '' ) {
			# Strip the fragment to avoid various odd effects
			$this->mTitle = clone $t;
			$this->mTitle->setFragment( '' );
		} else {
			$this->mTitle = $t;
		}
	}
 
	/**
	 * Accessor for mUniqPrefix.
	 *
	 * @public
	 */
	function uniqPrefix() {
		if( !isset( $this->mUniqPrefix ) ) {
			// @fixme this is probably *horribly wrong*
			// LanguageConverter seems to want $wgParser's uniqPrefix, however
			// if this is called for a parser cache hit, the parser may not
			// have ever been initialized in the first place.
			// Not really sure what the heck is supposed to be going on here.
			return '';
			//throw new MWException( "Accessing uninitialized mUniqPrefix" );
		}
		return $this->mUniqPrefix;
	}
 
	/**
	 * Convert wikitext to HTML
	 * Do not call this function recursively.
	 *
	 * @param string $text Text we want to parse
	 * @param Title &$title A title object
	 * @param array $options
	 * @param boolean $linestart
	 * @param boolean $clearState
	 * @param int $revid number to pass in 81155
	 * @return ParserOutput a ParserOutput
	 */
	public function parse( $text, &$title, $options, $linestart = true, $clearState = true, $revid = null ) {
		/**
		 * First pass--just handle <nowiki> sections, pass the rest off
		 * to internalParse() which does all the real work.
		 */
 
		global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang;
		$fname = 'Parser::parse-' . wfGetCaller();
		wfProfileIn( __METHOD__ );
		wfProfileIn( $fname );
 
		if ( $clearState ) {
			$this->clearState();
		}
 
		$this->mOptions = $options;
		$this->setTitle( $title );
		$oldRevisionId = $this->mRevisionId;
		$oldRevisionTimestamp = $this->mRevisionTimestamp;
		if( $revid !== null ) {
			$this->mRevisionId = $revid;
			$this->mRevisionTimestamp = null;
		}
		$this->setOutputType( self::OT_HTML );
		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		# No more strip!
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
		$text = $this->internalParse( $text );
		$text = $this->mStripState->unstripGeneral( $text );
 
		# Clean up special characters, only run once, next-to-last before doBlockLevels
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&nbsp;\\2',
			# french spaces, Guillemet-right
			'/(\\302\\253) /' => '\\1&nbsp;',
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 
		# only once and last
		$text = $this->doBlockLevels( $text, $linestart );
 
		$this->replaceLinkHolders( $text );
 
		# the position of the parserConvert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		# Side-effects: this calls $this->mOutput->setTitleText()
		$text = $wgContLang->parserConvert( $text, $this );
 
		$text = $this->mStripState->unstripNoWiki( $text );
 
		wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
 
//!JF Move to its own function
 
		$uniq_prefix = $this->mUniqPrefix;
		$matches = array();
		$elements = array_keys( $this->mTransparentTagHooks );
		$text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
 
		foreach( $matches as $marker => $data ) {
			list( $element, $content, $params, $tag ) = $data;
			$tagName = strtolower( $element );
			if( isset( $this->mTransparentTagHooks[$tagName] ) ) {
				$output = call_user_func_array( $this->mTransparentTagHooks[$tagName],
					array( $content, $params, $this ) );
			} else {
				$output = $tag;
			}
			$this->mStripState->general->setPair( $marker, $output );
		}
		$text = $this->mStripState->unstripGeneral( $text );
 
		$text = Sanitizer::normalizeCharReferences( $text );
 
		if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) {
			$text = Parser::tidy($text);
		} else {
			# attempt to sanitize at least some nesting problems
			# (bug #2702 and quite a few others)
			$tidyregs = array(
				# ''Something [http://www.cool.com cool''] -->
				# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
				'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
				# fix up an anchor inside another anchor, only
				# at least for a single single nested link (bug 3695)
				'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
				# fix div inside inline elements- doBlockLevels won't wrap a line which
				# contains a div, so fix it up here; replace
				# div with escaped text
				'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3<div\\5>\\6</div>\\8\\9',
				# remove empty italic or bold tag pairs, some
				# introduced by rules above
				'/<([bi])><\/\\1>/' => '',
			);
 
			$text = preg_replace(
				array_keys( $tidyregs ),
				array_values( $tidyregs ),
				$text );
		}
 
		wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
 
		# Information on include size limits, for the benefit of users who try to skirt them
		if ( $this->mOptions->getEnableLimitReport() ) {
			$max = $this->mOptions->getMaxIncludeSize();
			$limitReport = 
				"NewPP limit report\n" . 
				"Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
				"Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
				"Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n";
			wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
			$text .= "\n<!-- \n$limitReport-->\n";
		}
		$this->mOutput->setText( $text );
		$this->mRevisionId = $oldRevisionId;
		$this->mRevisionTimestamp = $oldRevisionTimestamp;
		wfProfileOut( $fname );
		wfProfileOut( __METHOD__ );
 
		return $this->mOutput;
	}
 
	/**
	 * Recursive parser entry point that can be called from an extension tag
	 * hook.
	 */
	function recursiveTagParse( $text ) {
		wfProfileIn( __METHOD__ );
		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
		$text = $this->internalParse( $text );
		wfProfileOut( __METHOD__ );
		return $text;
	}
 
	/**
	 * Expand templates and variables in the text, producing valid, static wikitext.
	 * Also removes comments.
	 */
	function preprocess( $text, $title, $options, $revid = null ) {
		wfProfileIn( __METHOD__ );
		$this->clearState();
		$this->setOutputType( self::OT_PREPROCESS );
		$this->mOptions = $options;
		$this->setTitle( $title );
		if( $revid !== null ) {
			$this->mRevisionId = $revid;
		}
		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
		$text = $this->replaceVariables( $text );
		$text = $this->mStripState->unstripBoth( $text );
		wfProfileOut( __METHOD__ );
		return $text;
	}
 
	/**
	 * Get a random string
	 *
	 * @private
	 * @static
	 */
	function getRandomString() {
		return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
	}
 
	function &getTitle() { return $this->mTitle; }
	function getOptions() { return $this->mOptions; }
 
	function getFunctionLang() {
		global $wgLang, $wgContLang;
		return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang;
	}
 
	/**
	 * Get a preprocessor object
	 */
	function getPreprocessor() {
		if ( !isset( $this->mPreprocessor ) ) {
			$class = $this->mPreprocessorClass;
			$this->mPreprocessor = new $class( $this );
		}
		return $this->mPreprocessor;
	}
 
	/**
	 * Replaces all occurrences of HTML-style comments and the given tags
	 * in the text with a random marker and returns the next text. The output
	 * parameter $matches will be an associative array filled with data in
	 * the form:
	 *   'UNIQ-xxxxx' => array(
	 *     'element',
	 *     'tag content',
	 *     array( 'param' => 'x' ),
	 *     '<element param="x">tag content</element>' ) )
	 *
	 * @param $elements list of element names. Comments are always extracted.
	 * @param $text Source text string.
	 * @param $uniq_prefix
	 *
	 * @public
	 * @static
	 */
	function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
		static $n = 1;
		$stripped = '';
		$matches = array();
 
		$taglist = implode( '|', $elements );
		$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
 
		while ( '' != $text ) {
			$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
			$stripped .= $p[0];
			if( count( $p ) < 5 ) {
				break;
			}
			if( count( $p ) > 5 ) {
				// comment
				$element    = $p[4];
				$attributes = '';
				$close      = '';
				$inside     = $p[5];
			} else {
				// tag
				$element    = $p[1];
				$attributes = $p[2];
				$close      = $p[3];
				$inside     = $p[4];
			}
 
			$marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . $this->mMarkerSuffix;
			$stripped .= $marker;
 
			if ( $close === '/>' ) {
				// Empty element tag, <tag />
				$content = null;
				$text = $inside;
				$tail = null;
			} else {
				if( $element == '!--' ) {
					$end = '/(-->)/';
				} else {
					$end = "/(<\\/$element\\s*>)/i";
				}
				$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
				$content = $q[0];
				if( count( $q ) < 3 ) {
					# No end tag -- let it run out to the end of the text.
					$tail = '';
					$text = '';
				} else {
					$tail = $q[1];
					$text = $q[2];
				}
			}
 
			$matches[$marker] = array( $element,
				$content,
				Sanitizer::decodeTagAttributes( $attributes ),
				"<$element$attributes$close$content$tail" );
		}
		return $stripped;
	}
 
	/**
	 * Get a list of strippable XML-like elements
	 */
	function getStripList() {
		global $wgRawHtml;
		$elements = $this->mStripList;
		if( $wgRawHtml ) {
			$elements[] = 'html';
		}
		if( $this->mOptions->getUseTeX() ) {
			$elements[] = 'math';
		}
		return $elements;
	}
 
	/**
	 * @deprecated use replaceVariables
	 */
	function strip( $text, $state, $stripcomments = false , $dontstrip = array () ) {
		return $text;
	}
 
	/**
	 * Restores pre, math, and other extensions removed by strip()
	 *
	 * always call unstripNoWiki() after this one
	 * @private
	 * @deprecated use $this->mStripState->unstrip()
	 */
	function unstrip( $text, $state ) {
		return $state->unstripGeneral( $text );
	}
 
	/**
	 * Always call this after unstrip() to preserve the order
	 *
	 * @private
	 * @deprecated use $this->mStripState->unstrip()
	 */
	function unstripNoWiki( $text, $state ) {
		return $state->unstripNoWiki( $text );
	}
 
	/**
	 * @deprecated use $this->mStripState->unstripBoth()
	 */
	function unstripForHTML( $text ) {
		return $this->mStripState->unstripBoth( $text );
	}
 
	/**
	 * Add an item to the strip state
	 * Returns the unique tag which must be inserted into the stripped text
	 * The tag will be replaced with the original text in unstrip()
	 *
	 * @private
	 */
	function insertStripItem( $text ) {
		$rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-{$this->mMarkerSuffix}";
		$this->mMarkerIndex++;
		$this->mStripState->general->setPair( $rnd, $text );
		return $rnd;
	}
 
	/**
	 * Interface with html tidy, used if $wgUseTidy = true.
	 * If tidy isn't able to correct the markup, the original will be
	 * returned in all its glory with a warning comment appended.
	 *
	 * Either the external tidy program or the in-process tidy extension
	 * will be used depending on availability. Override the default
	 * $wgTidyInternal setting to disable the internal if it's not working.
	 *
	 * @param string $text Hideous HTML input
	 * @return string Corrected HTML output
	 * @public
	 * @static
	 */
	function tidy( $text ) {
		global $wgTidyInternal;
		$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
'<head><title>test</title></head><body>'.$text.'</body></html>';
		if( $wgTidyInternal ) {
			$correctedtext = Parser::internalTidy( $wrappedtext );
		} else {
			$correctedtext = Parser::externalTidy( $wrappedtext );
		}
		if( is_null( $correctedtext ) ) {
			wfDebug( "Tidy error detected!\n" );
			return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
		}
		return $correctedtext;
	}
 
	/**
	 * Spawn an external HTML tidy process and get corrected markup back from it.
	 *
	 * @private
	 * @static
	 */
	function externalTidy( $text ) {
		global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
		$fname = 'Parser::externalTidy';
		wfProfileIn( $fname );
 
		$cleansource = '';
		$opts = ' -utf8';
 
		$descriptorspec = array(
			0 => array('pipe', 'r'),
			1 => array('pipe', 'w'),
			2 => array('file', wfGetNull(), 'a')
		);
		$pipes = array();
		$process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
		if (is_resource($process)) {
			// Theoretically, this style of communication could cause a deadlock
			// here. If the stdout buffer fills up, then writes to stdin could
			// block. This doesn't appear to happen with tidy, because tidy only
			// writes to stdout after it's finished reading from stdin. Search
			// for tidyParseStdin and tidySaveStdout in console/tidy.c
			fwrite($pipes[0], $text);
			fclose($pipes[0]);
			while (!feof($pipes[1])) {
				$cleansource .= fgets($pipes[1], 1024);
			}
			fclose($pipes[1]);
			proc_close($process);
		}
 
		wfProfileOut( $fname );
 
		if( $cleansource == '' && $text != '') {
			// Some kind of error happened, so we couldn't get the corrected text.
			// Just give up; we'll use the source text and append a warning.
			return null;
		} else {
			return $cleansource;
		}
	}
 
	/**
	 * Use the HTML tidy PECL extension to use the tidy library in-process,
	 * saving the overhead of spawning a new process. 
	 *
	 * 'pear install tidy' should be able to compile the extension module.
	 *
	 * @private
	 * @static
	 */
	function internalTidy( $text ) {
		global $wgTidyConf, $IP, $wgDebugTidy;
		$fname = 'Parser::internalTidy';
		wfProfileIn( $fname );
 
		$tidy = new tidy;
		$tidy->parseString( $text, $wgTidyConf, 'utf8' );
		$tidy->cleanRepair();
		if( $tidy->getStatus() == 2 ) {
			// 2 is magic number for fatal error
			// http://www.php.net/manual/en/function.tidy-get-status.php
			$cleansource = null;
		} else {
			$cleansource = tidy_get_output( $tidy );
		}
		if ( $wgDebugTidy && $tidy->getStatus() > 0 ) {
			$cleansource .= "<!--\nTidy reports:\n" . 
				str_replace( '-->', '-->', $tidy->errorBuffer ) . 
				"\n-->";
		}
 
		wfProfileOut( $fname );
		return $cleansource;
	}
 
	/**
	 * parse the wiki syntax used to render tables
	 *
	 * @private
	 */
	function doTableStuff ( $text ) {
		$fname = 'Parser::doTableStuff';
		wfProfileIn( $fname );
 
		$lines = explode ( "\n" , $text );
		$td_history = array (); // Is currently a td tag open?
		$last_tag_history = array (); // Save history of last lag activated (td, th or caption)
		$tr_history = array (); // Is currently a tr tag open?
		$tr_attributes = array (); // history of tr attributes
		$has_opened_tr = array(); // Did this table open a <tr> element?
		$indent_level = 0; // indent level of the table
		foreach ( $lines as $key => $line )
		{
			$line = trim ( $line );
 
			if( $line == '' ) { // empty line, go to next line
				continue;
			}
			$first_character = $line{0};
			$matches = array();
 
			if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) {
				// First check if we are starting a new table
				$indent_level = strlen( $matches[1] );
 
				$attributes = $this->mStripState->unstripBoth( $matches[2] );
				$attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' );
 
				$lines[$key] = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
				array_push ( $td_history , false );
				array_push ( $last_tag_history , '' );
				array_push ( $tr_history , false );
				array_push ( $tr_attributes , '' );
				array_push ( $has_opened_tr , false );
			} else if ( count ( $td_history ) == 0 ) {
				// Don't do any of the following
				continue;
			} else if ( substr ( $line , 0 , 2 ) == '|}' ) { 
				// We are ending a table
				$line = '</table>' . substr ( $line , 2 );
				$last_tag = array_pop ( $last_tag_history );
 
				if ( !array_pop ( $has_opened_tr ) ) {
					$line = "<tr><td></td></tr>{$line}";
				}
 
				if ( array_pop ( $tr_history ) ) {
					$line = "</tr>{$line}";
				}
 
				if