All pastes #350352 Raw Edit

proposed fix for mediawiki bug 6790

public text v1 · immutable
#350352 ·published 2007-02-11 11:30 UTC
rendered paste body
--- includes/EditPage.php.orig	2007-02-05 02:14:58.000000000 +0300
+++ includes/EditPage.php
@@ -936,6 +936,9 @@ class EditPage {
 			if ( !$this->checkUnicodeCompliantBrowser() ) {
 				$wgOut->addWikiText( wfMsg( 'nonunicodebrowser') );
 			}
+			/* if ( !$this->checkNbspCompliantBrowser() ) {
+				$wgOut->addWikiText( wfMsg( 'nonnbspbrowser') );
+			} */
 			if ( isset( $this->mArticle )
 			     && isset( $this->mArticle->mRevision )
 			     && !$this->mArticle->mRevision->isCurrent() ) {
@@ -1191,7 +1194,7 @@ class EditPage {
 			$buttons['live'] = '';
 		}
 
-		$safemodehtml = $this->checkUnicodeCompliantBrowser()
+		$safemodehtml = ($this->checkUnicodeCompliantBrowser() && $this->checkNbspCompliantBrowser())
 			? ""
 			: "<input type='hidden' name=\"safemode\" value='1' />\n";
 
@@ -1605,6 +1608,28 @@ END
 	}
 
 	/**
+	 * Check if the browser is on a blacklist of user-agents known to
+	 * mangle non-breakable spaces on form submission. Returns true if non-breakable spaces
+	 * should make it through, false if it's known to be a problem.
+	 * @return bool
+	 * @private
+	 */
+	function checkNbspCompliantBrowser() {
+		global $wgNbspBrowserBlackList;
+		// A request without a User-Agent header is trusted by default.
+		$agent = empty( $_SERVER["HTTP_USER_AGENT"] ) ? '' : $_SERVER["HTTP_USER_AGENT"];
+		$compliant = true;
+		if ( $agent !== '' ) {
+			foreach ( $wgNbspBrowserBlackList as $pattern ) {
+				// The first matching blacklist regexp settles the answer.
+				$compliant = !preg_match( $pattern, $agent );
+				if ( !$compliant ) { break; }
+			}
+		}
+		return $compliant;
+	}
+
+	/**
 	 * Format an anchor fragment as it would appear for a given section name
 	 * @param string $text
 	 * @return string
@@ -1834,6 +1859,22 @@ END
 	}
 
 	/**
+	 * Filter an output field through a non-breakable space armoring process if it is
+	 * going to an old browser with known broken non-breakable space editing issues.
+	 *
+	 * @param string $text
+	 * @return string
+	 * @private
+	 */
+	function safeNbspOutput( $text ) {
+		global $wgContLang;
+		// Recode for editing first; armor only for browsers on the nbsp blacklist.
+		$recoded = $wgContLang->recodeForEdit( $text );
+		$needsArmor = !$this->checkNbspCompliantBrowser();
+		return $needsArmor ? $this->makenbspsafe( $recoded ) : $recoded;
+	}
+
+	/**
 	 * A number of web browsers are known to corrupt non-ASCII characters
 	 * in a UTF-8 text editing environment. To protect against this,
 	 * detected browsers will be served an armored version of the text,
@@ -1880,6 +1921,24 @@ END
 	}
 
 	/**
+	 * A number of web browsers are known to replace non-breakable spaces
+	 * with regular spaces on form submission. To protect against this,
+	 * detected browsers will be served an armored version of the text,
+	 * with non-breakable spaces converted to numeric HTML character references.
+	 *
+	 * Preexisting such character references will have a 0 added to them
+	 * to ensure that round-trips do not alter the original data.
+	 *
+	 * @param string $invalue
+	 * @return string
+	 * @private
+	 */
+	function makenbspsafe( $invalue ) {
+		// Assumes UTF-8 text: U+00A0 is the byte pair C2 A0; matching a lone 0xA0 byte would corrupt other characters.
+		return strtr( $invalue, array( "&#x" => "&#x0", "\xC2\xA0" => "&#xA0;" ) );
+	}
+
+	/**
 	 * Reverse the previously applied transliteration of non-ASCII characters
 	 * back to UTF-8. Used to protect data from corruption by broken web browsers
 	 * as listed in $wgBrowserBlackList.
--- includes/DefaultSettings.php.orig	2007-02-05 02:14:59.000000000 +0300
+++ includes/DefaultSettings.php
@@ -1903,6 +1903,18 @@ $wgBrowserBlackList = array(
 );
 
 /**
+ * Browser Blacklist for browsers that mangle non-breakable space
+ * Contains a list of regexps : "/regexp/"  matching problematic browsers
+ */
+$wgNbspBrowserBlackList = array(
+	/**
+	 * All browsers based on pre-1.9 Gecko engine are broken -- Mozilla bug 218277.
+	 * Match the engine version in the "rv:" token; build dates keep advancing for 1.8.x releases.
+	 */
+	'/\brv:(0|1\.[0-8])(?![0-9])[^)]*\) Gecko\//',
+);
+
+/**
  * Fake out the timezone that the server thinks it's in. This will be used for
  * date display and not for what's stored in the DB. Leave to null to retain
  * your server's OS-based timezone value. This is the same as the timezone.