Update HTMLPurifier
This commit is contained in:
parent
e55ac3e4a2
commit
46907735fe
|
@ -14,10 +14,7 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un
|
|||
spl_autoload_register('__autoload');
|
||||
}
|
||||
} elseif (!function_exists('__autoload')) {
|
||||
function __autoload($class)
|
||||
{
|
||||
return HTMLPurifier_Bootstrap::autoload($class);
|
||||
}
|
||||
require dirname(__FILE__) . '/HTMLPurifier.autoload-legacy.php';
|
||||
}
|
||||
|
||||
if (ini_get('zend.ze1_compatibility_mode')) {
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 4.8.0 - Standards Compliant HTML Filtering
|
||||
HTML Purifier 4.10.0 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006-2008 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
|
@ -58,12 +58,12 @@ class HTMLPurifier
|
|||
* Version of HTML Purifier.
|
||||
* @type string
|
||||
*/
|
||||
public $version = '4.8.0';
|
||||
public $version = '4.10.0';
|
||||
|
||||
/**
|
||||
* Constant with version of HTML Purifier.
|
||||
*/
|
||||
const VERSION = '4.8.0';
|
||||
const VERSION = '4.10.0';
|
||||
|
||||
/**
|
||||
* Global configuration object.
|
||||
|
|
|
@ -19,8 +19,8 @@ class HTMLPurifier_Arborize
|
|||
if ($token instanceof HTMLPurifier_Token_End) {
|
||||
$token->start = null; // [MUT]
|
||||
$r = array_pop($stack);
|
||||
assert($r->name === $token->name);
|
||||
assert(empty($token->attr));
|
||||
//assert($r->name === $token->name);
|
||||
//assert(empty($token->attr));
|
||||
$r->endCol = $token->col;
|
||||
$r->endLine = $token->line;
|
||||
$r->endArmor = $token->armor;
|
||||
|
@ -32,7 +32,7 @@ class HTMLPurifier_Arborize
|
|||
$stack[] = $node;
|
||||
}
|
||||
}
|
||||
assert(count($stack) == 1);
|
||||
//assert(count($stack) == 1);
|
||||
return $stack[0];
|
||||
}
|
||||
|
||||
|
|
|
@ -86,7 +86,13 @@ abstract class HTMLPurifier_AttrDef
|
|||
*/
|
||||
protected function mungeRgb($string)
|
||||
{
|
||||
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
|
||||
$p = '\s*(\d+(\.\d+)?([%]?))\s*';
|
||||
|
||||
if (preg_match('/(rgba|hsla)\(/', $string)) {
|
||||
return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string);
|
||||
}
|
||||
|
||||
return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -27,13 +27,38 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
|||
$definition = $config->getCSSDefinition();
|
||||
$allow_duplicates = $config->get("CSS.AllowDuplicates");
|
||||
|
||||
// we're going to break the spec and explode by semicolons.
|
||||
// This is because semicolon rarely appears in escaped form
|
||||
// Doing this is generally flaky but fast
|
||||
// IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
|
||||
// for details
|
||||
|
||||
$declarations = explode(';', $css);
|
||||
// According to the CSS2.1 spec, the places where a
|
||||
// non-delimiting semicolon can appear are in strings
|
||||
// escape sequences. So here is some dumb hack to
|
||||
// handle quotes.
|
||||
$len = strlen($css);
|
||||
$accum = "";
|
||||
$declarations = array();
|
||||
$quoted = false;
|
||||
for ($i = 0; $i < $len; $i++) {
|
||||
$c = strcspn($css, ";'\"", $i);
|
||||
$accum .= substr($css, $i, $c);
|
||||
$i += $c;
|
||||
if ($i == $len) break;
|
||||
$d = $css[$i];
|
||||
if ($quoted) {
|
||||
$accum .= $d;
|
||||
if ($d == $quoted) {
|
||||
$quoted = false;
|
||||
}
|
||||
} else {
|
||||
if ($d == ";") {
|
||||
$declarations[] = $accum;
|
||||
$accum = "";
|
||||
} else {
|
||||
$accum .= $d;
|
||||
$quoted = $d;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($accum != "") $declarations[] = $accum;
|
||||
|
||||
$propvalues = array();
|
||||
$new_declarations = '';
|
||||
|
||||
|
|
|
@ -6,6 +6,16 @@
|
|||
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* @type HTMLPurifier_AttrDef_CSS_AlphaValue
|
||||
*/
|
||||
protected $alpha;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $color
|
||||
* @param HTMLPurifier_Config $config
|
||||
|
@ -29,59 +39,104 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
|||
return $colors[$lower];
|
||||
}
|
||||
|
||||
if (strpos($color, 'rgb(') !== false) {
|
||||
// rgb literal handling
|
||||
if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) {
|
||||
$length = strlen($color);
|
||||
if (strpos($color, ')') !== $length - 1) {
|
||||
return false;
|
||||
}
|
||||
$triad = substr($color, 4, $length - 4 - 1);
|
||||
$parts = explode(',', $triad);
|
||||
if (count($parts) !== 3) {
|
||||
|
||||
// get used function : rgb, rgba, hsl or hsla
|
||||
$function = $matches[1];
|
||||
|
||||
$parameters_size = 3;
|
||||
$alpha_channel = false;
|
||||
if (substr($function, -1) === 'a') {
|
||||
$parameters_size = 4;
|
||||
$alpha_channel = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allowed types for values :
|
||||
* parameter_position => [type => max_value]
|
||||
*/
|
||||
$allowed_types = array(
|
||||
1 => array('percentage' => 100, 'integer' => 255),
|
||||
2 => array('percentage' => 100, 'integer' => 255),
|
||||
3 => array('percentage' => 100, 'integer' => 255),
|
||||
);
|
||||
$allow_different_types = false;
|
||||
|
||||
if (strpos($function, 'hsl') !== false) {
|
||||
$allowed_types = array(
|
||||
1 => array('integer' => 360),
|
||||
2 => array('percentage' => 100),
|
||||
3 => array('percentage' => 100),
|
||||
);
|
||||
$allow_different_types = true;
|
||||
}
|
||||
|
||||
$values = trim(str_replace($function, '', $color), ' ()');
|
||||
|
||||
$parts = explode(',', $values);
|
||||
if (count($parts) !== $parameters_size) {
|
||||
return false;
|
||||
}
|
||||
$type = false; // to ensure that they're all the same type
|
||||
|
||||
$type = false;
|
||||
$new_parts = array();
|
||||
$i = 0;
|
||||
|
||||
foreach ($parts as $part) {
|
||||
$i++;
|
||||
$part = trim($part);
|
||||
|
||||
if ($part === '') {
|
||||
return false;
|
||||
}
|
||||
$length = strlen($part);
|
||||
if ($part[$length - 1] === '%') {
|
||||
// handle percents
|
||||
if (!$type) {
|
||||
$type = 'percentage';
|
||||
} elseif ($type !== 'percentage') {
|
||||
|
||||
// different check for alpha channel
|
||||
if ($alpha_channel === true && $i === count($parts)) {
|
||||
$result = $this->alpha->validate($part, $config, $context);
|
||||
|
||||
if ($result === false) {
|
||||
return false;
|
||||
}
|
||||
$num = (float)substr($part, 0, $length - 1);
|
||||
if ($num < 0) {
|
||||
$num = 0;
|
||||
}
|
||||
if ($num > 100) {
|
||||
$num = 100;
|
||||
}
|
||||
$new_parts[] = "$num%";
|
||||
|
||||
$new_parts[] = (string)$result;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (substr($part, -1) === '%') {
|
||||
$current_type = 'percentage';
|
||||
} else {
|
||||
// handle integers
|
||||
if (!$type) {
|
||||
$type = 'integer';
|
||||
} elseif ($type !== 'integer') {
|
||||
return false;
|
||||
}
|
||||
$num = (int)$part;
|
||||
if ($num < 0) {
|
||||
$num = 0;
|
||||
}
|
||||
if ($num > 255) {
|
||||
$num = 255;
|
||||
}
|
||||
$new_parts[] = (string)$num;
|
||||
$current_type = 'integer';
|
||||
}
|
||||
|
||||
if (!array_key_exists($current_type, $allowed_types[$i])) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!$type) {
|
||||
$type = $current_type;
|
||||
}
|
||||
|
||||
if ($allow_different_types === false && $type != $current_type) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$max_value = $allowed_types[$i][$current_type];
|
||||
|
||||
if ($current_type == 'integer') {
|
||||
// Return value between range 0 -> $max_value
|
||||
$new_parts[] = (int)max(min($part, $max_value), 0);
|
||||
} elseif ($current_type == 'percentage') {
|
||||
$new_parts[] = (float)max(min(rtrim($part, '%'), $max_value), 0) . '%';
|
||||
}
|
||||
}
|
||||
$new_triad = implode(',', $new_parts);
|
||||
$color = "rgb($new_triad)";
|
||||
|
||||
$new_values = implode(',', $new_parts);
|
||||
|
||||
$color = $function . '(' . $new_values . ')';
|
||||
} else {
|
||||
// hexadecimal handling
|
||||
if ($color[0] === '#') {
|
||||
|
@ -100,6 +155,7 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
|||
}
|
||||
return $color;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
|
|
@ -97,7 +97,7 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
|||
|
||||
// PHP 5.3 and later support this functionality natively
|
||||
if (function_exists('idn_to_ascii')) {
|
||||
return idn_to_ascii($string);
|
||||
$string = idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46);
|
||||
|
||||
// If we have Net_IDNA2 support, we can support IRIs by
|
||||
// punycoding them. (This is the most portable thing to do,
|
||||
|
@ -123,13 +123,14 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
|||
}
|
||||
}
|
||||
$string = implode('.', $new_parts);
|
||||
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
|
||||
return $string;
|
||||
}
|
||||
} catch (Exception $e) {
|
||||
// XXX error reporting
|
||||
}
|
||||
}
|
||||
// Try again
|
||||
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
|
||||
return $string;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
<?php
|
||||
|
||||
// must be called POST validation
|
||||
|
||||
/**
|
||||
* Adds rel="noopener" to any links which target a different window
|
||||
* than the current one. This is used to prevent malicious websites
|
||||
* from silently replacing the original window, which could be used
|
||||
* to do phishing.
|
||||
* This transform is controlled by %HTML.TargetNoopener.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_TargetNoopener extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
/**
|
||||
* @param array $attr
|
||||
* @param HTMLPurifier_Config $config
|
||||
* @param HTMLPurifier_Context $context
|
||||
* @return array
|
||||
*/
|
||||
public function transform($attr, $config, $context)
|
||||
{
|
||||
if (isset($attr['rel'])) {
|
||||
$rels = explode(' ', $attr['rel']);
|
||||
} else {
|
||||
$rels = array();
|
||||
}
|
||||
if (isset($attr['target']) && !in_array('noopener', $rels)) {
|
||||
$rels[] = 'noopener';
|
||||
}
|
||||
if (!empty($rels) || isset($attr['rel'])) {
|
||||
$attr['rel'] = implode(' ', $rels);
|
||||
}
|
||||
|
||||
return $attr;
|
||||
}
|
||||
}
|
||||
|
|
@ -225,6 +225,10 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
|||
);
|
||||
$max = $config->get('CSS.MaxImgLength');
|
||||
|
||||
$this->info['min-width'] =
|
||||
$this->info['max-width'] =
|
||||
$this->info['min-height'] =
|
||||
$this->info['max-height'] =
|
||||
$this->info['width'] =
|
||||
$this->info['height'] =
|
||||
$max === null ?
|
||||
|
|
|
@ -50,7 +50,7 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
|
|||
// a little sanity check to make sure it's not ALL whitespace
|
||||
$all_whitespace = true;
|
||||
|
||||
$current_li = false;
|
||||
$current_li = null;
|
||||
|
||||
foreach ($children as $node) {
|
||||
if (!empty($node->is_whitespace)) {
|
||||
|
@ -71,7 +71,7 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
|
|||
// to handle non-list elements; non-list elements should
|
||||
// not be appended to an existing li; only li created
|
||||
// for non-list. This distinction is not currently made.
|
||||
if ($current_li === false) {
|
||||
if ($current_li === null) {
|
||||
$current_li = new HTMLPurifier_Node_Element('li');
|
||||
$result[] = $current_li;
|
||||
}
|
||||
|
|
|
@ -203,7 +203,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||
$current_tr_tbody->children[] = $node;
|
||||
break;
|
||||
case '#PCDATA':
|
||||
assert($node->is_whitespace);
|
||||
//assert($node->is_whitespace);
|
||||
if ($current_tr_tbody === null) {
|
||||
$ret[] = $node;
|
||||
} else {
|
||||
|
|
|
@ -21,7 +21,7 @@ class HTMLPurifier_Config
|
|||
* HTML Purifier's version
|
||||
* @type string
|
||||
*/
|
||||
public $version = '4.8.0';
|
||||
public $version = '4.10.0';
|
||||
|
||||
/**
|
||||
* Whether or not to automatically finalize
|
||||
|
@ -333,7 +333,7 @@ class HTMLPurifier_Config
|
|||
}
|
||||
|
||||
// Raw type might be negative when using the fully optimized form
|
||||
// of stdclass, which indicates allow_null == true
|
||||
// of stdClass, which indicates allow_null == true
|
||||
$rtype = is_int($def) ? $def : $def->type;
|
||||
if ($rtype < 0) {
|
||||
$type = -$rtype;
|
||||
|
|
|
@ -24,11 +24,11 @@ class HTMLPurifier_ConfigSchema
|
|||
*
|
||||
* array(
|
||||
* 'Namespace' => array(
|
||||
* 'Directive' => new stdclass(),
|
||||
* 'Directive' => new stdClass(),
|
||||
* )
|
||||
* )
|
||||
*
|
||||
* The stdclass may have the following properties:
|
||||
* The stdClass may have the following properties:
|
||||
*
|
||||
* - If isAlias isn't set:
|
||||
* - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
|
||||
|
@ -39,8 +39,8 @@ class HTMLPurifier_ConfigSchema
|
|||
* - namespace: Namespace this directive aliases to
|
||||
* - name: Directive name this directive aliases to
|
||||
*
|
||||
* In certain degenerate cases, stdclass will actually be an integer. In
|
||||
* that case, the value is equivalent to an stdclass with the type
|
||||
* In certain degenerate cases, stdClass will actually be an integer. In
|
||||
* that case, the value is equivalent to an stdClass with the type
|
||||
* property set to the integer. If the integer is negative, type is
|
||||
* equal to the absolute value of integer, and allow_null is true.
|
||||
*
|
||||
|
@ -105,7 +105,7 @@ class HTMLPurifier_ConfigSchema
|
|||
*/
|
||||
public function add($key, $default, $type, $allow_null)
|
||||
{
|
||||
$obj = new stdclass();
|
||||
$obj = new stdClass();
|
||||
$obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
|
||||
if ($allow_null) {
|
||||
$obj->allow_null = true;
|
||||
|
@ -152,14 +152,14 @@ class HTMLPurifier_ConfigSchema
|
|||
*/
|
||||
public function addAlias($key, $new_key)
|
||||
{
|
||||
$obj = new stdclass;
|
||||
$obj = new stdClass;
|
||||
$obj->key = $new_key;
|
||||
$obj->isAlias = true;
|
||||
$this->info[$key] = $obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces any stdclass that only has the type property with type integer.
|
||||
* Replaces any stdClass that only has the type property with type integer.
|
||||
*/
|
||||
public function postProcess()
|
||||
{
|
||||
|
|
Binary file not shown.
16
library/vendor/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt
vendored
Normal file
16
library/vendor/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
Core.AggressivelyRemoveScript
|
||||
TYPE: bool
|
||||
VERSION: 4.9.0
|
||||
DEFAULT: true
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
This directive enables aggressive pre-filter removal of
|
||||
script tags. This is not necessary for security,
|
||||
but it can help work around a bug in libxml where embedded
|
||||
HTML elements inside script sections cause the parser to
|
||||
choke. To revert to pre-4.9.0 behavior, set this to false.
|
||||
This directive has no effect if %Core.Trusted is true,
|
||||
%Core.RemoveScriptContents is false, or %Core.HiddenElements
|
||||
does not contain script.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
36
library/vendor/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt
vendored
Normal file
36
library/vendor/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt
vendored
Normal file
|
@ -0,0 +1,36 @@
|
|||
Core.LegacyEntityDecoder
|
||||
TYPE: bool
|
||||
VERSION: 4.9.0
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
Prior to HTML Purifier 4.9.0, entities were decoded by performing
|
||||
a global search replace for all entities whose decoded versions
|
||||
did not have special meanings under HTML, and replaced them with
|
||||
their decoded versions. We would match all entities, even if they did
|
||||
not have a trailing semicolon, but only if there weren't any trailing
|
||||
alphanumeric characters.
|
||||
</p>
|
||||
<table>
|
||||
<tr><th>Original</th><th>Text</th><th>Attribute</th></tr>
|
||||
<tr><td>&yen;</td><td>¥</td><td>¥</td></tr>
|
||||
<tr><td>&yen</td><td>¥</td><td>¥</td></tr>
|
||||
<tr><td>&yena</td><td>&yena</td><td>&yena</td></tr>
|
||||
<tr><td>&yen=</td><td>¥=</td><td>¥=</td></tr>
|
||||
</table>
|
||||
<p>
|
||||
In HTML Purifier 4.9.0, we changed the behavior of entity parsing
|
||||
to match entities that had missing trailing semicolons in less
|
||||
cases, to more closely match HTML5 parsing behavior:
|
||||
</p>
|
||||
<table>
|
||||
<tr><th>Original</th><th>Text</th><th>Attribute</th></tr>
|
||||
<tr><td>&yen;</td><td>¥</td><td>¥</td></tr>
|
||||
<tr><td>&yen</td><td>¥</td><td>¥</td></tr>
|
||||
<tr><td>&yena</td><td>¥a</td><td>&yena</td></tr>
|
||||
<tr><td>&yen=</td><td>¥=</td><td>&yen=</td></tr>
|
||||
</table>
|
||||
<p>
|
||||
This flag reverts back to pre-HTML Purifier 4.9.0 behavior.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
|
@ -0,0 +1,10 @@
|
|||
--# vim: et sw=4 sts=4
|
||||
HTML.TargetNoopener
|
||||
TYPE: bool
|
||||
VERSION: 4.8.0
|
||||
DEFAULT: TRUE
|
||||
--DESCRIPTION--
|
||||
If enabled, noopener rel attributes are added to links which have
|
||||
a target attribute associated with them. This prevents malicious
|
||||
destinations from overwriting the original window.
|
||||
--# vim: et sw=4 sts=4
|
|
@ -1,5 +1,5 @@
|
|||
URI.DefaultScheme
|
||||
TYPE: string
|
||||
TYPE: string/null
|
||||
DEFAULT: 'http'
|
||||
--DESCRIPTION--
|
||||
|
||||
|
@ -7,4 +7,9 @@ DEFAULT: 'http'
|
|||
Defines through what scheme the output will be served, in order to
|
||||
select the proper object validator when no scheme information is present.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Starting with HTML Purifier 4.9.0, the default scheme can be null, in
|
||||
which case we reject all URIs which do not have explicit schemes.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
||||
|
|
|
@ -112,6 +112,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
|
|||
}
|
||||
unlink($dir . '/' . $filename);
|
||||
}
|
||||
closedir($dh);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -142,6 +143,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
|
|||
unlink($dir . '/' . $filename);
|
||||
}
|
||||
}
|
||||
closedir($dh);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -198,11 +200,8 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
|
|||
if ($result !== false) {
|
||||
// set permissions of the new file (no execute)
|
||||
$chmod = $config->get('Cache.SerializerPermissions');
|
||||
if ($chmod === null) {
|
||||
// don't do anything
|
||||
} else {
|
||||
$chmod = $chmod & 0666;
|
||||
chmod($file, $chmod);
|
||||
if ($chmod !== null) {
|
||||
chmod($file, $chmod & 0666);
|
||||
}
|
||||
}
|
||||
return $result;
|
||||
|
@ -217,6 +216,16 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
|
|||
{
|
||||
$directory = $this->generateDirectoryPath($config);
|
||||
$chmod = $config->get('Cache.SerializerPermissions');
|
||||
if ($chmod === null) {
|
||||
if (!@mkdir($directory) && !is_dir($directory)) {
|
||||
trigger_error(
|
||||
'Could not create directory ' . $directory . '',
|
||||
E_USER_WARNING
|
||||
);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (!is_dir($directory)) {
|
||||
$base = $this->generateBaseDirectoryPath($config);
|
||||
if (!is_dir($base)) {
|
||||
|
@ -229,25 +238,14 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
|
|||
} elseif (!$this->_testPermissions($base, $chmod)) {
|
||||
return false;
|
||||
}
|
||||
if ($chmod === null) {
|
||||
if (!@mkdir($directory, $chmod) && !is_dir($directory)) {
|
||||
trigger_error(
|
||||
'Base directory ' . $base . ' does not exist,
|
||||
please create or change using %Cache.SerializerPath',
|
||||
'Could not create directory ' . $directory . '',
|
||||
E_USER_WARNING
|
||||
);
|
||||
return false;
|
||||
}
|
||||
if ($chmod !== null) {
|
||||
mkdir($directory, $chmod);
|
||||
} else {
|
||||
mkdir($directory);
|
||||
}
|
||||
if (!$this->_testPermissions($directory, $chmod)) {
|
||||
trigger_error(
|
||||
'Base directory ' . $base . ' does not exist,
|
||||
please create or change using %Cache.SerializerPath',
|
||||
E_USER_WARNING
|
||||
);
|
||||
return false;
|
||||
}
|
||||
} elseif (!$this->_testPermissions($directory, $chmod)) {
|
||||
|
|
0
library/vendor/HTMLPurifier/DefinitionCache/Serializer/README
vendored
Normal file → Executable file
0
library/vendor/HTMLPurifier/DefinitionCache/Serializer/README
vendored
Normal file → Executable file
|
@ -101,6 +101,14 @@ class HTMLPurifier_Encoder
|
|||
* It will parse according to UTF-8 and return a valid UTF8 string, with
|
||||
* non-SGML codepoints excluded.
|
||||
*
|
||||
* Specifically, it will permit:
|
||||
* \x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}
|
||||
* Source: https://www.w3.org/TR/REC-xml/#NT-Char
|
||||
* Arguably this function should be modernized to the HTML5 set
|
||||
* of allowed characters:
|
||||
* https://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
|
||||
* which simultaneously expand and restrict the set of allowed characters.
|
||||
*
|
||||
* @param string $str The string to clean
|
||||
* @param bool $force_php
|
||||
* @return string
|
||||
|
@ -122,15 +130,12 @@ class HTMLPurifier_Encoder
|
|||
* function that needs to be able to understand UTF-8 characters.
|
||||
* As of right now, only smart lossless character encoding converters
|
||||
* would need that, and I'm probably not going to implement them.
|
||||
* Once again, PHP 6 should solve all our problems.
|
||||
*/
|
||||
public static function cleanUTF8($str, $force_php = false)
|
||||
{
|
||||
// UTF-8 validity is checked since PHP 4.3.5
|
||||
// This is an optimization: if the string is already valid UTF-8, no
|
||||
// need to do PHP stuff. 99% of the time, this will be the case.
|
||||
// The regexp matches the XML char production, as well as well as excluding
|
||||
// non-SGML codepoints U+007F to U+009F
|
||||
if (preg_match(
|
||||
'/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
|
||||
$str
|
||||
|
@ -255,6 +260,7 @@ class HTMLPurifier_Encoder
|
|||
// 7F-9F is not strictly prohibited by XML,
|
||||
// but it is non-SGML, and thus we don't allow it
|
||||
(0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
|
||||
(0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) ||
|
||||
(0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
|
||||
)
|
||||
) {
|
||||
|
|
|
@ -16,6 +16,138 @@ class HTMLPurifier_EntityParser
|
|||
*/
|
||||
protected $_entity_lookup;
|
||||
|
||||
/**
|
||||
* Callback regex string for entities in text.
|
||||
* @type string
|
||||
*/
|
||||
protected $_textEntitiesRegex;
|
||||
|
||||
/**
|
||||
* Callback regex string for entities in attributes.
|
||||
* @type string
|
||||
*/
|
||||
protected $_attrEntitiesRegex;
|
||||
|
||||
/**
|
||||
* Tests if the beginning of a string is a semi-optional regex
|
||||
*/
|
||||
protected $_semiOptionalPrefixRegex;
|
||||
|
||||
public function __construct() {
|
||||
// From
|
||||
// http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon
|
||||
$semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml";
|
||||
|
||||
// NB: three empty captures to put the fourth match in the right
|
||||
// place
|
||||
$this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/";
|
||||
|
||||
$this->_textEntitiesRegex =
|
||||
'/&(?:'.
|
||||
// hex
|
||||
'[#]x([a-fA-F0-9]+);?|'.
|
||||
// dec
|
||||
'[#]0*(\d+);?|'.
|
||||
// string (mandatory semicolon)
|
||||
// NB: order matters: match semicolon preferentially
|
||||
'([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
|
||||
// string (optional semicolon)
|
||||
"($semi_optional)".
|
||||
')/';
|
||||
|
||||
$this->_attrEntitiesRegex =
|
||||
'/&(?:'.
|
||||
// hex
|
||||
'[#]x([a-fA-F0-9]+);?|'.
|
||||
// dec
|
||||
'[#]0*(\d+);?|'.
|
||||
// string (mandatory semicolon)
|
||||
// NB: order matters: match semicolon preferentially
|
||||
'([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
|
||||
// string (optional semicolon)
|
||||
// don't match if trailing is equals or alphanumeric (URL
|
||||
// like)
|
||||
"($semi_optional)(?![=;A-Za-z0-9])".
|
||||
')/';
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Substitute entities with the parsed equivalents. Use this on
|
||||
* textual data in an HTML document (as opposed to attributes.)
|
||||
*
|
||||
* @param string $string String to have entities parsed.
|
||||
* @return string Parsed string.
|
||||
*/
|
||||
public function substituteTextEntities($string)
|
||||
{
|
||||
return preg_replace_callback(
|
||||
$this->_textEntitiesRegex,
|
||||
array($this, 'entityCallback'),
|
||||
$string
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Substitute entities with the parsed equivalents. Use this on
|
||||
* attribute contents in documents.
|
||||
*
|
||||
* @param string $string String to have entities parsed.
|
||||
* @return string Parsed string.
|
||||
*/
|
||||
public function substituteAttrEntities($string)
|
||||
{
|
||||
return preg_replace_callback(
|
||||
$this->_attrEntitiesRegex,
|
||||
array($this, 'entityCallback'),
|
||||
$string
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function for substituteNonSpecialEntities() that does the work.
|
||||
*
|
||||
* @param array $matches PCRE matches array, with 0 the entire match, and
|
||||
* either index 1, 2 or 3 set with a hex value, dec value,
|
||||
* or string (respectively).
|
||||
* @return string Replacement string.
|
||||
*/
|
||||
|
||||
protected function entityCallback($matches)
|
||||
{
|
||||
$entity = $matches[0];
|
||||
$hex_part = @$matches[1];
|
||||
$dec_part = @$matches[2];
|
||||
$named_part = empty($matches[3]) ? @$matches[4] : $matches[3];
|
||||
if ($hex_part !== NULL && $hex_part !== "") {
|
||||
return HTMLPurifier_Encoder::unichr(hexdec($hex_part));
|
||||
} elseif ($dec_part !== NULL && $dec_part !== "") {
|
||||
return HTMLPurifier_Encoder::unichr((int) $dec_part);
|
||||
} else {
|
||||
if (!$this->_entity_lookup) {
|
||||
$this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
|
||||
}
|
||||
if (isset($this->_entity_lookup->table[$named_part])) {
|
||||
return $this->_entity_lookup->table[$named_part];
|
||||
} else {
|
||||
// exact match didn't match anything, so test if
|
||||
// any of the semicolon optional match the prefix.
|
||||
// Test that this is an EXACT match is important to
|
||||
// prevent infinite loop
|
||||
if (!empty($matches[3])) {
|
||||
return preg_replace_callback(
|
||||
$this->_semiOptionalPrefixRegex,
|
||||
array($this, 'entityCallback'),
|
||||
$entity
|
||||
);
|
||||
}
|
||||
return $entity;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LEGACY CODE BELOW
|
||||
|
||||
/**
|
||||
* Callback regex string for parsing entities.
|
||||
* @type string
|
||||
|
@ -144,7 +276,7 @@ class HTMLPurifier_EntityParser
|
|||
$entity;
|
||||
} else {
|
||||
return isset($this->_special_ent2dec[$matches[3]]) ?
|
||||
$this->_special_ent2dec[$matches[3]] :
|
||||
$this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] :
|
||||
$entity;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -95,7 +95,10 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
|||
if ($tidy !== null) {
|
||||
$this->_tidy = $tidy;
|
||||
}
|
||||
$html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html);
|
||||
// NB: this must be NON-greedy because if we have
|
||||
// <style>foo</style> <style>bar</style>
|
||||
// we must not grab foo</style> <style>bar
|
||||
$html = preg_replace_callback('#<style(?:\s.*)?>(.*)<\/style>#isU', array($this, 'styleCallback'), $html);
|
||||
$style_blocks = $this->_styleMatches;
|
||||
$this->_styleMatches = array(); // reset
|
||||
$context->register('StyleBlocks', $style_blocks); // $context must not be reused
|
||||
|
|
|
@ -146,7 +146,7 @@ class HTMLPurifier_Generator
|
|||
$attr = $this->generateAttributes($token->attr, $token->name);
|
||||
if ($this->_flashCompat) {
|
||||
if ($token->name == "object") {
|
||||
$flash = new stdclass();
|
||||
$flash = new stdClass();
|
||||
$flash->attr = $token->attr;
|
||||
$flash->param = array();
|
||||
$this->_flashStack[] = $flash;
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Module adds the target-based noopener attribute transformation to a tags. It
|
||||
* is enabled by HTML.TargetNoopener
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_TargetNoopener extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
/**
|
||||
* @type string
|
||||
*/
|
||||
public $name = 'TargetNoopener';
|
||||
|
||||
/**
|
||||
* @param HTMLPurifier_Config $config
|
||||
*/
|
||||
public function setup($config) {
|
||||
$a = $this->addBlankElement('a');
|
||||
$a->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetNoopener();
|
||||
}
|
||||
}
|
|
@ -271,11 +271,14 @@ class HTMLPurifier_HTMLModuleManager
|
|||
if ($config->get('HTML.TargetBlank')) {
|
||||
$modules[] = 'TargetBlank';
|
||||
}
|
||||
// NB: HTML.TargetNoreferrer must be AFTER HTML.TargetBlank
|
||||
// NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER HTML.TargetBlank
|
||||
// so that its post-attr-transform gets run afterwards.
|
||||
if ($config->get('HTML.TargetNoreferrer')) {
|
||||
$modules[] = 'TargetNoreferrer';
|
||||
}
|
||||
if ($config->get('HTML.TargetNoopener')) {
|
||||
$modules[] = 'TargetNoopener';
|
||||
}
|
||||
|
||||
// merge in custom modules
|
||||
$modules = array_merge($modules, $this->userModules);
|
||||
|
|
|
@ -157,11 +157,13 @@ abstract class HTMLPurifier_Injector
|
|||
return false;
|
||||
}
|
||||
// check for exclusion
|
||||
for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
|
||||
$node = $this->currentNesting[$i];
|
||||
$def = $this->htmlDefinition->info[$node->name];
|
||||
if (isset($def->excludes[$name])) {
|
||||
return false;
|
||||
if (!empty($this->currentNesting)) {
|
||||
for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
|
||||
$node = $this->currentNesting[$i];
|
||||
$def = $this->htmlDefinition->info[$node->name];
|
||||
if (isset($def->excludes[$name])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -26,12 +26,14 @@ class HTMLPurifier_Length
|
|||
protected $isValid;
|
||||
|
||||
/**
|
||||
* Array Lookup array of units recognized by CSS 2.1
|
||||
* Array Lookup array of units recognized by CSS 3
|
||||
* @type array
|
||||
*/
|
||||
protected static $allowedUnits = array(
|
||||
'em' => true, 'ex' => true, 'px' => true, 'in' => true,
|
||||
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true
|
||||
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true,
|
||||
'ch' => true, 'rem' => true, 'vw' => true, 'vh' => true,
|
||||
'vmin' => true, 'vmax' => true
|
||||
);
|
||||
|
||||
/**
|
||||
|
|
|
@ -96,7 +96,7 @@ class HTMLPurifier_Lexer
|
|||
break;
|
||||
}
|
||||
|
||||
if (class_exists('DOMDocument') &&
|
||||
if (class_exists('DOMDocument', false) &&
|
||||
method_exists('DOMDocument', 'loadHTML') &&
|
||||
!extension_loaded('domxml')
|
||||
) {
|
||||
|
@ -169,21 +169,24 @@ class HTMLPurifier_Lexer
|
|||
''' => "'"
|
||||
);
|
||||
|
||||
public function parseText($string, $config) {
|
||||
return $this->parseData($string, false, $config);
|
||||
}
|
||||
|
||||
public function parseAttr($string, $config) {
|
||||
return $this->parseData($string, true, $config);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses special entities into the proper characters.
|
||||
*
|
||||
* This string will translate escaped versions of the special characters
|
||||
* into the correct ones.
|
||||
*
|
||||
* @warning
|
||||
* You should be able to treat the output of this function as
|
||||
* completely parsed, but that's only because all other entities should
|
||||
* have been handled previously in substituteNonSpecialEntities()
|
||||
*
|
||||
* @param string $string String character data to be parsed.
|
||||
* @return string Parsed character data.
|
||||
*/
|
||||
public function parseData($string)
|
||||
public function parseData($string, $is_attr, $config)
|
||||
{
|
||||
// following functions require at least one character
|
||||
if ($string === '') {
|
||||
|
@ -209,7 +212,15 @@ class HTMLPurifier_Lexer
|
|||
}
|
||||
|
||||
// hmm... now we have some uncommon entities. Use the callback.
|
||||
$string = $this->_entity_parser->substituteSpecialEntities($string);
|
||||
if ($config->get('Core.LegacyEntityDecoder')) {
|
||||
$string = $this->_entity_parser->substituteSpecialEntities($string);
|
||||
} else {
|
||||
if ($is_attr) {
|
||||
$string = $this->_entity_parser->substituteAttrEntities($string);
|
||||
} else {
|
||||
$string = $this->_entity_parser->substituteTextEntities($string);
|
||||
}
|
||||
}
|
||||
return $string;
|
||||
}
|
||||
|
||||
|
@ -323,7 +334,9 @@ class HTMLPurifier_Lexer
|
|||
}
|
||||
|
||||
// expand entities that aren't the big five
|
||||
$html = $this->_entity_parser->substituteNonSpecialEntities($html);
|
||||
if ($config->get('Core.LegacyEntityDecoder')) {
|
||||
$html = $this->_entity_parser->substituteNonSpecialEntities($html);
|
||||
}
|
||||
|
||||
// clean into wellformed UTF-8 string for an SGML context: this has
|
||||
// to be done after entity expansion because the entities sometimes
|
||||
|
@ -335,6 +348,13 @@ class HTMLPurifier_Lexer
|
|||
$html = preg_replace('#<\?.+?\?>#s', '', $html);
|
||||
}
|
||||
|
||||
$hidden_elements = $config->get('Core.HiddenElements');
|
||||
if ($config->get('Core.AggressivelyRemoveScript') &&
|
||||
!($config->get('HTML.Trusted') || !$config->get('Core.RemoveScriptContents')
|
||||
|| empty($hidden_elements["script"]))) {
|
||||
$html = preg_replace('#<script[^>]*>.*?</script>#i', '', $html);
|
||||
}
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
|
|
|
@ -72,12 +72,20 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||
$doc->loadHTML($html);
|
||||
restore_error_handler();
|
||||
|
||||
$body = $doc->getElementsByTagName('html')->item(0)-> // <html>
|
||||
getElementsByTagName('body')->item(0); // <body>
|
||||
|
||||
$div = $body->getElementsByTagName('div')->item(0); // <div>
|
||||
$tokens = array();
|
||||
$this->tokenizeDOM(
|
||||
$doc->getElementsByTagName('html')->item(0)-> // <html>
|
||||
getElementsByTagName('body')->item(0), // <body>
|
||||
$tokens
|
||||
);
|
||||
$this->tokenizeDOM($div, $tokens, $config);
|
||||
// If the div has a sibling, that means we tripped across
|
||||
// a premature </div> tag. So remove the div we parsed,
|
||||
// and then tokenize the rest of body. We can't tokenize
|
||||
// the sibling directly as we'll lose the tags in that case.
|
||||
if ($div->nextSibling) {
|
||||
$body->removeChild($div);
|
||||
$this->tokenizeDOM($body, $tokens, $config);
|
||||
}
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
|
@ -88,7 +96,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||
* @param HTMLPurifier_Token[] $tokens Array-list of already tokenized tokens.
|
||||
* @return HTMLPurifier_Token of node appended to previously passed tokens.
|
||||
*/
|
||||
protected function tokenizeDOM($node, &$tokens)
|
||||
protected function tokenizeDOM($node, &$tokens, $config)
|
||||
{
|
||||
$level = 0;
|
||||
$nodes = array($level => new HTMLPurifier_Queue(array($node)));
|
||||
|
@ -97,7 +105,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||
while (!$nodes[$level]->isEmpty()) {
|
||||
$node = $nodes[$level]->shift(); // FIFO
|
||||
$collect = $level > 0 ? true : false;
|
||||
$needEndingTag = $this->createStartNode($node, $tokens, $collect);
|
||||
$needEndingTag = $this->createStartNode($node, $tokens, $collect, $config);
|
||||
if ($needEndingTag) {
|
||||
$closingNodes[$level][] = $node;
|
||||
}
|
||||
|
@ -118,6 +126,41 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||
} while ($level > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Portably retrieve the tag name of a node; deals with older versions
|
||||
* of libxml like 2.7.6
|
||||
* @param DOMNode $node
|
||||
*/
|
||||
protected function getTagName($node)
|
||||
{
|
||||
if (property_exists($node, 'tagName')) {
|
||||
return $node->tagName;
|
||||
} else if (property_exists($node, 'nodeName')) {
|
||||
return $node->nodeName;
|
||||
} else if (property_exists($node, 'localName')) {
|
||||
return $node->localName;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Portably retrieve the data of a node; deals with older versions
|
||||
* of libxml like 2.7.6
|
||||
* @param DOMNode $node
|
||||
*/
|
||||
protected function getData($node)
|
||||
{
|
||||
if (property_exists($node, 'data')) {
|
||||
return $node->data;
|
||||
} else if (property_exists($node, 'nodeValue')) {
|
||||
return $node->nodeValue;
|
||||
} else if (property_exists($node, 'textContent')) {
|
||||
return $node->textContent;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param DOMNode $node DOMNode to be tokenized.
|
||||
* @param HTMLPurifier_Token[] $tokens Array-list of already tokenized tokens.
|
||||
|
@ -127,13 +170,16 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||
* @return bool if the token needs an endtoken
|
||||
* @todo data and tagName properties don't seem to exist in DOMNode?
|
||||
*/
|
||||
protected function createStartNode($node, &$tokens, $collect)
|
||||
protected function createStartNode($node, &$tokens, $collect, $config)
|
||||
{
|
||||
// intercept non element nodes. WE MUST catch all of them,
|
||||
// but we're not getting the character reference nodes because
|
||||
// those should have been preprocessed
|
||||
if ($node->nodeType === XML_TEXT_NODE) {
|
||||
$tokens[] = $this->factory->createText($node->data);
|
||||
$data = $this->getData($node); // Handle variable data property
|
||||
if ($data !== null) {
|
||||
$tokens[] = $this->factory->createText($data);
|
||||
}
|
||||
return false;
|
||||
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
|
||||
// undo libxml's special treatment of <script> and <style> tags
|
||||
|
@ -151,7 +197,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||
}
|
||||
}
|
||||
}
|
||||
$tokens[] = $this->factory->createText($this->parseData($data));
|
||||
$tokens[] = $this->factory->createText($this->parseText($data, $config));
|
||||
return false;
|
||||
} elseif ($node->nodeType === XML_COMMENT_NODE) {
|
||||
// this is code is only invoked for comments in script/style in versions
|
||||
|
@ -163,21 +209,20 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||
// not-well tested: there may be other nodes we have to grab
|
||||
return false;
|
||||
}
|
||||
|
||||
$attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array();
|
||||
|
||||
$tag_name = $this->getTagName($node); // Handle variable tagName property
|
||||
if (empty($tag_name)) {
|
||||
return (bool) $node->childNodes->length;
|
||||
}
|
||||
// We still have to make sure that the element actually IS empty
|
||||
if (!$node->childNodes->length) {
|
||||
if ($collect) {
|
||||
$tokens[] = $this->factory->createEmpty($node->tagName, $attr);
|
||||
$tokens[] = $this->factory->createEmpty($tag_name, $attr);
|
||||
}
|
||||
return false;
|
||||
} else {
|
||||
if ($collect) {
|
||||
$tokens[] = $this->factory->createStart(
|
||||
$tag_name = $node->tagName, // somehow, it get's dropped
|
||||
$attr
|
||||
);
|
||||
$tokens[] = $this->factory->createStart($tag_name, $attr);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -189,10 +234,10 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||
*/
|
||||
protected function createEndNode($node, &$tokens)
|
||||
{
|
||||
$tokens[] = $this->factory->createEnd($node->tagName);
|
||||
$tag_name = $this->getTagName($node); // Handle variable tagName property
|
||||
$tokens[] = $this->factory->createEnd($tag_name);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
|
||||
*
|
||||
|
@ -252,7 +297,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||
* @param HTMLPurifier_Context $context
|
||||
* @return string
|
||||
*/
|
||||
protected function wrapHTML($html, $config, $context)
|
||||
protected function wrapHTML($html, $config, $context, $use_div = true)
|
||||
{
|
||||
$def = $config->getDefinition('HTML');
|
||||
$ret = '';
|
||||
|
@ -271,7 +316,11 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||
$ret .= '<html><head>';
|
||||
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
|
||||
// No protection if $html contains a stray </div>!
|
||||
$ret .= '</head><body>' . $html . '</body></html>';
|
||||
$ret .= '</head><body>';
|
||||
if ($use_div) $ret .= '<div>';
|
||||
$ret .= $html;
|
||||
if ($use_div) $ret .= '</div>';
|
||||
$ret .= '</body></html>';
|
||||
return $ret;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -129,12 +129,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||
// We are not inside tag and there still is another tag to parse
|
||||
$token = new
|
||||
HTMLPurifier_Token_Text(
|
||||
$this->parseData(
|
||||
$this->parseText(
|
||||
substr(
|
||||
$html,
|
||||
$cursor,
|
||||
$position_next_lt - $cursor
|
||||
)
|
||||
), $config
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) {
|
||||
|
@ -154,11 +154,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||
// Create Text of rest of string
|
||||
$token = new
|
||||
HTMLPurifier_Token_Text(
|
||||
$this->parseData(
|
||||
$this->parseText(
|
||||
substr(
|
||||
$html,
|
||||
$cursor
|
||||
)
|
||||
), $config
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) {
|
||||
|
@ -324,8 +324,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||
$token = new
|
||||
HTMLPurifier_Token_Text(
|
||||
'<' .
|
||||
$this->parseData(
|
||||
substr($html, $cursor)
|
||||
$this->parseText(
|
||||
substr($html, $cursor), $config
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) {
|
||||
|
@ -429,7 +429,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||
if ($value === false) {
|
||||
$value = '';
|
||||
}
|
||||
return array($key => $this->parseData($value));
|
||||
return array($key => $this->parseAttr($value, $config));
|
||||
}
|
||||
|
||||
// setup loop environment
|
||||
|
@ -518,7 +518,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||
if ($value === false) {
|
||||
$value = '';
|
||||
}
|
||||
$array[$key] = $this->parseData($value);
|
||||
$array[$key] = $this->parseAttr($value, $config);
|
||||
$cursor++;
|
||||
} else {
|
||||
// boolattr
|
||||
|
|
|
@ -21,7 +21,7 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
|
|||
public function tokenizeHTML($html, $config, $context)
|
||||
{
|
||||
$new_html = $this->normalize($html, $config, $context);
|
||||
$new_html = $this->wrapHTML($new_html, $config, $context);
|
||||
$new_html = $this->wrapHTML($new_html, $config, $context, false /* no div */);
|
||||
try {
|
||||
$parser = new HTML5($new_html);
|
||||
$doc = $parser->save();
|
||||
|
@ -34,9 +34,9 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
|
|||
$tokens = array();
|
||||
$this->tokenizeDOM(
|
||||
$doc->getElementsByTagName('html')->item(0)-> // <html>
|
||||
getElementsByTagName('body')->item(0) // <body>
|
||||
getElementsByTagName('body')->item(0) // <body>
|
||||
,
|
||||
$tokens
|
||||
$tokens, $config
|
||||
);
|
||||
return $tokens;
|
||||
}
|
||||
|
@ -1507,7 +1507,7 @@ class HTML5
|
|||
$entity = $this->character($start, $this->char);
|
||||
$cond = strlen($e_name) > 0;
|
||||
|
||||
// The rest of the parsing happens bellow.
|
||||
// The rest of the parsing happens below.
|
||||
break;
|
||||
|
||||
// Anything else
|
||||
|
@ -1515,6 +1515,7 @@ class HTML5
|
|||
// Consume the maximum number of characters possible, with the
|
||||
// consumed characters case-sensitively matching one of the
|
||||
// identifiers in the first column of the entities table.
|
||||
|
||||
$e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
|
||||
$len = strlen($e_name);
|
||||
|
||||
|
@ -1534,7 +1535,7 @@ class HTML5
|
|||
}
|
||||
|
||||
$cond = isset($entity);
|
||||
// The rest of the parsing happens bellow.
|
||||
// The rest of the parsing happens below.
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1547,7 +1548,7 @@ class HTML5
|
|||
|
||||
// Return a character token for the character corresponding to the
|
||||
// entity name (as given by the second column of the entities table).
|
||||
return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8');
|
||||
return html_entity_decode('&' . rtrim($entity, ';') . ';', ENT_QUOTES, 'UTF-8');
|
||||
}
|
||||
|
||||
private function emitToken($token)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
GLOBIGNORE=$0; rm -rf *
|
||||
rm ../HTMLPurifier*.php
|
||||
|
||||
curl https://codeload.github.com/ezyang/htmlpurifier/tar.gz/v4.8.0 -o htmlpurifier-4.8.0.tar.gz
|
||||
tar xzf htmlpurifier-4.8.0.tar.gz --strip-components 1 htmlpurifier-4.8.0/LICENSE
|
||||
tar xzf htmlpurifier-4.8.0.tar.gz --strip-components 1 htmlpurifier-4.8.0/VERSION
|
||||
tar xzf htmlpurifier-4.8.0.tar.gz -C ../ --strip-components 2 htmlpurifier-4.8.0/library/HTMLPurifier.php
|
||||
tar xzf htmlpurifier-4.8.0.tar.gz -C ../ --strip-components 2 htmlpurifier-4.8.0/library/HTMLPurifier.autoload.php
|
||||
tar xzf htmlpurifier-4.8.0.tar.gz --strip-components 3 htmlpurifier-4.8.0/library/HTMLPurifier/*
|
||||
rm htmlpurifier-4.8.0.tar.gz
|
||||
curl https://codeload.github.com/ezyang/htmlpurifier/tar.gz/v4.10.0 -o htmlpurifier-4.10.0.tar.gz
|
||||
tar xzf htmlpurifier-4.10.0.tar.gz --strip-components 1 htmlpurifier-4.10.0/LICENSE
|
||||
tar xzf htmlpurifier-4.10.0.tar.gz --strip-components 1 htmlpurifier-4.10.0/VERSION
|
||||
tar xzf htmlpurifier-4.10.0.tar.gz -C ../ --strip-components 2 htmlpurifier-4.10.0/library/HTMLPurifier.php
|
||||
tar xzf htmlpurifier-4.10.0.tar.gz -C ../ --strip-components 2 htmlpurifier-4.10.0/library/HTMLPurifier.autoload.php
|
||||
tar xzf htmlpurifier-4.10.0.tar.gz --strip-components 3 htmlpurifier-4.10.0/library/HTMLPurifier/*
|
||||
rm htmlpurifier-4.10.0.tar.gz
|
||||
|
|
|
@ -165,7 +165,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||
if (empty($zipper->front)) break;
|
||||
$token = $zipper->prev($token);
|
||||
// indicate that other injectors should not process this token,
|
||||
// but we need to reprocess it
|
||||
// but we need to reprocess it. See Note [Injector skips]
|
||||
unset($token->skip[$i]);
|
||||
$token->rewind = $i;
|
||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||
|
@ -210,6 +210,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||
if ($token instanceof HTMLPurifier_Token_Text) {
|
||||
foreach ($this->injectors as $i => $injector) {
|
||||
if (isset($token->skip[$i])) {
|
||||
// See Note [Injector skips]
|
||||
continue;
|
||||
}
|
||||
if ($token->rewind !== null && $token->rewind !== $i) {
|
||||
|
@ -367,6 +368,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||
if ($ok) {
|
||||
foreach ($this->injectors as $i => $injector) {
|
||||
if (isset($token->skip[$i])) {
|
||||
// See Note [Injector skips]
|
||||
continue;
|
||||
}
|
||||
if ($token->rewind !== null && $token->rewind !== $i) {
|
||||
|
@ -422,6 +424,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||
$token->start = $current_parent;
|
||||
foreach ($this->injectors as $i => $injector) {
|
||||
if (isset($token->skip[$i])) {
|
||||
// See Note [Injector skips]
|
||||
continue;
|
||||
}
|
||||
if ($token->rewind !== null && $token->rewind !== $i) {
|
||||
|
@ -534,12 +537,17 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||
*/
|
||||
protected function processToken($token, $injector = -1)
|
||||
{
|
||||
// Zend OpCache miscompiles $token = array($token), so
|
||||
// avoid this pattern. See: https://github.com/ezyang/htmlpurifier/issues/108
|
||||
|
||||
// normalize forms of token
|
||||
if (is_object($token)) {
|
||||
$token = array(1, $token);
|
||||
$tmp = $token;
|
||||
$token = array(1, $tmp);
|
||||
}
|
||||
if (is_int($token)) {
|
||||
$token = array($token);
|
||||
$tmp = $token;
|
||||
$token = array($tmp);
|
||||
}
|
||||
if ($token === false) {
|
||||
$token = array(1);
|
||||
|
@ -561,7 +569,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||
list($old, $r) = $this->zipper->splice($this->token, $delete, $token);
|
||||
|
||||
if ($injector > -1) {
|
||||
// determine appropriate skips
|
||||
// See Note [Injector skips]
|
||||
// Determine appropriate skips. Here's what the code does:
|
||||
// *If* we deleted one or more tokens, copy the skips
|
||||
// of those tokens into the skips of the new tokens (in $token).
|
||||
// Also, mark the newly inserted tokens as having come from
|
||||
// $injector.
|
||||
$oldskip = isset($old[0]) ? $old[0]->skip : array();
|
||||
foreach ($token as $object) {
|
||||
$object->skip = $oldskip;
|
||||
|
@ -597,4 +610,50 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||
}
|
||||
}
|
||||
|
||||
// Note [Injector skips]
|
||||
// ~~~~~~~~~~~~~~~~~~~~~
|
||||
// When I originally designed this class, the idea behind the 'skip'
|
||||
// property of HTMLPurifier_Token was to help avoid infinite loops
|
||||
// in injector processing. For example, suppose you wrote an injector
|
||||
// that bolded swear words. Naively, you might write it so that
|
||||
// whenever you saw ****, you replaced it with <strong>****</strong>.
|
||||
//
|
||||
// When this happens, we will reprocess all of the tokens with the
|
||||
// other injectors. Now there is an opportunity for infinite loop:
|
||||
// if we rerun the swear-word injector on these tokens, we might
|
||||
// see **** and then reprocess again to get
|
||||
// <strong><strong>****</strong></strong> ad infinitum.
|
||||
//
|
||||
// Thus, the idea of a skip is that once we process a token with
|
||||
// an injector, we mark all of those tokens as having "come from"
|
||||
// the injector, and we never run the injector again on these
|
||||
// tokens.
|
||||
//
|
||||
// There were two more complications, however:
|
||||
//
|
||||
// - With HTMLPurifier_Injector_RemoveEmpty, we noticed that if
|
||||
// you had <b><i></i></b>, after you removed the <i></i>, you
|
||||
// really would like this injector to go back and reprocess
|
||||
// the <b> tag, discovering that it is now empty and can be
|
||||
// removed. So we reintroduced the possibility of infinite looping
|
||||
// by adding a "rewind" function, which let you go back to an
|
||||
// earlier point in the token stream and reprocess it with injectors.
|
||||
// Needless to say, we need to UN-skip the token so it gets
|
||||
// reprocessed.
|
||||
//
|
||||
// - Suppose that you successfuly process a token, replace it with
|
||||
// one with your skip mark, but now another injector wants to
|
||||
// process the skipped token with another token. Should you continue
|
||||
// to skip that new token, or reprocess it? If you reprocess,
|
||||
// you can end up with an infinite loop where one injector converts
|
||||
// <a> to <b>, and then another injector converts it back. So
|
||||
// we inherit the skips, but for some reason, I thought that we
|
||||
// should inherit the skip from the first token of the token
|
||||
// that we deleted. Why? Well, it seems to work OK.
|
||||
//
|
||||
// If I were to redesign this functionality, I would absolutely not
|
||||
// go about doing it this way: the semantics are just not very well
|
||||
// defined, and in any case you probably wanted to operate on trees,
|
||||
// not token streams.
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
|
|
@ -26,7 +26,7 @@ abstract class HTMLPurifier_Token
|
|||
public $armor = array();
|
||||
|
||||
/**
|
||||
* Used during MakeWellFormed.
|
||||
* Used during MakeWellFormed. See Note [Injector skips]
|
||||
* @type
|
||||
*/
|
||||
public $skip;
|
||||
|
|
|
@ -85,11 +85,13 @@ class HTMLPurifier_URI
|
|||
$def = $config->getDefinition('URI');
|
||||
$scheme_obj = $def->getDefaultScheme($config, $context);
|
||||
if (!$scheme_obj) {
|
||||
// something funky happened to the default scheme object
|
||||
trigger_error(
|
||||
'Default scheme object "' . $def->defaultScheme . '" was not readable',
|
||||
E_USER_WARNING
|
||||
);
|
||||
if ($def->defaultScheme !== null) {
|
||||
// something funky happened to the default scheme object
|
||||
trigger_error(
|
||||
'Default scheme object "' . $def->defaultScheme . '" was not readable',
|
||||
E_USER_WARNING
|
||||
);
|
||||
} // suppress error if it's null
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1 +1 @@
|
|||
4.8.0
|
||||
4.10.0
|
Loading…
Reference in New Issue