From 46907735fe19746c263766c3ae2fe93afde87289 Mon Sep 17 00:00:00 2001
From: Eric Lippmann
Date: Mon, 25 Jun 2018 16:04:10 +0200
Subject: [PATCH] Update HTMLPurifier
---
library/vendor/HTMLPurifier.autoload.php | 5 +-
library/vendor/HTMLPurifier.php | 6 +-
library/vendor/HTMLPurifier/Arborize.php | 6 +-
library/vendor/HTMLPurifier/AttrDef.php | 8 +-
library/vendor/HTMLPurifier/AttrDef/CSS.php | 37 ++++-
.../vendor/HTMLPurifier/AttrDef/CSS/Color.php | 128 ++++++++++++-----
.../vendor/HTMLPurifier/AttrDef/URI/Host.php | 9 +-
.../AttrTransform/TargetNoopener.php | 37 +++++
library/vendor/HTMLPurifier/CSSDefinition.php | 4 +
library/vendor/HTMLPurifier/ChildDef/List.php | 4 +-
.../vendor/HTMLPurifier/ChildDef/Table.php | 2 +-
library/vendor/HTMLPurifier/Config.php | 4 +-
library/vendor/HTMLPurifier/ConfigSchema.php | 14 +-
.../HTMLPurifier/ConfigSchema/schema.ser | Bin 15598 -> 15923 bytes
.../schema/Core.AggressivelyRemoveScript.txt | 16 +++
.../schema/Core.LegacyEntityDecoder.txt | 36 +++++
.../schema/HTML.TargetNoopener.txt | 10 ++
.../ConfigSchema/schema/URI.DefaultScheme.txt | 7 +-
.../DefinitionCache/Serializer.php | 34 +++--
.../DefinitionCache/Serializer/README | 0
library/vendor/HTMLPurifier/Encoder.php | 12 +-
library/vendor/HTMLPurifier/EntityParser.php | 134 +++++++++++++++++-
.../Filter/ExtractStyleBlocks.php | 5 +-
library/vendor/HTMLPurifier/Generator.php | 2 +-
.../HTMLModule/TargetNoopener.php | 21 +++
.../vendor/HTMLPurifier/HTMLModuleManager.php | 5 +-
library/vendor/HTMLPurifier/Injector.php | 12 +-
library/vendor/HTMLPurifier/Length.php | 6 +-
library/vendor/HTMLPurifier/Lexer.php | 38 +++--
library/vendor/HTMLPurifier/Lexer/DOMLex.php | 91 +++++++++---
.../vendor/HTMLPurifier/Lexer/DirectLex.php | 16 +--
library/vendor/HTMLPurifier/Lexer/PH5P.php | 13 +-
library/vendor/HTMLPurifier/SOURCE | 14 +-
.../HTMLPurifier/Strategy/MakeWellFormed.php | 67 ++++++++-
library/vendor/HTMLPurifier/Token.php | 2 +-
library/vendor/HTMLPurifier/URI.php | 12 +-
library/vendor/HTMLPurifier/VERSION | 2 +-
37 files changed, 655 insertions(+), 164 deletions(-)
create mode 100644 library/vendor/HTMLPurifier/AttrTransform/TargetNoopener.php
create mode 100644 library/vendor/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt
create mode 100644 library/vendor/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt
create mode 100644 library/vendor/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt
mode change 100644 => 100755 library/vendor/HTMLPurifier/DefinitionCache/Serializer/README
create mode 100644 library/vendor/HTMLPurifier/HTMLModule/TargetNoopener.php
diff --git a/library/vendor/HTMLPurifier.autoload.php b/library/vendor/HTMLPurifier.autoload.php
index c3ea67e81..9d8d29926 100644
--- a/library/vendor/HTMLPurifier.autoload.php
+++ b/library/vendor/HTMLPurifier.autoload.php
@@ -14,10 +14,7 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un
spl_autoload_register('__autoload');
}
} elseif (!function_exists('__autoload')) {
- function __autoload($class)
- {
- return HTMLPurifier_Bootstrap::autoload($class);
- }
+ require dirname(__FILE__) . '/HTMLPurifier.autoload-legacy.php';
}
if (ini_get('zend.ze1_compatibility_mode')) {
diff --git a/library/vendor/HTMLPurifier.php b/library/vendor/HTMLPurifier.php
index 38a78e8da..bada5188b 100644
--- a/library/vendor/HTMLPurifier.php
+++ b/library/vendor/HTMLPurifier.php
@@ -19,7 +19,7 @@
*/
/*
- HTML Purifier 4.8.0 - Standards Compliant HTML Filtering
+ HTML Purifier 4.10.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -58,12 +58,12 @@ class HTMLPurifier
* Version of HTML Purifier.
* @type string
*/
- public $version = '4.8.0';
+ public $version = '4.10.0';
/**
* Constant with version of HTML Purifier.
*/
- const VERSION = '4.8.0';
+ const VERSION = '4.10.0';
/**
* Global configuration object.
diff --git a/library/vendor/HTMLPurifier/Arborize.php b/library/vendor/HTMLPurifier/Arborize.php
index 9e6617be5..d2e9d22a2 100644
--- a/library/vendor/HTMLPurifier/Arborize.php
+++ b/library/vendor/HTMLPurifier/Arborize.php
@@ -19,8 +19,8 @@ class HTMLPurifier_Arborize
if ($token instanceof HTMLPurifier_Token_End) {
$token->start = null; // [MUT]
$r = array_pop($stack);
- assert($r->name === $token->name);
- assert(empty($token->attr));
+ //assert($r->name === $token->name);
+ //assert(empty($token->attr));
$r->endCol = $token->col;
$r->endLine = $token->line;
$r->endArmor = $token->armor;
@@ -32,7 +32,7 @@ class HTMLPurifier_Arborize
$stack[] = $node;
}
}
- assert(count($stack) == 1);
+ //assert(count($stack) == 1);
return $stack[0];
}
diff --git a/library/vendor/HTMLPurifier/AttrDef.php b/library/vendor/HTMLPurifier/AttrDef.php
index 5ac06522b..739646fa7 100644
--- a/library/vendor/HTMLPurifier/AttrDef.php
+++ b/library/vendor/HTMLPurifier/AttrDef.php
@@ -86,7 +86,13 @@ abstract class HTMLPurifier_AttrDef
*/
protected function mungeRgb($string)
{
- return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
+ $p = '\s*(\d+(\.\d+)?([%]?))\s*';
+
+ if (preg_match('/(rgba|hsla)\(/', $string)) {
+ return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string);
+ }
+
+ return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string);
}
/**
diff --git a/library/vendor/HTMLPurifier/AttrDef/CSS.php b/library/vendor/HTMLPurifier/AttrDef/CSS.php
index 2b977ca38..ad2cb90ad 100644
--- a/library/vendor/HTMLPurifier/AttrDef/CSS.php
+++ b/library/vendor/HTMLPurifier/AttrDef/CSS.php
@@ -27,13 +27,38 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
$definition = $config->getCSSDefinition();
$allow_duplicates = $config->get("CSS.AllowDuplicates");
- // we're going to break the spec and explode by semicolons.
- // This is because semicolon rarely appears in escaped form
- // Doing this is generally flaky but fast
- // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
- // for details
- $declarations = explode(';', $css);
+ // According to the CSS2.1 spec, the places where a
+ // non-delimiting semicolon can appear are in strings and
+ // escape sequences. So here is some dumb hack to
+ // handle quotes (escape sequences are not handled).
+ $len = strlen($css);
+ $accum = "";
+ $declarations = array();
+ $quoted = false;
+ for ($i = 0; $i < $len; $i++) {
+ $c = strcspn($css, ";'\"", $i);
+ $accum .= substr($css, $i, $c);
+ $i += $c;
+ if ($i == $len) break;
+ $d = $css[$i];
+ if ($quoted) {
+ $accum .= $d;
+ if ($d == $quoted) {
+ $quoted = false;
+ }
+ } else {
+ if ($d == ";") {
+ $declarations[] = $accum;
+ $accum = "";
+ } else {
+ $accum .= $d;
+ $quoted = $d;
+ }
+ }
+ }
+ if ($accum != "") $declarations[] = $accum;
+
$propvalues = array();
$new_declarations = '';
diff --git a/library/vendor/HTMLPurifier/AttrDef/CSS/Color.php b/library/vendor/HTMLPurifier/AttrDef/CSS/Color.php
index 16d2a6b98..d7287a00c 100644
--- a/library/vendor/HTMLPurifier/AttrDef/CSS/Color.php
+++ b/library/vendor/HTMLPurifier/AttrDef/CSS/Color.php
@@ -6,6 +6,16 @@
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{
+ /**
+ * @type HTMLPurifier_AttrDef_CSS_AlphaValue
+ */
+ protected $alpha;
+
+ public function __construct()
+ {
+ $this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue();
+ }
+
/**
* @param string $color
* @param HTMLPurifier_Config $config
@@ -29,59 +39,104 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
return $colors[$lower];
}
- if (strpos($color, 'rgb(') !== false) {
- // rgb literal handling
+ if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) {
$length = strlen($color);
if (strpos($color, ')') !== $length - 1) {
return false;
}
- $triad = substr($color, 4, $length - 4 - 1);
- $parts = explode(',', $triad);
- if (count($parts) !== 3) {
+
+ // Determine which function is used: rgb, rgba, hsl or hsla
+ $function = $matches[1];
+
+ $parameters_size = 3;
+ $alpha_channel = false;
+ if (substr($function, -1) === 'a') {
+ $parameters_size = 4;
+ $alpha_channel = true;
+ }
+
+ /*
+ * Allowed types for values :
+ * parameter_position => [type => max_value]
+ */
+ $allowed_types = array(
+ 1 => array('percentage' => 100, 'integer' => 255),
+ 2 => array('percentage' => 100, 'integer' => 255),
+ 3 => array('percentage' => 100, 'integer' => 255),
+ );
+ $allow_different_types = false;
+
+ if (strpos($function, 'hsl') !== false) {
+ $allowed_types = array(
+ 1 => array('integer' => 360),
+ 2 => array('percentage' => 100),
+ 3 => array('percentage' => 100),
+ );
+ $allow_different_types = true;
+ }
+
+ $values = trim(str_replace($function, '', $color), ' ()');
+
+ $parts = explode(',', $values);
+ if (count($parts) !== $parameters_size) {
return false;
}
- $type = false; // to ensure that they're all the same type
+
+ $type = false;
$new_parts = array();
+ $i = 0;
+
foreach ($parts as $part) {
+ $i++;
$part = trim($part);
+
if ($part === '') {
return false;
}
- $length = strlen($part);
- if ($part[$length - 1] === '%') {
- // handle percents
- if (!$type) {
- $type = 'percentage';
- } elseif ($type !== 'percentage') {
+
+ // different check for alpha channel
+ if ($alpha_channel === true && $i === count($parts)) {
+ $result = $this->alpha->validate($part, $config, $context);
+
+ if ($result === false) {
return false;
}
- $num = (float)substr($part, 0, $length - 1);
- if ($num < 0) {
- $num = 0;
- }
- if ($num > 100) {
- $num = 100;
- }
- $new_parts[] = "$num%";
+
+ $new_parts[] = (string)$result;
+ continue;
+ }
+
+ if (substr($part, -1) === '%') {
+ $current_type = 'percentage';
} else {
- // handle integers
- if (!$type) {
- $type = 'integer';
- } elseif ($type !== 'integer') {
- return false;
- }
- $num = (int)$part;
- if ($num < 0) {
- $num = 0;
- }
- if ($num > 255) {
- $num = 255;
- }
- $new_parts[] = (string)$num;
+ $current_type = 'integer';
+ }
+
+ if (!array_key_exists($current_type, $allowed_types[$i])) {
+ return false;
+ }
+
+ if (!$type) {
+ $type = $current_type;
+ }
+
+ if ($allow_different_types === false && $type != $current_type) {
+ return false;
+ }
+
+ $max_value = $allowed_types[$i][$current_type];
+
+ if ($current_type == 'integer') {
+ // Clamp the value to the range 0..$max_value
+ $new_parts[] = (int)max(min($part, $max_value), 0);
+ } elseif ($current_type == 'percentage') {
+ $new_parts[] = (float)max(min(rtrim($part, '%'), $max_value), 0) . '%';
}
}
- $new_triad = implode(',', $new_parts);
- $color = "rgb($new_triad)";
+
+ $new_values = implode(',', $new_parts);
+
+ $color = $function . '(' . $new_values . ')';
} else {
// hexadecimal handling
if ($color[0] === '#') {
@@ -100,6 +155,7 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
}
return $color;
}
+
}
// vim: et sw=4 sts=4
diff --git a/library/vendor/HTMLPurifier/AttrDef/URI/Host.php b/library/vendor/HTMLPurifier/AttrDef/URI/Host.php
index 151f7aff7..e54a3344a 100644
--- a/library/vendor/HTMLPurifier/AttrDef/URI/Host.php
+++ b/library/vendor/HTMLPurifier/AttrDef/URI/Host.php
@@ -97,7 +97,7 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// PHP 5.3 and later support this functionality natively
if (function_exists('idn_to_ascii')) {
- return idn_to_ascii($string);
+ $string = idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46);
// If we have Net_IDNA2 support, we can support IRIs by
// punycoding them. (This is the most portable thing to do,
@@ -123,13 +123,14 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
}
}
$string = implode('.', $new_parts);
- if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
- return $string;
- }
} catch (Exception $e) {
// XXX error reporting
}
}
+ // Try again, now that the host may have been converted to punycode
+ if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
+ return $string;
+ }
return false;
}
}
diff --git a/library/vendor/HTMLPurifier/AttrTransform/TargetNoopener.php b/library/vendor/HTMLPurifier/AttrTransform/TargetNoopener.php
new file mode 100644
index 000000000..1db3c6c09
--- /dev/null
+++ b/library/vendor/HTMLPurifier/AttrTransform/TargetNoopener.php
@@ -0,0 +1,37 @@
+get('CSS.MaxImgLength');
+ $this->info['min-width'] =
+ $this->info['max-width'] =
+ $this->info['min-height'] =
+ $this->info['max-height'] =
$this->info['width'] =
$this->info['height'] =
$max === null ?
diff --git a/library/vendor/HTMLPurifier/ChildDef/List.php b/library/vendor/HTMLPurifier/ChildDef/List.php
index 5a53a4b49..4fc70e0ef 100644
--- a/library/vendor/HTMLPurifier/ChildDef/List.php
+++ b/library/vendor/HTMLPurifier/ChildDef/List.php
@@ -50,7 +50,7 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
- $current_li = false;
+ $current_li = null;
foreach ($children as $node) {
if (!empty($node->is_whitespace)) {
@@ -71,7 +71,7 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
// to handle non-list elements; non-list elements should
// not be appended to an existing li; only li created
// for non-list. This distinction is not currently made.
- if ($current_li === false) {
+ if ($current_li === null) {
$current_li = new HTMLPurifier_Node_Element('li');
$result[] = $current_li;
}
diff --git a/library/vendor/HTMLPurifier/ChildDef/Table.php b/library/vendor/HTMLPurifier/ChildDef/Table.php
index 3e4a0f218..cb6b3e6cd 100644
--- a/library/vendor/HTMLPurifier/ChildDef/Table.php
+++ b/library/vendor/HTMLPurifier/ChildDef/Table.php
@@ -203,7 +203,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
$current_tr_tbody->children[] = $node;
break;
case '#PCDATA':
- assert($node->is_whitespace);
+ //assert($node->is_whitespace);
if ($current_tr_tbody === null) {
$ret[] = $node;
} else {
diff --git a/library/vendor/HTMLPurifier/Config.php b/library/vendor/HTMLPurifier/Config.php
index 7b9dcf0ec..f37cf3713 100644
--- a/library/vendor/HTMLPurifier/Config.php
+++ b/library/vendor/HTMLPurifier/Config.php
@@ -21,7 +21,7 @@ class HTMLPurifier_Config
* HTML Purifier's version
* @type string
*/
- public $version = '4.8.0';
+ public $version = '4.10.0';
/**
* Whether or not to automatically finalize
@@ -333,7 +333,7 @@ class HTMLPurifier_Config
}
// Raw type might be negative when using the fully optimized form
- // of stdclass, which indicates allow_null == true
+ // of stdClass, which indicates allow_null == true
$rtype = is_int($def) ? $def : $def->type;
if ($rtype < 0) {
$type = -$rtype;
diff --git a/library/vendor/HTMLPurifier/ConfigSchema.php b/library/vendor/HTMLPurifier/ConfigSchema.php
index bfbb0f92f..655c0e97a 100644
--- a/library/vendor/HTMLPurifier/ConfigSchema.php
+++ b/library/vendor/HTMLPurifier/ConfigSchema.php
@@ -24,11 +24,11 @@ class HTMLPurifier_ConfigSchema
*
* array(
* 'Namespace' => array(
- * 'Directive' => new stdclass(),
+ * 'Directive' => new stdClass(),
* )
* )
*
- * The stdclass may have the following properties:
+ * The stdClass may have the following properties:
*
* - If isAlias isn't set:
* - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
@@ -39,8 +39,8 @@ class HTMLPurifier_ConfigSchema
* - namespace: Namespace this directive aliases to
* - name: Directive name this directive aliases to
*
- * In certain degenerate cases, stdclass will actually be an integer. In
- * that case, the value is equivalent to an stdclass with the type
+ * In certain degenerate cases, stdClass will actually be an integer. In
+ * that case, the value is equivalent to an stdClass with the type
* property set to the integer. If the integer is negative, type is
* equal to the absolute value of integer, and allow_null is true.
*
@@ -105,7 +105,7 @@ class HTMLPurifier_ConfigSchema
*/
public function add($key, $default, $type, $allow_null)
{
- $obj = new stdclass();
+ $obj = new stdClass();
$obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
if ($allow_null) {
$obj->allow_null = true;
@@ -152,14 +152,14 @@ class HTMLPurifier_ConfigSchema
*/
public function addAlias($key, $new_key)
{
- $obj = new stdclass;
+ $obj = new stdClass;
$obj->key = $new_key;
$obj->isAlias = true;
$this->info[$key] = $obj;
}
/**
- * Replaces any stdclass that only has the type property with type integer.
+ * Replaces any stdClass that only has the type property with type integer.
*/
public function postProcess()
{
diff --git a/library/vendor/HTMLPurifier/ConfigSchema/schema.ser b/library/vendor/HTMLPurifier/ConfigSchema/schema.ser
index 0a7a406e132dd5dfe79e4b52f8f44b7d4f8d5481..371e948f1c76d99bacea65b4735454656858edbf 100644
GIT binary patch
delta 384
zcmaD?xw&S7Iiu-jKVddDc}puL=lr5nJ;(I)qSWHz%(B#+%AnNT{Ib;G&7YIsj=Cyvv2mt72fR+FN
delta 101
zcmdm7^R9A&Iiu0$UBYZ^n_o&UW8S=3Hk5VpBMa%x2l#(7
+ This directive enables aggressive pre-filter removal of
+ script tags. This is not necessary for security,
+ but it can help work around a bug in libxml where embedded
+ HTML elements inside script sections cause the parser to
+ choke. To revert to pre-4.9.0 behavior, set this to false.
+ This directive has no effect if %Core.Trusted is true,
+ %Core.RemoveScriptContents is false, or %Core.HiddenElements
+ does not contain script.
+
+--# vim: et sw=4 sts=4
diff --git a/library/vendor/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt b/library/vendor/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt
new file mode 100644
index 000000000..392b43649
--- /dev/null
+++ b/library/vendor/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt
@@ -0,0 +1,36 @@
+Core.LegacyEntityDecoder
+TYPE: bool
+VERSION: 4.9.0
+DEFAULT: false
+--DESCRIPTION--
+
+ Prior to HTML Purifier 4.9.0, entities were decoded by performing
+ a global search replace for all entities whose decoded versions
+ did not have special meanings under HTML, and replaced them with
+ their decoded versions. We would match all entities, even if they did
+ not have a trailing semicolon, but only if there weren't any trailing
+ alphanumeric characters.
+
+
+Original | Text | Attribute |
+¥ | ¥ | ¥ |
+¥ | ¥ | ¥ |
+¥a | ¥a | ¥a |
+¥= | ¥= | ¥= |
+
+
+ In HTML Purifier 4.9.0, we changed the behavior of entity parsing
+ to match entities that had missing trailing semicolons in fewer
+ cases, to more closely match HTML5 parsing behavior:
+
+
+Original | Text | Attribute |
+¥ | ¥ | ¥ |
+¥ | ¥ | ¥ |
+¥a | ¥a | ¥a |
+¥= | ¥= | ¥= |
+
+
+ This flag reverts back to pre-HTML Purifier 4.9.0 behavior.
+
+--# vim: et sw=4 sts=4
diff --git a/library/vendor/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt b/library/vendor/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt
new file mode 100644
index 000000000..dd514c0de
--- /dev/null
+++ b/library/vendor/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt
@@ -0,0 +1,10 @@
+--# vim: et sw=4 sts=4
+HTML.TargetNoopener
+TYPE: bool
+VERSION: 4.8.0
+DEFAULT: TRUE
+--DESCRIPTION--
+If enabled, noopener rel attributes are added to links which have
+a target attribute associated with them. This prevents malicious
+destinations from overwriting the original window.
+--# vim: et sw=4 sts=4
diff --git a/library/vendor/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt b/library/vendor/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt
index 728e378cb..834bc08c0 100644
--- a/library/vendor/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt
+++ b/library/vendor/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt
@@ -1,5 +1,5 @@
URI.DefaultScheme
-TYPE: string
+TYPE: string/null
DEFAULT: 'http'
--DESCRIPTION--
@@ -7,4 +7,9 @@ DEFAULT: 'http'
Defines through what scheme the output will be served, in order to
select the proper object validator when no scheme information is present.
+
+
+ Starting with HTML Purifier 4.9.0, the default scheme can be null, in
+ which case we reject all URIs which do not have explicit schemes.
+
--# vim: et sw=4 sts=4
diff --git a/library/vendor/HTMLPurifier/DefinitionCache/Serializer.php b/library/vendor/HTMLPurifier/DefinitionCache/Serializer.php
index f930c6b94..b82c6bb20 100644
--- a/library/vendor/HTMLPurifier/DefinitionCache/Serializer.php
+++ b/library/vendor/HTMLPurifier/DefinitionCache/Serializer.php
@@ -112,6 +112,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
}
unlink($dir . '/' . $filename);
}
+ closedir($dh);
return true;
}
@@ -142,6 +143,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
unlink($dir . '/' . $filename);
}
}
+ closedir($dh);
return true;
}
@@ -198,11 +200,8 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
if ($result !== false) {
// set permissions of the new file (no execute)
$chmod = $config->get('Cache.SerializerPermissions');
- if ($chmod === null) {
- // don't do anything
- } else {
- $chmod = $chmod & 0666;
- chmod($file, $chmod);
+ if ($chmod !== null) {
+ chmod($file, $chmod & 0666);
}
}
return $result;
@@ -217,6 +216,16 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
{
$directory = $this->generateDirectoryPath($config);
$chmod = $config->get('Cache.SerializerPermissions');
+ if ($chmod === null) {
+ if (!@mkdir($directory) && !is_dir($directory)) {
+ trigger_error(
+ 'Could not create directory ' . $directory . '',
+ E_USER_WARNING
+ );
+ return false;
+ }
+ return true;
+ }
if (!is_dir($directory)) {
$base = $this->generateBaseDirectoryPath($config);
if (!is_dir($base)) {
@@ -229,25 +238,14 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
} elseif (!$this->_testPermissions($base, $chmod)) {
return false;
}
- if ($chmod === null) {
+ if (!@mkdir($directory, $chmod) && !is_dir($directory)) {
trigger_error(
- 'Base directory ' . $base . ' does not exist,
- please create or change using %Cache.SerializerPath',
+ 'Could not create directory ' . $directory . '',
E_USER_WARNING
);
return false;
}
- if ($chmod !== null) {
- mkdir($directory, $chmod);
- } else {
- mkdir($directory);
- }
if (!$this->_testPermissions($directory, $chmod)) {
- trigger_error(
- 'Base directory ' . $base . ' does not exist,
- please create or change using %Cache.SerializerPath',
- E_USER_WARNING
- );
return false;
}
} elseif (!$this->_testPermissions($directory, $chmod)) {
diff --git a/library/vendor/HTMLPurifier/DefinitionCache/Serializer/README b/library/vendor/HTMLPurifier/DefinitionCache/Serializer/README
old mode 100644
new mode 100755
diff --git a/library/vendor/HTMLPurifier/Encoder.php b/library/vendor/HTMLPurifier/Encoder.php
index fef9b5890..b94f17542 100644
--- a/library/vendor/HTMLPurifier/Encoder.php
+++ b/library/vendor/HTMLPurifier/Encoder.php
@@ -101,6 +101,14 @@ class HTMLPurifier_Encoder
* It will parse according to UTF-8 and return a valid UTF8 string, with
* non-SGML codepoints excluded.
*
+ * Specifically, it will permit:
+ * \x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}
+ * Source: https://www.w3.org/TR/REC-xml/#NT-Char
+ * Arguably this function should be modernized to the HTML5 set
+ * of allowed characters:
+ * https://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
+ * which simultaneously expand and restrict the set of allowed characters.
+ *
* @param string $str The string to clean
* @param bool $force_php
* @return string
@@ -122,15 +130,12 @@ class HTMLPurifier_Encoder
* function that needs to be able to understand UTF-8 characters.
* As of right now, only smart lossless character encoding converters
* would need that, and I'm probably not going to implement them.
- * Once again, PHP 6 should solve all our problems.
*/
public static function cleanUTF8($str, $force_php = false)
{
// UTF-8 validity is checked since PHP 4.3.5
// This is an optimization: if the string is already valid UTF-8, no
// need to do PHP stuff. 99% of the time, this will be the case.
- // The regexp matches the XML char production, as well as well as excluding
- // non-SGML codepoints U+007F to U+009F
if (preg_match(
'/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
$str
@@ -255,6 +260,7 @@ class HTMLPurifier_Encoder
// 7F-9F is not strictly prohibited by XML,
// but it is non-SGML, and thus we don't allow it
(0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
+ (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) ||
(0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
)
) {
diff --git a/library/vendor/HTMLPurifier/EntityParser.php b/library/vendor/HTMLPurifier/EntityParser.php
index 61529dcd9..c372b5a6a 100644
--- a/library/vendor/HTMLPurifier/EntityParser.php
+++ b/library/vendor/HTMLPurifier/EntityParser.php
@@ -16,6 +16,138 @@ class HTMLPurifier_EntityParser
*/
protected $_entity_lookup;
+ /**
+ * Callback regex string for entities in text.
+ * @type string
+ */
+ protected $_textEntitiesRegex;
+
+ /**
+ * Callback regex string for entities in attributes.
+ * @type string
+ */
+ protected $_attrEntitiesRegex;
+
+ /**
+ * Regex testing whether an entity begins with a name whose semicolon is optional
+ */
+ protected $_semiOptionalPrefixRegex;
+
+ public function __construct() {
+ // From
+ // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon
+ $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml";
+
+ // NB: three empty captures to put the fourth match in the right
+ // place
+ $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/";
+
+ $this->_textEntitiesRegex =
+ '/&(?:'.
+ // hex
+ '[#]x([a-fA-F0-9]+);?|'.
+ // dec
+ '[#]0*(\d+);?|'.
+ // string (mandatory semicolon)
+ // NB: order matters: match semicolon preferentially
+ '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
+ // string (optional semicolon)
+ "($semi_optional)".
+ ')/';
+
+ $this->_attrEntitiesRegex =
+ '/&(?:'.
+ // hex
+ '[#]x([a-fA-F0-9]+);?|'.
+ // dec
+ '[#]0*(\d+);?|'.
+ // string (mandatory semicolon)
+ // NB: order matters: match semicolon preferentially
+ '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
+ // string (optional semicolon)
+ // don't match if followed by an equals sign, a semicolon or an
+ // alphanumeric character (URL like)
+ "($semi_optional)(?![=;A-Za-z0-9])".
+ ')/';
+
+ }
+
+ /**
+ * Substitute entities with the parsed equivalents. Use this on
+ * textual data in an HTML document (as opposed to attributes.)
+ *
+ * @param string $string String to have entities parsed.
+ * @return string Parsed string.
+ */
+ public function substituteTextEntities($string)
+ {
+ return preg_replace_callback(
+ $this->_textEntitiesRegex,
+ array($this, 'entityCallback'),
+ $string
+ );
+ }
+
+ /**
+ * Substitute entities with the parsed equivalents. Use this on
+ * attribute contents in documents.
+ *
+ * @param string $string String to have entities parsed.
+ * @return string Parsed string.
+ */
+ public function substituteAttrEntities($string)
+ {
+ return preg_replace_callback(
+ $this->_attrEntitiesRegex,
+ array($this, 'entityCallback'),
+ $string
+ );
+ }
+
+ /**
+ * Callback function for substituteTextEntities() and substituteAttrEntities().
+ *
+ * @param array $matches PCRE matches array, with 0 the entire match, and
+ * index 1 (hex value), 2 (dec value), 3 (name with semicolon)
+ * or 4 (name without semicolon) set.
+ * @return string Replacement string.
+ */
+
+ protected function entityCallback($matches)
+ {
+ $entity = $matches[0];
+ $hex_part = @$matches[1];
+ $dec_part = @$matches[2];
+ $named_part = empty($matches[3]) ? @$matches[4] : $matches[3];
+ if ($hex_part !== NULL && $hex_part !== "") {
+ return HTMLPurifier_Encoder::unichr(hexdec($hex_part));
+ } elseif ($dec_part !== NULL && $dec_part !== "") {
+ return HTMLPurifier_Encoder::unichr((int) $dec_part);
+ } else {
+ if (!$this->_entity_lookup) {
+ $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
+ }
+ if (isset($this->_entity_lookup->table[$named_part])) {
+ return $this->_entity_lookup->table[$named_part];
+ } else {
+ // exact match didn't match anything, so test if
+ // any of the semicolon optional match the prefix.
+ // Test that this is an EXACT match is important to
+ // prevent infinite loop
+ if (!empty($matches[3])) {
+ return preg_replace_callback(
+ $this->_semiOptionalPrefixRegex,
+ array($this, 'entityCallback'),
+ $entity
+ );
+ }
+ return $entity;
+ }
+ }
+ }
+
+ // LEGACY CODE BELOW
+
/**
* Callback regex string for parsing entities.
* @type string
@@ -144,7 +276,7 @@ class HTMLPurifier_EntityParser
$entity;
} else {
return isset($this->_special_ent2dec[$matches[3]]) ?
- $this->_special_ent2dec[$matches[3]] :
+ $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] :
$entity;
}
}
diff --git a/library/vendor/HTMLPurifier/Filter/ExtractStyleBlocks.php b/library/vendor/HTMLPurifier/Filter/ExtractStyleBlocks.php
index 08e62c16b..66f70b0fc 100644
--- a/library/vendor/HTMLPurifier/Filter/ExtractStyleBlocks.php
+++ b/library/vendor/HTMLPurifier/Filter/ExtractStyleBlocks.php
@@ -95,7 +95,10 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
if ($tidy !== null) {
$this->_tidy = $tidy;
}
- $html = preg_replace_callback('##isU', array($this, 'styleCallback'), $html);
+ // NB: this must be NON-greedy because if we have
+ //
+ // we must not grab foo