mirror of
				https://github.com/Icinga/icingaweb2.git
				synced 2025-11-03 20:54:27 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			182 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			182 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
 | 
						|
/**
 | 
						|
 * Takes a well formed list of tokens and fixes their nesting.
 | 
						|
 *
 | 
						|
 * HTML elements dictate which elements are allowed to be their children,
 | 
						|
 * for example, you can't have a p tag in a span tag.  Other elements have
 | 
						|
 * much more rigorous definitions: tables, for instance, require a specific
 | 
						|
 * order for their elements.  There are also constraints not expressible by
 | 
						|
 * document type definitions, such as the chameleon nature of ins/del
 | 
						|
 * tags and global child exclusions.
 | 
						|
 *
 | 
						|
 * The first major objective of this strategy is to iterate through all
 | 
						|
 * the nodes and determine whether or not their children conform to the
 | 
						|
 * element's definition.  If they do not, the child definition may
 | 
						|
 * optionally supply an amended list of elements that is valid or
 | 
						|
 * require that the entire node be deleted (and the previous node
 | 
						|
 * rescanned).
 | 
						|
 *
 | 
						|
 * The second objective is to ensure that explicitly excluded elements of
 | 
						|
 * an element do not appear in its children.  Code that accomplishes this
 | 
						|
 * task is pervasive through the strategy, though the two are distinct tasks
 | 
						|
 * and could, theoretically, be separated (although it's not recommended).
 | 
						|
 *
 | 
						|
 * @note Whether or not unrecognized children are silently dropped or
 | 
						|
 *       translated into text depends on the child definitions.
 | 
						|
 *
 | 
						|
 * @todo Enable nodes to be bubbled out of the structure.  This is
 | 
						|
 *       easier with our new algorithm.
 | 
						|
 */
 | 
						|
 | 
						|
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
{

    /**
     * Validates the nesting of a token list and returns the corrected list.
     *
     * The tokens are converted into a node tree, every element node is
     * visited in post-order (children before their parent), and each node
     * is either marked dead (its name is in the accumulated exclusion set,
     * or its definition's child validator returned false) or has its
     * children replaced by what the validator accepted. The tree is then
     * flattened back into tokens.
     *
     * While the traversal runs, the context variables 'IsInline',
     * 'CurrentNode' and 'CurrentToken' are registered; they are destroyed
     * again before returning.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {

        //####################################################################//
        // Pre-processing

        // O(n) pass to convert to a tree, so that we can efficiently
        // refer to substrings
        $top_node = HTMLPurifier_Arborize::arborize($tokens, $config, $context);

        // get a copy of the HTML definition, and the definition of the
        // (virtual) parent element that wraps the whole document fragment
        $definition = $config->getHTMLDefinition();
        $parent_def = $definition->info_parent_def;

        $excludes_enabled = !$config->get('Core.DisableExcludes');

        // setup the context variable 'IsInline', for chameleon processing
        // is 'false' when we are not inline, 'true' when it must always
        // be inline, and an integer when it is inline for a certain
        // branch of the document tree
        $is_inline = $parent_def->descendants_are_inline;
        $context->register('IsInline', $is_inline);

        // setup error collector
        $e =& $context->get('ErrorCollector', true);

        //####################################################################//
        // Loop initialization

        // variable that contains the start token while we are processing
        // nodes. This enables error reporting to do its job
        // NOTE(review): $node, $token and $is_inline are deliberately
        // reassigned inside the loop below under the same names;
        // presumably register() binds them by reference so the context
        // always sees the current values -- confirm before renaming.
        $node = $top_node;
        // dummy token (only the start token of the pair is needed)
        list($token) = $node->toTokenPair();
        $context->register('CurrentNode', $node);
        $context->register('CurrentToken', $token);

        //####################################################################//
        // Loop

        // We need to implement a post-order traversal iteratively, to
        // avoid running into stack space limits.  This is pretty tricky
        // to reason about, so we just manually stack-ify the recursive
        // variant:
        //
        //  function f($node) {
        //      foreach ($node->children as $child) {
        //          f($child);
        //      }
        //      validate($node);
        //  }
        //
        // Thus, we will represent a stack frame as
        // array($node, $is_inline, $excludes, $ix), where $excludes is the
        // set of element names excluded at this node and $ix is the index
        // of the next child of $node that still has to be visited.

        $stack = array(
            array(
                $top_node,
                $parent_def->descendants_are_inline,
                $parent_def->excludes, // exclusions
                0
            )
        );

        while (!empty($stack)) {
            list($node, $is_inline, $excludes, $ix) = array_pop($stack);

            // Only the root frame leaves the stack empty once popped; it
            // is validated against the parent definition rather than an
            // entry of $definition->info.
            $def = empty($stack) ? $parent_def : $definition->info[$node->name];

            // recursive call: descend into the next unvisited element child
            $go = false;
            while (isset($node->children[$ix])) {
                $child = $node->children[$ix++];
                if ($child instanceof HTMLPurifier_Node_Element) {
                    $go = true;
                    // resume this node later, starting after $child
                    $stack[] = array($node, $is_inline, $excludes, $ix);
                    $stack[] = array(
                        $child,
                        // ToDo: I don't think it matters if it's def or
                        // child_def, but double check this...
                        $is_inline || $def->descendants_are_inline,
                        empty($def->excludes) ? $excludes
                                              : array_merge($excludes, $def->excludes),
                        0
                    );
                    break;
                }
            }
            if ($go) {
                continue;
            }

            // All children have been visited: validate this node.
            list($token) = $node->toTokenPair();

            // base case
            if ($excludes_enabled && isset($excludes[$node->name])) {
                $node->dead = true;
                if ($e) {
                    $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
                }
            } else {
                // XXX I suppose it would be slightly more efficient to
                // avoid the allocation here and have children
                // strategies handle it
                $children = array();
                foreach ($node->children as $child) {
                    if (!$child->dead) {
                        $children[] = $child;
                    }
                }

                // The validator returns true (children are fine as they
                // are), false (the node itself must go) or a replacement
                // list of children.
                $result = $def->child->validateChildren($children, $config, $context);
                if ($result === true) {
                    // nothing to fix; just drop the dead children
                    $node->children = $children;
                } elseif ($result === false) {
                    $node->dead = true;
                    if ($e) {
                        $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
                    }
                } else {
                    $node->children = $result;
                    if ($e) {
                        // XXX This will miss mutations of internal nodes. Perhaps defer to the child validators
                        if (empty($result) && !empty($children)) {
                            $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
                        } elseif ($result != $children) {
                            $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
                        }
                    }
                }
            }
        }

        //####################################################################//
        // Post-processing

        // remove context variables
        $context->destroy('IsInline');
        $context->destroy('CurrentNode');
        $context->destroy('CurrentToken');

        //####################################################################//
        // Return

        // The root frame is the last one popped, so $node is the top node
        // of the tree again at this point.
        return HTMLPurifier_Arborize::flatten($node, $config, $context);
    }
}
 | 
						|
 | 
						|
// vim: et sw=4 sts=4
 |