Name: Password: Want to register?

API Documentation

[Parent dir]

Information about libs/HtmlFilter.php

Classes

Code

1<?php
/**
 * Whitelist-based HTML filter. //Version: 3.1
 *
 * Tags named in $goodTagsRE are kept as markup provided every attribute they
 * carry is listed in $goodAttrs and they are correctly nested; everything else
 * (unknown tags, comments, mismatched or unclosed tags, plain text) is passed
 * through htmlspecialchars().
 *
 * Note: a tag is only treated as self-contained when it is written with a
 * trailing slash (e.g. "<hr />"); "<hr>" is handled as an opening tag and is
 * escaped unless a matching closing tag follows.
 */
class HtmlFilter{
    const OPENS = 1;  //opening tag, e.g. <b>
    const CLOSES = 2; //closing tag, e.g. </b>
    const SINGLE = 3; //self-closed tag, e.g. <hr />

    /** Allowed tag names as a regex alternation (inserted into the patterns built in clean()). */
    public $goodTagsRE = 'i|b|a|em|strong|hr|ul|ol|li|img';
    /** Allowed attribute names; a tag carrying any other attribute is escaped as a whole. */
    public $goodAttrs = array('href', 'src', 'class', 'alt', 'title');
    /** Allowed URL schemes as a regex alternation (without the trailing colon). */
    public $goodProtocolsRE = 'https?|s?ftp|mailto|torrent';
    public $defaultProtocol = 'http://'; //prepended to URLs without above protocols
    public $linkAttrs = array('href', 'src'); //these are checked for protocols
    /** Maximum run length passed to wordwrap() by cleanBreak(); values <= 0 disable wrapping. */
    public $wrapLength = 80;
    public $encoding = 'UTF-8'; //used for htmlspecialchars

    /**
     * Each argument overrides the matching default when it is not NULL.
     *
     * @param array|null $goodTags      allowed tag names (joined with '|' into $goodTagsRE)
     * @param array|null $goodAttrs     allowed attribute names
     * @param array|null $goodProtocols allowed URL schemes (joined with '|' into $goodProtocolsRE)
     * @param int|null   $wrapLength    wrap width used by cleanBreak()
     */
    public function __construct($goodTags = NULL, $goodAttrs = NULL, $goodProtocols = NULL, $wrapLength = NULL){
        if (isset($goodTags)) { $this->goodTagsRE = join('|', $goodTags); }
        if (isset($goodAttrs)) { $this->goodAttrs = $goodAttrs; }
        if (isset($goodProtocols)) { $this->goodProtocolsRE = join('|', $goodProtocols); }
        if (isset($wrapLength)) { $this->wrapLength = $wrapLength; }
    }

    /**
     * Breaks over-long runs of characters with a space, then filters the result with clean().
     *
     * The wrapping is applied to the raw input, before any tag is recognised, so a
     * tag or attribute value longer than $wrapLength is split as well.
     *
     * @param string $html untrusted markup
     * @return string filtered markup
     */
    public function cleanBreak($html) {
        if ($this->wrapLength > 0) {
            $html = wordwrap($html, $this->wrapLength, ' ', TRUE);
        }
        return $this->clean($html);
    }

    /**
     * preg_replace_callback() adapter used by clean(): escapes the whole match.
     *
     * @param array $htmls regex match array; element 0 is the full match
     * @return string the escaped match (existing entities are not double-encoded)
     */
    protected function escape($htmls){
        return htmlspecialchars($htmls[0], ENT_QUOTES, $this->encoding, FALSE);
    }

    /**
     * Filters a piece of untrusted markup.
     *
     * @param string $html untrusted markup
     * @return string markup in which only approved, properly nested tags survive unescaped
     */
    public function clean($html){
        $html = preg_replace_callback('#(<!--.*?-->)#s', array($this, 'escape'), $html); //escape comments
        //match approved tags in all possible forms
        $tagOpenRE = "<(?:{$this->goodTagsRE})(?:\s.*?)?/?>"; //opening or empty
        $tagCloseRE = "</(?:{$this->goodTagsRE})\s*?>"; //closing
        $parts = preg_split("#($tagOpenRE|$tagCloseRE)#si", $html, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === FALSE) {
            //the pattern could not be applied (e.g. a broken custom tag list): escape everything
            return htmlspecialchars((string)$html, ENT_QUOTES, $this->encoding, FALSE);
        }

        //the scheme alternation must be grouped, otherwise "^" and ":" bind to the
        //first and last alternative only and e.g. "javascript:ftp()" would be accepted
        $protocolRE = "#^(?:{$this->goodProtocolsRE}):#i";

        $openTags = array(); //a stack to find mismatched tags
        foreach ($parts as $key => $part) {
            if ($key % 2 == 0) { //even - text, possibly with bad tags
                $parts[$key] = htmlspecialchars($part, ENT_QUOTES, $this->encoding, FALSE);
                continue;
            }

            //odd - an approved tag
            $tagInfo = $this->parseTag($part);
            $worthyTag = TRUE;
            foreach ($tagInfo['attrs'] as $attrName => $attrValue) {
                //numeric attribute names become integer keys; compare as strings, strictly
                $attrName = (string)$attrName;
                if (!in_array($attrName, $this->goodAttrs, TRUE)) {
                    $worthyTag = FALSE;
                    break;
                }
                //if attribute is a link, enforce a good protocol
                if (in_array($attrName, $this->linkAttrs, TRUE) && !preg_match($protocolRE, $attrValue)) {
                    $tagInfo['attrs'][$attrName] = $this->defaultProtocol.$attrValue;
                }
            }

            if ($tagInfo['type'] === self::OPENS) {
                //only tags that are actually emitted may be closed later
                if ($worthyTag) {
                    $openTags[] = array('name' => $tagInfo['name'], 'key' => $key);
                }
            } elseif ($tagInfo['type'] === self::CLOSES) {
                $open = end($openTags); //FALSE when nothing is open
                if ($open !== FALSE && $open['name'] === $tagInfo['name']) {
                    array_pop($openTags); //this tag closes an open one
                } else {
                    $worthyTag = FALSE; //we expect another tag (or none at all)
                }
            }

            if ($worthyTag) {
                $parts[$key] = $this->unparseTag($tagInfo);
            } else {
                $parts[$key] = htmlspecialchars($part, ENT_QUOTES, $this->encoding, FALSE);
            }
        }

        foreach ($openTags as $open) { //escape all unclosed tags
            $key = $open['key'];
            $parts[$key] = htmlspecialchars($parts[$key], ENT_QUOTES, $this->encoding, FALSE);
        }
        return implode('', $parts);
    }

    /**
     * Rebuilds the markup of a tag from the array produced by parseTag().
     * Escapes only attr content, the rest is handled in clean().
     *
     * @param array $tagInfo array with the keys 'type', 'name' and 'attrs'
     * @return string the tag as markup
     */
    public function unparseTag($tagInfo){
        $attrs = '';
        foreach ($tagInfo['attrs'] as $name => $value) {
            $value = htmlspecialchars($value, ENT_QUOTES, $this->encoding, FALSE);
            $attrs .= ' '.$name.'="'.$value.'"';
        }

        if ($tagInfo['type'] == self::OPENS) {
            return "<$tagInfo[name]$attrs>";
        } elseif ($tagInfo['type'] == self::CLOSES) {
            return "</$tagInfo[name]>";
        } else {
            return "<$tagInfo[name]$attrs />";
        }
    }

    /**
     * Takes a string representing a single tag (including angled braces) and
     * puts it into machine-readable form (array).
     *
     * @param string $tagStr a single tag, e.g. '<a href="x">', '</a>' or '<hr />'
     * @return array 'type' => self::OPENS|CLOSES|SINGLE, 'name' => tag name ('' when
     *               none could be read), 'attrs' => array of attribute name => raw value;
     *               only quoted attribute values are recognised
     */
    public function parseTag($tagStr){
        $tagInfo = array('type' => self::OPENS, 'name' => '', 'attrs' => array());
        $length = strlen($tagStr);
        if ($length > 1 && $tagStr[1] == '/') {
            $tagInfo['type'] = self::CLOSES;
        } elseif ($length > 1 && $tagStr[$length - 2] == '/') {
            $tagInfo['type'] = self::SINGLE;
        }

        $matches = array();
        if (preg_match("#^</?([\w\d_:\-]+)#", $tagStr, $matches)) {
            $tagInfo['name'] = $matches[1];
        }

        if ($tagInfo['type'] != self::CLOSES) {
            //name, optional whitespace around "=", then a value in matching quotes
            $attrsRE = '#\s+([\w\d_:\-]+)\s*=\s*(["\'])(.*?)\2#';
            $offset = strlen($tagInfo['name']) + 1; //skip "<" and the tag name
            preg_match_all($attrsRE, $tagStr, $matches, PREG_SET_ORDER, $offset);
            foreach ($matches as $match) {
                $tagInfo['attrs'][ $match[1] ] = $match[3];
            }
        }

        return $tagInfo;
    }
}
[download]
Made with Notepad++ Also on SourceForge.net