1<?php
/**
 * Whitelist-based HTML filter.
 *
 * The input is split into text and candidate tags. Text is HTML-escaped.
 * A tag is passed through (re-serialised from its parsed name and attributes)
 * only when its name is in the tag whitelist, every parsed attribute is in the
 * attribute whitelist, and it is correctly balanced with its counterpart.
 * Everything else is HTML-escaped so it renders as visible text.
 */
class HtmlFilter
{
    /** Tag type: opening tag, e.g. <b>. */
    const OPENS = 1;
    /** Tag type: closing tag, e.g. </b>. */
    const CLOSES = 2;
    /** Tag type: self-closing tag, e.g. <hr />. */
    const SINGLE = 3;

    /** @var string Regex alternation of allowed tag names (interpolated into patterns unquoted). */
    public $goodTagsRE = 'i|b|a|em|strong|hr|ul|ol|li|img';

    /** @var array Allowed attribute names (compared case-sensitively). */
    public $goodAttrs = array('href', 'src', 'class', 'alt', 'title');

    /** @var string Regex alternation of allowed URL schemes (interpolated into patterns unquoted). */
    public $goodProtocolsRE = 'https?|s?ftp|mailto|torrent';

    /** @var string Prefix prepended to link attribute values that do not start with an allowed scheme. */
    public $defaultProtocol = 'http://';

    /** @var array Attribute names whose values are treated as URLs. */
    public $linkAttrs = array('href', 'src');

    /** @var int Maximum word length used by cleanBreak(); values <= 0 disable wrapping. */
    public $wrapLength = 80;

    /** @var string Character encoding handed to htmlspecialchars(). */
    public $encoding = 'UTF-8';

    /**
     * Each argument overrides the corresponding default only when it is not null.
     *
     * @param array|null $goodTags      Allowed tag names; joined with '|' into $goodTagsRE.
     * @param array|null $goodAttrs     Allowed attribute names.
     * @param array|null $goodProtocols Allowed URL schemes; joined with '|' into $goodProtocolsRE.
     * @param int|null   $wrapLength    Word length limit for cleanBreak().
     */
    public function __construct($goodTags = null, $goodAttrs = null, $goodProtocols = null, $wrapLength = null)
    {
        if (isset($goodTags)) {
            $this->goodTagsRE = join('|', $goodTags);
        }
        if (isset($goodAttrs)) {
            $this->goodAttrs = $goodAttrs;
        }
        if (isset($goodProtocols)) {
            $this->goodProtocolsRE = join('|', $goodProtocols);
        }
        if (isset($wrapLength)) {
            $this->wrapLength = $wrapLength;
        }
    }

    /**
     * Breaks over-long words with a space, then filters the result with clean().
     *
     * NOTE(review): wordwrap() counts bytes, and it runs on the raw markup
     * before filtering, so it may split a multi-byte character or insert a
     * space inside a tag or attribute value. Left unchanged here; confirm
     * whether callers rely on that.
     *
     * @param string $html Untrusted HTML.
     * @return string Filtered HTML.
     */
    public function cleanBreak($html)
    {
        if ($this->wrapLength > 0) {
            $html = wordwrap($html, $this->wrapLength, ' ', true);
        }
        return $this->clean($html);
    }

    /**
     * preg_replace_callback() handler: HTML-escapes the whole match.
     *
     * @param array $htmls Match array; element 0 is the full match.
     * @return string Escaped text.
     */
    protected function escape($htmls)
    {
        return $this->escapeText($htmls[0]);
    }

    /**
     * HTML-escapes a string without double-encoding existing entities.
     *
     * @param string $text
     * @return string
     */
    private function escapeText($text)
    {
        return htmlspecialchars($text, ENT_QUOTES, $this->encoding, false);
    }

    /**
     * Filters untrusted HTML down to the whitelisted tags and attributes.
     *
     * @param string $html Untrusted HTML.
     * @return string HTML in which only accepted, balanced tags remain as markup.
     */
    public function clean($html)
    {
        $html = (string) $html;

        // Comments are escaped up front so they can never hide markup.
        $withoutComments = preg_replace_callback('#(<!--.*?-->)#s', array($this, 'escape'), $html);
        if ($withoutComments === null) {
            // PCRE failure: fail closed by escaping everything.
            return $this->escapeText($html);
        }

        $tagOpenRE = "<(?:{$this->goodTagsRE})(?:\s.*?)?/?>";
        $tagCloseRE = "</(?:{$this->goodTagsRE})\s*?>";
        // With DELIM_CAPTURE the result alternates: even keys are text, odd keys are tags.
        $parts = preg_split("#($tagOpenRE|$tagCloseRE)#si", $withoutComments, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $this->escapeText($withoutComments);
        }

        // Stack of accepted opening tags that are still waiting for their closing tag.
        $openTags = array();

        foreach ($parts as $key => $part) {
            if ($key % 2 == 0) {
                $parts[$key] = $this->escapeText($part);
                continue;
            }

            $tagInfo = $this->parseTag($part);
            // A tag whose name could not be parsed is never let through.
            $worthyTag = ($tagInfo['name'] !== '');

            foreach ($tagInfo['attrs'] as $attrName => $attrValue) {
                // Numeric-looking names become int array keys; compare as strings, strictly,
                // so that loose comparison (0 == 'href' on PHP < 8) cannot bypass the whitelist.
                $attrName = (string) $attrName;
                if (!in_array($attrName, $this->goodAttrs, true)) {
                    $worthyTag = false;
                    break;
                }
                // The alternation must be grouped: without (?:...) the ^ and the ':' bind
                // only to the first and last alternatives, letting e.g. "javascript:ftp" pass.
                if (in_array($attrName, $this->linkAttrs, true)
                    && !preg_match("#^(?:{$this->goodProtocolsRE}):#i", $attrValue)
                ) {
                    $tagInfo['attrs'][$attrName] = $this->defaultProtocol . $attrValue;
                }
            }

            if ($tagInfo['type'] === self::OPENS) {
                // Only accepted tags go on the stack; a rejected opening tag must not
                // make its closing tag look balanced.
                if ($worthyTag) {
                    $openTags[] = array('name' => $tagInfo['name'], 'key' => $key);
                }
            } elseif ($tagInfo['type'] === self::CLOSES) {
                $open = end($openTags);
                // Tags are matched case-insensitively, so pair them the same way.
                if ($open !== false && strcasecmp($open['name'], $tagInfo['name']) === 0) {
                    array_pop($openTags);
                } else {
                    // Stray or mis-nested closing tag.
                    $worthyTag = false;
                }
            }

            $parts[$key] = $worthyTag ? $this->unparseTag($tagInfo) : $this->escapeText($part);
        }

        // Opening tags that were never closed are turned back into visible text.
        foreach ($openTags as $open) {
            $parts[$open['key']] = $this->escapeText($parts[$open['key']]);
        }

        return join($parts);
    }

    /**
     * Serialises a parsed tag back to markup.
     *
     * @param array $tagInfo Structure as returned by parseTag(): 'type', 'name', 'attrs'.
     * @return string Tag markup; attribute values are escaped and double-quoted.
     *                Closing tags are emitted without attributes.
     */
    public function unparseTag($tagInfo)
    {
        $attrs = '';
        foreach ($tagInfo['attrs'] as $name => $value) {
            $attrs .= ' ' . $name . '="' . $this->escapeText($value) . '"';
        }

        if ($tagInfo['type'] == self::OPENS) {
            return '<' . $tagInfo['name'] . $attrs . '>';
        }
        if ($tagInfo['type'] == self::CLOSES) {
            return '</' . $tagInfo['name'] . '>';
        }
        return '<' . $tagInfo['name'] . $attrs . ' />';
    }

    /**
     * Parses a single tag string such as '<a href="x">' or '</a>'.
     *
     * Only quoted attribute values are recognised; anything else inside the tag
     * is ignored. Attributes are not parsed for closing tags.
     *
     * @param string $tagStr Tag markup, including the angle brackets.
     * @return array array('type' => self::OPENS|CLOSES|SINGLE, 'name' => string,
     *               'attrs' => array(name => value)); 'name' is '' when no tag
     *               name could be parsed.
     */
    public function parseTag($tagStr)
    {
        $tagInfo = array('type' => self::OPENS, 'name' => '', 'attrs' => array());

        $tagStr = (string) $tagStr;
        $length = strlen($tagStr);
        if ($length >= 2) {
            if ($tagStr[1] == '/') {
                $tagInfo['type'] = self::CLOSES;
            } elseif ($tagStr[$length - 2] == '/') {
                $tagInfo['type'] = self::SINGLE;
            }
        }

        $matches = array();
        if (preg_match('#^</?([\w:\-]+)#', $tagStr, $matches)) {
            $tagInfo['name'] = $matches[1];
        }

        if ($tagInfo['type'] != self::CLOSES) {
            // name, optional whitespace around '=', then a value in matching single or double quotes.
            $attrsRE = '#\s+([\w:\-]+)\s*=\s*(["\'])(.*?)\2#';
            // Start after '<' and the tag name.
            $offset = min($length, strlen($tagInfo['name']) + 1);
            $matches = array();
            preg_match_all($attrsRE, $tagStr, $matches, PREG_SET_ORDER, $offset);
            foreach ($matches as $match) {
                $tagInfo['attrs'][$match[1]] = $match[3];
            }
        }

        return $tagInfo;
    }
}