ssage' => 'Invalid \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}
// If the tag isn't in the whitelist, remove it and continue with next iteration
if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}
$this->cleanHrefs($currentElement);
$this->cleanXlinkHrefs($currentElement);
$this->cleanAttributesOnWhitelist($currentElement);
if (strtolower($currentElement->tagName) === 'use') {
if ($this->isUseTagDirty($currentElement)
|| $this->isUseTagExceedingThreshold($currentElement)
) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}
}
}
}
/**
* Only allow attributes that are on the whitelist
*
* @param \DOMElement $element
*/
protected function cleanAttributesOnWhitelist(\DOMElement $element)
{
for ($x = $element->attributes->length - 1; $x >= 0; $x--) {
// get attribute name
$attrName = $element->attributes->item($x)->name;
// Remove attribute if not in whitelist
if (!in_array(strtolower($attrName), $this->allowedAttrs) && !$this->isAriaAttribute(strtolower($attrName)) && !$this->isDataAttribute(strtolower($attrName))) {
$element->removeAttribute($attrName);
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'' . $attrName . '\'',
'line' => $element->getLineNo(),
);
}
/**
* This is used for when a namespace isn't imported properly.
* Such as xlink:href when the xlink namespace isn't imported.
* We have to do this as the link is still ran in this case.
*/
if (false !== strpos($attrName, 'href')) {
$href = $element->getAttribute($attrName);
if (false === $this->isHrefSafeValue($href)) {
$element->removeAttribute($attrName);
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
'line' => $element->getLineNo(),
);
}
}
// Do we want to strip remote references?
if($this->removeRemoteReferences) {
// Remove attribute if it has a remote reference
if (isset($element->attributes->item($x)->value) && $this->hasRemoteReference($element->attributes->item($x)->value)) {
$element->removeAttribute($attrName);
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'' . $attrName . '\'',
'line' => $element->getLineNo(),
);
}
}
}
}
/**
* Clean the xlink:hrefs of script and data embeds
*
* @param \DOMElement $element
*/
protected function cleanXlinkHrefs(\DOMElement $element)
{
$xlinks = $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href');
if (false === $this->isHrefSafeValue($xlinks)) {
$element->removeAttributeNS( 'http://www.w3.org/1999/xlink', 'href' );
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
'line' => $element->getLineNo(),
);
}
}
/**
* Clean the hrefs of script and data embeds
*
* @param \DOMElement $element
*/
protected function cleanHrefs(\DOMElement $element)
{
$href = $element->getAttribute('href');
if (false === $this->isHrefSafeValue($href)) {
$element->removeAttribute('href');
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
'line' => $element->getLineNo(),
);
}
}
/**
* Only allow whitelisted starts to be within the href.
*
* This will stop scripts etc from being passed through, with or without attempting to hide bypasses.
* This stops the need for us to use a complicated script regex.
*
* @param $value
* @return bool
*/
protected function isHrefSafeValue($value) {
// Allow empty values
if (empty($value)) {
return true;
}
// Allow fragment identifiers.
if ('#' === substr($value, 0, 1)) {
return true;
}
// Allow relative URIs.
if ('/' === substr($value, 0, 1)) {
return true;
}
// Allow HTTPS domains.
if ('https://' === substr($value, 0, 8)) {
return true;
}
// Allow HTTP domains.
if ('http://' === substr($value, 0, 7)) {
return true;
}
// Allow known data URIs.
if (in_array(substr($value, 0, 14), array(
'data:image/png', // PNG
'data:image/gif', // GIF
'data:image/jpg', // JPG
'data:image/jpe', // JPEG
'data:image/pjp', // PJPEG
))) {
return true;
}
// Allow known short data URIs.
if (in_array(substr($value, 0, 12), array(
'data:img/png', // PNG
'data:img/gif', // GIF
'data:img/jpg', // JPG
'data:img/jpe', // JPEG
'data:img/pjp', // PJPEG
))) {
return true;
}
return false;
}
/**
* Removes non-printable ASCII characters from string & trims it
*
* @param string $value
* @return bool
*/
protected function removeNonPrintableCharacters($value)
{
return trim(preg_replace('/[^ -~]/xu','',$value));
}
/**
* Does this attribute value have a remote reference?
*
* @param $value
* @return bool
*/
protected function hasRemoteReference($value)
{
$value = $this->removeNonPrintableCharacters($value);
$wrapped_in_url = preg_match('~^url\(\s*[\'"]\s*(.*)\s*[\'"]\s*\)$~xi', $value, $match);
if (!$wrapped_in_url){
return false;
}
$value = trim($match[1], '\'"');
return preg_match('~^((https?|ftp|file):)?//~xi', $value);
}
/**
* Should we minify the output?
*
* @param bool $shouldMinify
*/
public function minify($shouldMinify = false)
{
$this->minifyXML = (bool) $shouldMinify;
}
/**
* Should we remove the XML tag in the header?
*
* @param bool $removeXMLTag
*/
public function removeXMLTag($removeXMLTag = false)
{
$this->removeXMLTag = (bool) $removeXMLTag;
}
/**
* Whether `