the element isn't allowed (look ma, no * `strip_tags()`!). Otherwise it splits the tag into an element and an attribute * list. * * After the tag is split into an element and an attribute list, it is run * through another filter which will remove illegal attributes and once that is * completed, will be returned. * * @access private * @ignore * @since 1.0.0 * * @param string $content Content to filter. * @param array[]|string $allowed_html An array of allowed HTML elements and attributes, * or a context name such as 'post'. See wp_kses_allowed_html() * for the list of accepted context names. * @param string[] $allowed_protocols Array of allowed URL protocols. * @return string Fixed HTML element */ function wp_kses_split2( $content, $allowed_html, $allowed_protocols ) { $content = wp_kses_stripslashes( $content ); // It matched a ">" character. if ( ! str_starts_with( $content, '<' ) ) { return '>'; } // Allow HTML comments. if ( str_starts_with( $content, '' ), '', $content ); while ( ( $newstring = wp_kses( $content, $allowed_html, $allowed_protocols ) ) !== $content ) { $content = $newstring; } if ( '' === $content ) { return ''; } // Prevent multiple dashes in comments. $content = preg_replace( '/--+/', '-', $content ); // Prevent three dashes closing a comment. $content = preg_replace( '/-$/', '', $content ); return ""; } // It's seriously malformed. if ( ! preg_match( '%^<\s*(/\s*)?([a-zA-Z0-9-]+)([^>]*)>?$%', $content, $matches ) ) { return ''; } $slash = trim( $matches[1] ); $elem = $matches[2]; $attrlist = $matches[3]; if ( ! is_array( $allowed_html ) ) { $allowed_html = wp_kses_allowed_html( $allowed_html ); } // They are using a not allowed HTML element. if ( ! isset( $allowed_html[ strtolower( $elem ) ] ) ) { return ''; } // No attributes are allowed for closing elements. if ( '' !== $slash ) { return ""; } return wp_kses_attr( $elem, $attrlist, $allowed_html, $allowed_protocols ); } /** * Removes all attributes, if none are allowed for this element. * * If some are allowed it calls `wp_kses_hair()` to split them further, and then * it builds up new HTML code from the data that `wp_kses_hair()` returns. It also * removes `<` and `>` characters, if there are any left. One more thing it does * is to check if the tag has a closing XHTML slash, and if it does, it puts one * in the returned code as well. * * An array of allowed values can be defined for attributes. If the attribute value * doesn't fall into the list, the attribute will be removed from the tag. * * Attributes can be marked as required. If a required attribute is not present, * KSES will remove all attributes from the tag. As KSES doesn't match opening and * closing tags, it's not possible to safely remove the tag itself, the safest * fallback is to strip all attributes from the tag, instead. * * @since 1.0.0 * @since 5.9.0 Added support for an array of allowed values for attributes. * Added support for required attributes. * * @param string $element HTML element/tag. * @param string $attr HTML attributes from HTML element to closing HTML element tag. * @param array[]|string $allowed_html An array of allowed HTML elements and attributes, * or a context name such as 'post'. See wp_kses_allowed_html() * for the list of accepted context names. * @param string[] $allowed_protocols Array of allowed URL protocols. * @return string Sanitized HTML element. */ function wp_kses_attr( $element, $attr, $allowed_html, $allowed_protocols ) { if ( ! is_array( $allowed_html ) ) { $allowed_html = wp_kses_allowed_html( $allowed_html ); } // Is there a closing XHTML slash at the end of the attributes? $xhtml_slash = ''; if ( preg_match( '%\s*/\s*$%', $attr ) ) { $xhtml_slash = ' /'; } // Are any attributes allowed at all for this element? $element_low = strtolower( $element ); if ( empty( $allowed_html[ $element_low ] ) || true === $allowed_html[ $element_low ] ) { return "<$element$xhtml_slash>"; } // Split it. $attrarr = wp_kses_hair( $attr, $allowed_protocols ); // Check if there are attributes that are required. $required_attrs = array_filter( $allowed_html[ $element_low ], static function ( $required_attr_limits ) { return isset( $required_attr_limits['required'] ) && true === $required_attr_limits['required']; } ); /* * If a required attribute check fails, we can return nothing for a self-closing tag, * but for a non-self-closing tag the best option is to return the element with attributes, * as KSES doesn't handle matching the relevant closing tag. */ $stripped_tag = ''; if ( empty( $xhtml_slash ) ) { $stripped_tag = "<$element>"; } // Go through $attrarr, and save the allowed attributes for this element in $attr2. $attr2 = ''; foreach ( $attrarr as $arreach ) { // Check if this attribute is required. $required = isset( $required_attrs[ strtolower( $arreach['name'] ) ] ); if ( wp_kses_attr_check( $arreach['name'], $arreach['value'], $arreach['whole'], $arreach['vless'], $element, $allowed_html ) ) { $attr2 .= ' ' . $arreach['whole']; // If this was a required attribute, we can mark it as found. if ( $required ) { unset( $required_attrs[ strtolower( $arreach['name'] ) ] ); } } elseif ( $required ) { // This attribute was required, but didn't pass the check. The entire tag is not allowed. return $stripped_tag; } } // If some required attributes weren't set, the entire tag is not allowed. if ( ! empty( $required_attrs ) ) { return $stripped_tag; } // Remove any "<" or ">" characters. $attr2 = preg_replace( '/[<>]/', '', $attr2 ); return "<$element$attr2$xhtml_slash>"; } /** * Determines whether an attribute is allowed. * * @since 4.2.3 * @since 5.0.0 Added support for `data-*` wildcard attributes. * * @param string $name The attribute name. Passed by reference. Returns empty string when not allowed. * @param string $value The attribute value. Passed by reference. Returns a filtered value. * @param string $whole The `name=value` input. Passed by reference. Returns filtered input. * @param string $vless Whether the attribute is valueless. Use 'y' or 'n'. * @param string $element The name of the element to which this attribute belongs. * @param array $allowed_html The full list of allowed elements and attributes. * @return bool Whether or not the attribute is allowed. */ function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowed_html ) { $name_low = strtolower( $name ); $element_low = strtolower( $element ); if ( ! isset( $allowed_html[ $element_low ] ) ) { $name = ''; $value = ''; $whole = ''; return false; } $allowed_attr = $allowed_html[ $element_low ]; if ( ! isset( $allowed_attr[ $name_low ] ) || '' === $allowed_attr[ $name_low ] ) { /* * Allow `data-*` attributes. * * When specifying `$allowed_html`, the attribute name should be set as * `data-*` (not to be mixed with the HTML 4.0 `data` attribute, see * https://www.w3.org/TR/html40/struct/objects.html#adef-data). * * Note: the attribute name should only contain `A-Za-z0-9_-` chars, * double hyphens `--` are not accepted by WordPress. */ if ( str_starts_with( $name_low, 'data-' ) && ! empty( $allowed_attr['data-*'] ) && preg_match( '/^data(?:-[a-z0-9_]+)+$/', $name_low, $match ) ) { /* * Add the whole attribute name to the allowed attributes and set any restrictions * for the `data-*` attribute values for the current element. */ $allowed_attr[ $match[0] ] = $allowed_attr['data-*']; } else { $name = ''; $value = ''; $whole = ''; return false; } } if ( 'style' === $name_low ) { $new_value = safecss_filter_attr( $value ); if ( empty( $new_value ) ) { $name = ''; $value = ''; $whole = ''; return false; } $whole = str_replace( $value, $new_value, $whole ); $value = $new_value; } if ( is_array( $allowed_attr[ $name_low ] ) ) { // There are some checks. foreach ( $allowed_attr[ $name_low ] as $currkey => $currval ) { if ( ! wp_kses_check_attr_val( $value, $vless, $currkey, $currval ) ) { $name = ''; $value = ''; $whole = ''; return false; } } } return true; } /** * Builds an attribute list from string containing attributes. * * This function does a lot of work. It parses an attribute list into an array * with attribute data, and tries to do the right thing even if it gets weird * input. It will add quotes around attribute values that don't have any quotes * or apostrophes around them, to make it easier to produce HTML code that will * conform to W3C's HTML specification. It will also remove bad URL protocols * from attribute values. It also reduces duplicate attributes by using the * attribute defined first (`foo='bar' foo='baz'` will result in `foo='bar'`). * * @since 1.0.0 * * @param string $attr Attribute list from HTML element to closing HTML element tag. * @param string[] $allowed_protocols Array of allowed URL protocols. * @return array[] Array of attribute information after parsing. */ function wp_kses_hair( $attr, $allowed_protocols ) { $attrarr = array(); $mode = 0; $attrname = ''; $uris = wp_kses_uri_attributes(); // Loop through the whole attribute list. while ( strlen( $attr ) !== 0 ) { $working = 0; // Was the last operation successful? switch ( $mode ) { case 0: if ( preg_match( '/^([_a-zA-Z][-_a-zA-Z0-9:.]*)/', $attr, $match ) ) { $attrname = $match[1]; $working = 1; $mode = 1; $attr = preg_replace( '/^[_a-zA-Z][-_a-zA-Z0-9:.]*/', '', $attr ); } break; case 1: if ( preg_match( '/^\s*=\s*/', $attr ) ) { // Equals sign. $working = 1; $mode = 2; $attr = preg_replace( '/^\s*=\s*/', '', $attr ); break; } if ( preg_match( '/^\s+/', $attr ) ) { // Valueless. $working = 1; $mode = 0; if ( false === array_key_exists( $attrname, $attrarr ) ) { $attrarr[ $attrname ] = array( 'name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y', ); } $attr = preg_replace( '/^\s+/', '', $attr ); } break; case 2: if ( preg_match( '%^"([^"]*)"(\s+|/?$)%', $attr, $match ) ) { // "value" $thisval = $match[1]; if ( in_array( strtolower( $attrname ), $uris, true ) ) { $thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols ); } if ( false === array_key_exists( $attrname, $attrarr ) ) { $attrarr[ $attrname ] = array( 'name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n', ); } $working = 1; $mode = 0; $attr = preg_replace( '/^"[^"]*"(\s+|$)/', '', $attr ); break; } if ( preg_match( "%^'([^']*)'(\s+|/?$)%", $attr, $match ) ) { // 'value' $thisval = $match[1]; if ( in_array( strtolower( $attrname ), $uris, true ) ) { $thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols ); } if ( false === array_key_exists( $attrname, $attrarr ) ) { $attrarr[ $attrname ] = array( 'name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n', ); } $working = 1; $mode = 0; $attr = preg_replace( "/^'[^']*'(\s+|$)/", '', $attr ); break; } if ( preg_match( "%^([^\s\"']+)(\s+|/?$)%", $attr, $match ) ) { // value $thisval = $match[1]; if ( in_array( strtolower( $attrname ), $uris, true ) ) { $thisval = wp_kses_bad_protocol( $thisval, $allowed_protocols ); } if ( false === array_key_exists( $attrname, $attrarr ) ) { $attrarr[ $attrname ] = array( 'name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n', ); } // We add quotes to conform to W3C's HTML spec. $working = 1; $mode = 0; $attr = preg_replace( "%^[^\s\"']+(\s+|$)%", '', $attr ); } break; } // End switch. if ( 0 === $working ) { // Not well-formed, remove and try again. $attr = wp_kses_html_error( $attr ); $mode = 0; } } // End while. if ( 1 === $mode && false === array_key_exists( $attrname, $attrarr ) ) { /* * Special case, for when the attribute list ends with a valueless * attribute like "selected". */ $attrarr[ $attrname ] = array( 'name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y', ); } return $attrarr; } /** * Finds all attributes of an HTML element. * * Does not modify input. May return "evil" output. * * Based on `wp_kses_split2()` and `wp_kses_attr()`. * * @since 4.2.3 * * @param string $element HTML element. * @return array|false List of attributes found in the element. Returns false on failure. */ function wp_kses_attr_parse( $element ) { $valid = preg_match( '%^(<\s*)(/\s*)?([a-zA-Z0-9]+\s*)([^>]*)(>?)$%', $element, $matches ); if ( 1 !== $valid ) { return false; } $begin = $matches[1]; $slash = $matches[2]; $elname = $matches[3]; $attr = $matches[4]; $end = $matches[5]; if ( '' !== $slash ) { // Closing elements do not get parsed. return false; } // Is there a closing XHTML slash at the end of the attributes? if ( 1 === preg_match( '%\s*/\s*$%', $attr, $matches ) ) { $xhtml_slash = $matches[0]; $attr = substr( $attr, 0, -strlen( $xhtml_slash ) ); } else { $xhtml_slash = ''; } // Split it. $attrarr = wp_kses_hair_parse( $attr ); if ( false === $attrarr ) { return false; } // Make sure all input is returned by adding front and back matter. array_unshift( $attrarr, $begin . $slash . $elname ); array_push( $attrarr, $xhtml_slash . $end ); return $attrarr; } /** * Builds an attribute list from string containing attributes. * * Does not modify input. May return "evil" output. * In case of unexpected input, returns false instead of stripping things. * * Based on `wp_kses_hair()` but does not return a multi-dimensional array. * * @since 4.2.3 * * @param string $attr Attribute list from HTML element to closing HTML element tag. * @return array|false List of attributes found in $attr. Returns false on failure. */ function wp_kses_hair_parse( $attr ) { if ( '' === $attr ) { return array(); } // phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation $regex = '(?:' . '[_a-zA-Z][-_a-zA-Z0-9:.]*' // Attribute name. . '|' . '\[\[?[^\[\]]+\]\]?' // Shortcode in the name position implies unfiltered_html. . ')' . '(?:' // Attribute value. . '\s*=\s*' // All values begin with '='. . '(?:' . '"[^"]*"' // Double-quoted. . '|' . "'[^']*'" // Single-quoted. . '|' . '[^\s"\']+' // Non-quoted. . '(?:\s|$)' // Must have a space. . ')' . '|' . '(?:\s|$)' // If attribute has no value, space is required. . ')' . '\s*'; // Trailing space is optional except as mentioned above. // phpcs:enable /* * Although it is possible to reduce this procedure to a single regexp, * we must run that regexp twice to get exactly the expected result. */ $validation = "%^($regex)+$%"; $extraction = "%$regex%"; if ( 1 === preg_match( $validation, $attr ) ) { preg_match_all( $extraction, $attr, $attrarr ); return $attrarr[0]; } else { return false; } } /** * Performs different checks for attribute values. * * The currently implemented checks are "maxlen", "minlen", "maxval", "minval", * and "valueless". * * @since 1.0.0 * * @param string $value Attribute value. * @param string $vless Whether the attribute is valueless. Use 'y' or 'n'. * @param string $checkname What $checkvalue is checking for. * @param mixed $checkvalue What constraint the value should pass. * @return bool Whether check passes. */ function wp_kses_check_attr_val( $value, $vless, $checkname, $checkvalue ) { $ok = true; switch ( strtolower( $checkname ) ) { case 'maxlen': /* * The maxlen check makes sure that the attribute value has a length not * greater than the given value. This can be used to avoid Buffer Overflows * in WWW clients and various Internet servers. */ if ( strlen( $value ) > $checkvalue ) { $ok = false; } break; case 'minlen': /* * The minlen check makes sure that the attribute value has a length not * smaller than the given value. */ if ( strlen( $value ) < $checkvalue ) { $ok = false; } break; case 'maxval': /* * The maxval check does two things: it checks that the attribute value is * an integer from 0 and up, without an excessive amount of zeroes or * whitespace (to avoid Buffer Overflows). It also checks that the attribute * value is not greater than the given value. * This check can be used to avoid Denial of Service attacks. */ if ( ! preg_match( '/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value ) ) { $ok = false; } if ( $value > $checkvalue ) { $ok = false; } break; case 'minval': /* * The minval check makes sure that the attribute value is a positive integer, * and that it is not smaller than the given value. */ if ( ! preg_match( '/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value ) ) { $ok = false; } if ( $value < $checkvalue ) { $ok = false; } break; case 'valueless': /* * The valueless check makes sure if the attribute has a value * (like ``) or not (`