WP_HTML_Tag_Processor::parse_next_tag() │ private │ WP 6.2.0
Parses the next tag.
This will find and start parsing the next tag, including the opening <, the potential closer /, and the tag name. It does not parse the attributes or scan to the closing >; these are left for other methods.
Метод класса: WP_HTML_Tag_Processor{}
Хуков нет.
Возвращает
true|false. Whether a tag was found before the end of the document.
Использование
// private - только в коде основного (родительского) класса
$result = $this->parse_next_tag();
Список изменений
С версии 6.2.0 | Введена. |
С версии 6.2.1 | Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements. |
Код WP_HTML_Tag_Processor::parse_next_tag() WP 6.3.1
/**
 * Parses the next tag.
 *
 * This will find and start parsing the next tag, including the opening `<`,
 * the potential closer `/`, and the tag name. It does not parse the attributes
 * or scan to the closing `>`; these are left for other methods.
 *
 * On success this records `is_closing_tag`, `tag_name_starts_at` and
 * `tag_name_length`, and moves `bytes_already_parsed` to the byte just past
 * the tag name. Comments, CDATA sections, DOCTYPE declarations, and other
 * `<` sequences that do not open a tag are skipped along the way.
 *
 * @since 6.2.0
 * @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements.
 *
 * @return bool Whether a tag was found before the end of the document.
 */
private function parse_next_tag() {
	// Defined elsewhere in the class; always runs before scanning for a new tag.
	$this->after_tag();

	$html       = $this->html;
	$doc_length = strlen( $html );
	$at         = $this->bytes_already_parsed;

	while ( false !== $at && $at < $doc_length ) {
		$at = strpos( $html, '<', $at );
		if ( false === $at ) {
			return false;
		}

		/*
		 * Only look for the "/" when there is a byte after the "<";
		 * a document ending in "<" must not be read out of bounds.
		 * When a "/" is present `$at` moves onto it, so that in both
		 * cases `$at + 1` is the first byte of the would-be tag name.
		 */
		if ( $at + 1 < $doc_length && '/' === $html[ $at + 1 ] ) {
			$this->is_closing_tag = true;
			++$at;
		} else {
			$this->is_closing_tag = false;
		}

		/*
		 * HTML tag names must start with [a-zA-Z] otherwise they are not tags.
		 * For example, "<3" is rendered as text, not a tag opener. If at least
		 * one letter follows the "<" then _it is_ a tag, but if the following
		 * character is anything else it _is not a tag_.
		 *
		 * It's not uncommon to find non-tags starting with `<` in an HTML
		 * document, so it's good for performance to make this pre-check before
		 * continuing to attempt to parse a tag name.
		 *
		 * Reference:
		 * * https://html.spec.whatwg.org/multipage/parsing.html#data-state
		 * * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
		 */
		$tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 );
		if ( $tag_name_prefix_length > 0 ) {
			++$at;
			$this->tag_name_length      = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
			$this->tag_name_starts_at   = $at;
			$this->bytes_already_parsed = $at + $this->tag_name_length;
			return true;
		}

		/*
		 * Abort if no tag is found before the end of
		 * the document. There is nothing left to parse.
		 *
		 * Past this point `$at + 1` is always a valid offset.
		 */
		if ( $at + 1 >= $doc_length ) {
			return false;
		}

		/*
		 * `<!` transitions to the markup declaration open state.
		 * https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
		 *
		 * This applies to `<!` only. `</!` is an invalid tag closer which
		 * ends at the first `>` and is handled further below.
		 */
		if ( ! $this->is_closing_tag && '!' === $html[ $at + 1 ] ) {
			/*
			 * `<!--` opens a comment; skip to the nearest closer.
			 * https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
			 */
			if (
				$doc_length > $at + 3 &&
				'-' === $html[ $at + 2 ] &&
				'-' === $html[ $at + 3 ]
			) {
				$closer_at = $at + 4;

				// If it's not possible to close the comment then there is nothing more to scan.
				if ( $doc_length <= $closer_at ) {
					return false;
				}

				/*
				 * Abruptly-closed empty comments are a sequence of dashes followed by `>`.
				 * The dashes may run to the end of the document, so confirm that a byte
				 * exists after them before reading it.
				 */
				$span_of_dashes = strspn( $html, '-', $closer_at );
				$after_dashes   = $closer_at + $span_of_dashes;
				if ( $after_dashes < $doc_length && '>' === $html[ $after_dashes ] ) {
					$at = $after_dashes + 1;
					continue;
				}

				/*
				 * Comments may be closed by either a --> or an invalid --!>.
				 * The first occurrence closes the comment.
				 *
				 * See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment
				 */
				--$closer_at; // Pre-increment inside condition below reduces risk of accidental infinite looping.
				while ( ++$closer_at < $doc_length ) {
					$closer_at = strpos( $html, '--', $closer_at );
					if ( false === $closer_at ) {
						return false;
					}

					if ( $closer_at + 2 < $doc_length && '>' === $html[ $closer_at + 2 ] ) {
						$at = $closer_at + 3;
						continue 2;
					}

					if (
						$closer_at + 3 < $doc_length &&
						'!' === $html[ $closer_at + 2 ] &&
						'>' === $html[ $closer_at + 3 ]
					) {
						$at = $closer_at + 4;
						continue 2;
					}
				}
			}

			/*
			 * `<![CDATA[` transitions to CDATA section state – skip to the nearest `]]>`.
			 * The CDATA is case-sensitive.
			 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
			 */
			if (
				$doc_length > $at + 8 &&
				'[' === $html[ $at + 2 ] &&
				'C' === $html[ $at + 3 ] &&
				'D' === $html[ $at + 4 ] &&
				'A' === $html[ $at + 5 ] &&
				'T' === $html[ $at + 6 ] &&
				'A' === $html[ $at + 7 ] &&
				'[' === $html[ $at + 8 ]
			) {
				$closer_at = strpos( $html, ']]>', $at + 9 );
				if ( false === $closer_at ) {
					return false;
				}

				$at = $closer_at + 3;
				continue;
			}

			/*
			 * `<!DOCTYPE` transitions to DOCTYPE state – skip to the nearest `>`.
			 * These are ASCII-case-insensitive.
			 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
			 */
			if (
				$doc_length > $at + 8 &&
				( 'D' === $html[ $at + 2 ] || 'd' === $html[ $at + 2 ] ) &&
				( 'O' === $html[ $at + 3 ] || 'o' === $html[ $at + 3 ] ) &&
				( 'C' === $html[ $at + 4 ] || 'c' === $html[ $at + 4 ] ) &&
				( 'T' === $html[ $at + 5 ] || 't' === $html[ $at + 5 ] ) &&
				( 'Y' === $html[ $at + 6 ] || 'y' === $html[ $at + 6 ] ) &&
				( 'P' === $html[ $at + 7 ] || 'p' === $html[ $at + 7 ] ) &&
				( 'E' === $html[ $at + 8 ] || 'e' === $html[ $at + 8 ] )
			) {
				$closer_at = strpos( $html, '>', $at + 9 );
				if ( false === $closer_at ) {
					return false;
				}

				$at = $closer_at + 1;
				continue;
			}

			/*
			 * Anything else here is an incorrectly-opened comment and transitions
			 * to the bogus comment state - skip to the nearest `>`.
			 */
			$closer_at = strpos( $html, '>', $at + 1 );
			if ( false === $closer_at ) {
				return false;
			}

			$at = $closer_at + 1;
			continue;
		}

		/*
		 * `</>` is a missing end tag name, which is ignored.
		 *
		 * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
		 */
		if ( '>' === $html[ $at + 1 ] ) {
			++$at;
			continue;
		}

		/*
		 * `<?` transitions to a bogus comment state – skip to the nearest `>`.
		 * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
		 */
		if ( '?' === $html[ $at + 1 ] ) {
			$closer_at = strpos( $html, '>', $at + 2 );
			if ( false === $closer_at ) {
				return false;
			}

			$at = $closer_at + 1;
			continue;
		}

		/*
		 * If a non-alpha starts the tag name in a tag closer it's a comment.
		 * Find the first `>`, which closes the comment.
		 *
		 * `$at` points at the `/` and the byte at `$at + 1` is already known
		 * not to be `>`, so the search starts at `$at + 2`. Starting any later
		 * would skip the closer of a one-byte comment such as `</3>`, and
		 * could place the offset past the end of the document.
		 *
		 * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
		 */
		if ( $this->is_closing_tag ) {
			$closer_at = strpos( $html, '>', $at + 2 );
			if ( false === $closer_at ) {
				return false;
			}

			$at = $closer_at + 1;
			continue;
		}

		// A lone "<" followed by something that cannot start a tag is plain text.
		++$at;
	}

	return false;
}