WP_HTML_Tag_Processor::parse_next_tag() — private, WP 6.2.0

Parses the next tag.

This will find and start parsing the next tag, including the opening <, the potential closer /, and the tag name. It does not parse the attributes or scan to the closing >; these are left for other methods.

Метод класса: WP_HTML_Tag_Processor{}

Хуков нет.

Возвращает

true|false. Whether a tag was found before the end of the document.

Использование

// private - только в коде основного (родительского) класса
$result = $this->parse_next_tag();

Список изменений

С версии 6.2.0 Введена.
С версии 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements.

Код WP_HTML_Tag_Processor::parse_next_tag() WP 6.3.1

/**
 * Parses the next tag.
 *
 * Scans forward from the already-parsed offset for the next `<` that opens
 * a real tag, recording whether it is a closer (`</`) and where its name
 * starts and how long it is. Attributes and the closing `>` are not parsed
 * here. Comments, CDATA sections, DOCTYPE declarations and other non-tag
 * `<...>` syntax are skipped over.
 *
 * @since 6.2.0
 * @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements.
 *
 * @return bool Whether a tag was found before the end of the document.
 */
private function parse_next_tag() {
	$this->after_tag();

	$html       = $this->html;
	$doc_length = strlen( $html );
	$at         = $this->bytes_already_parsed;

	while ( false !== $at && $at < $doc_length ) {
		$at = strpos( $html, '<', $at );
		if ( false === $at ) {
			return false;
		}

		/*
		 * Check the bounds before peeking at the next byte: a document
		 * ending in `<` has nothing after it to inspect.
		 */
		$this->is_closing_tag = $at + 1 < $doc_length && '/' === $html[ $at + 1 ];
		if ( $this->is_closing_tag ) {
			// From here on $at points at the `/` rather than the `<`.
			++$at;
		}

		/*
		 * Abort if no tag is found before the end of
		 * the document. There is nothing left to parse.
		 */
		if ( $at + 1 >= $doc_length ) {
			return false;
		}

		/*
		 * HTML tag names must start with [a-zA-Z] otherwise they are not tags.
		 * For example, "<3" is rendered as text, not a tag opener. If at least
		 * one letter follows the "<" then _it is_ a tag, but if the following
		 * character is anything else it _is not a tag_.
		 *
		 * It's not uncommon to find non-tags starting with `<` in an HTML
		 * document, so it's good for performance to make this pre-check before
		 * continuing to attempt to parse a tag name.
		 *
		 * Reference:
		 * * https://html.spec.whatwg.org/multipage/parsing.html#data-state
		 * * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
		 */
		$tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 );
		if ( $tag_name_prefix_length > 0 ) {
			++$at;
			$this->tag_name_length      = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
			$this->tag_name_starts_at   = $at;
			$this->bytes_already_parsed = $at + $this->tag_name_length;
			return true;
		}

		/*
		 * <! transitions to markup declaration open state
		 * https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
		 *
		 * This only applies to tag openers: `</!` is an invalid tag closer
		 * and is handled further below as a comment ending at the first `>`.
		 */
		if ( ! $this->is_closing_tag && '!' === $html[ $at + 1 ] ) {
			/*
			 * <!-- starts a comment – skip to the nearest closer.
			 * https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
			 */
			if (
				$doc_length > $at + 3 &&
				'-' === $html[ $at + 2 ] &&
				'-' === $html[ $at + 3 ]
			) {
				$closer_at = $at + 4;
				// If it's not possible to close the comment then there is nothing more to scan.
				if ( $doc_length <= $closer_at ) {
					return false;
				}

				/*
				 * Abruptly-closed empty comments are a sequence of dashes followed by `>`.
				 * The dashes may run to the very end of the document, in which case
				 * there is no byte after them to compare.
				 */
				$after_dashes = $closer_at + strspn( $html, '-', $closer_at );
				if ( $after_dashes < $doc_length && '>' === $html[ $after_dashes ] ) {
					$at = $after_dashes + 1;
					continue;
				}

				/*
				 * Comments may be closed by either a --> or an invalid --!>.
				 * The first occurrence closes the comment.
				 *
				 * See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment
				 */
				$closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping.
				while ( ++$closer_at < $doc_length ) {
					$closer_at = strpos( $html, '--', $closer_at );
					if ( false === $closer_at ) {
						return false;
					}

					if ( $closer_at + 2 < $doc_length && '>' === $html[ $closer_at + 2 ] ) {
						$at = $closer_at + 3;
						continue 2;
					}

					if ( $closer_at + 3 < $doc_length && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
						$at = $closer_at + 4;
						continue 2;
					}
				}
			}

			/*
			 * <![CDATA[ transitions to CDATA section state – skip to the nearest ]]>
			 * The CDATA is case-sensitive.
			 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
			 */
			if (
				$doc_length > $at + 8 &&
				'[' === $html[ $at + 2 ] &&
				'C' === $html[ $at + 3 ] &&
				'D' === $html[ $at + 4 ] &&
				'A' === $html[ $at + 5 ] &&
				'T' === $html[ $at + 6 ] &&
				'A' === $html[ $at + 7 ] &&
				'[' === $html[ $at + 8 ]
			) {
				$closer_at = strpos( $html, ']]>', $at + 9 );
				if ( false === $closer_at ) {
					return false;
				}

				$at = $closer_at + 3;
				continue;
			}

			/*
			 * <!DOCTYPE transitions to DOCTYPE state – skip to the nearest >
			 * These are ASCII-case-insensitive.
			 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
			 */
			if (
				$doc_length > $at + 8 &&
				( 'D' === $html[ $at + 2 ] || 'd' === $html[ $at + 2 ] ) &&
				( 'O' === $html[ $at + 3 ] || 'o' === $html[ $at + 3 ] ) &&
				( 'C' === $html[ $at + 4 ] || 'c' === $html[ $at + 4 ] ) &&
				( 'T' === $html[ $at + 5 ] || 't' === $html[ $at + 5 ] ) &&
				( 'Y' === $html[ $at + 6 ] || 'y' === $html[ $at + 6 ] ) &&
				( 'P' === $html[ $at + 7 ] || 'p' === $html[ $at + 7 ] ) &&
				( 'E' === $html[ $at + 8 ] || 'e' === $html[ $at + 8 ] )
			) {
				$closer_at = strpos( $html, '>', $at + 9 );
				if ( false === $closer_at ) {
					return false;
				}

				$at = $closer_at + 1;
				continue;
			}

			/*
			 * Anything else here is an incorrectly-opened comment and transitions
			 * to the bogus comment state - skip to the nearest >.
			 */
			$closer_at = strpos( $html, '>', $at + 1 );
			if ( false === $closer_at ) {
				return false;
			}

			$at = $closer_at + 1;
			continue;
		}

		/*
		 * </> is a missing end tag name, which is ignored.
		 *
		 * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
		 */
		if ( '>' === $html[ $at + 1 ] ) {
			$at++;
			continue;
		}

		/*
		 * <? transitions to a bogus comment state – skip to the nearest >
		 * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
		 */
		if ( '?' === $html[ $at + 1 ] ) {
			$closer_at = strpos( $html, '>', $at + 2 );
			if ( false === $closer_at ) {
				return false;
			}

			$at = $closer_at + 1;
			continue;
		}

		/*
		 * If a non-alpha starts the tag name in a tag closer it's a comment.
		 * Find the first `>`, which closes the comment.
		 *
		 * $at points at the `/` here and the byte at $at + 1 is already known
		 * not to be `>`, so the search starts at $at + 2. That offset is at
		 * most the document length, which strpos() accepts.
		 *
		 * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
		 */
		if ( $this->is_closing_tag ) {
			$closer_at = strpos( $html, '>', $at + 2 );
			if ( false === $closer_at ) {
				return false;
			}

			$at = $closer_at + 1;
			continue;
		}

		++$at;
	}

	return false;
}