WP_Block_Processor::next_token() — public — WP 6.9.0

Advance to the next block delimiter or HTML span in a document, indicating if one was found.

This function steps through every syntactic chunk in a document. This includes explicit block comment delimiters, freeform non-block content, and inner HTML segments.

Example tokens:

<!-- wp:paragraph {"dropCap": true} -->
<!-- wp:separator /-->
<!-- /wp:paragraph -->
<p>Normal HTML content</p>
Plaintext content too!

Example:

// Find the span containing the wrapping HTML element which surrounds the inner blocks.
$processor = new WP_Block_Processor( $html );
// Give up when `next_block( 'gallery' )` reports that no such block was found.
if ( ! $processor->next_block( 'gallery' ) ) {
	return null;
}
// Remains null if the first token after the gallery delimiter is not HTML.
$containing_span = null;
// Keep stepping while tokens are found and are HTML; the loop stops at the first
// non-HTML token (or at the end of the document), leaving the last HTML span seen.
while ( $processor->next_token() && $processor->is_html() ) {
	$containing_span = $processor->get_span();
}

This method will visit all HTML spans including those forming freeform non-block content as well as those which are part of a block’s inner HTML.

Метод класса: WP_Block_Processor{}

Хуков нет.

Возвращает

true|false. `true` when a token was matched; `false` when the end of the document was reached without finding any.

Использование

$WP_Block_Processor = new WP_Block_Processor();
$WP_Block_Processor->next_token(): bool;

Список изменений

С версии 6.9.0 Введена.

Код WP_Block_Processor::next_token() WP 6.9.1

/**
 * Advances to the next block delimiter or HTML span in the document,
 * indicating whether one was found.
 *
 * Delimiters are located by searching for `<!--` and then validating, piece by
 * piece, the optional closer `/`, the `wp:` prefix, the optional namespace, the
 * block name, the optional JSON attributes, the optional void flag `/`, and the
 * terminating `-->`.
 *
 * When text sits between the previous delimiter and the newly-matched one, that
 * text is reported first as an HTML span and the delimiter's stack operation is
 * queued in `$this->next_stack_op`; the following call then reports the already
 * matched delimiter without scanning again.
 *
 * If the document ends in the middle of what could still become a delimiter,
 * the `incomplete` label is reached: `$this->last_error` is set to
 * `self::INCOMPLETE_INPUT` and `false` is returned.
 *
 * @since 6.9.0
 *
 * @return bool Whether a token was matched or the end of the document
 *              was reached without finding any.
 */
public function next_token(): bool {
	// Nothing more can be matched once an error was recorded or parsing finished.
	if ( $this->last_error || self::COMPLETE === $this->state || self::INCOMPLETE_INPUT === $this->state ) {
		return false;
	}

	/*
	 * Void tokens automatically pop off the stack of open blocks.
	 * HTML spans are pushed with `was_void` set as well (see below),
	 * so they are popped here in the same way.
	 */
	if ( $this->was_void ) {
		array_pop( $this->open_blocks_at );
		array_pop( $this->open_blocks_length );
		$this->was_void = false;
	}

	$text = $this->source_text;
	$end  = strlen( $text );

	/*
	 * Because HTML spans are inferred after finding the next delimiter, it means that
	 * the parser must transition out of that HTML state and reuse the token boundaries
	 * it found after the HTML span. If those boundaries are before the end of the
	 * document it implies that a real delimiter was found; otherwise this must be the
	 * terminating HTML span and the parsing is complete.
	 */
	if ( self::HTML_SPAN === $this->state ) {
		if ( $this->matched_delimiter_at >= $end ) {
			$this->state = self::COMPLETE;
			return false;
		}

		// Apply the stack operation that was queued when the delimiter was matched.
		switch ( $this->next_stack_op ) {
			case 'void':
				// Pushed now and flagged so that the next call pops it again.
				$this->was_void             = true;
				$this->open_blocks_at[]     = $this->namespace_at;
				$this->open_blocks_length[] = $this->name_at + $this->name_length - $this->namespace_at;
				break;

			case 'push':
				$this->open_blocks_at[]     = $this->namespace_at;
				$this->open_blocks_length[] = $this->name_at + $this->name_length - $this->namespace_at;
				break;

			case 'pop':
				array_pop( $this->open_blocks_at );
				array_pop( $this->open_blocks_length );
				break;
		}

		$this->next_stack_op = null;
		$this->state         = self::MATCHED;
		return true;
	}

	// Scanning resumes at the first byte after the previously-matched delimiter.
	$this->state          = self::READY;
	$after_prev_delimiter = $this->matched_delimiter_at + $this->matched_delimiter_length;
	$at                   = $after_prev_delimiter;

	while ( $at < $end ) {
		/*
		 * Find the next possible start of a delimiter.
		 *
		 * This follows the behavior in the official block parser, which segments a post
		 * by the block comment delimiters. It is possible for an HTML attribute to contain
		 * what looks like a block comment delimiter but which is actually an HTML attribute
		 * value. In such a case, the parser here will break apart the HTML and create the
		 * block boundary inside the HTML attribute. In other words, the block parser
		 * isolates sections of HTML from each other, even if that leads to malformed markup.
		 *
		 * For a more robust parse, scan through the document with the HTML API and parse
		 * comments once they are matched to see if they are also block delimiters. In
		 * practice, this nuance has not caused any known problems since developing blocks.
		 *
		 * <⃨!⃨-⃨-⃨ /wp:core/paragraph {"dropCap":true} /-->
		 */
		$comment_opening_at = strpos( $text, '<!--', $at );

		/*
		 * Even if the start of a potential block delimiter is not found, the document
		 * might end in a prefix of such, and in that case there is incomplete input.
		 */
		if ( false === $comment_opening_at ) {
			// `$backup` counts the trailing bytes which form a prefix of `<!--`.
			if ( str_ends_with( $text, '<!-' ) ) {
				$backup = 3;
			} elseif ( str_ends_with( $text, '<!' ) ) {
				$backup = 2;
			} elseif ( str_ends_with( $text, '<' ) ) {
				$backup = 1;
			} else {
				$backup = 0;
			}

			/*
			 * Whether or not there is a potential delimiter, there might be an HTML span.
			 * It is recorded as ending where the potential delimiter prefix starts and is
			 * pushed onto the stack with `was_void` set so the next call pops it again.
			 */
			if ( $after_prev_delimiter < ( $end - $backup ) ) {
				$this->state                    = self::HTML_SPAN;
				$this->after_previous_delimiter = $after_prev_delimiter;
				$this->matched_delimiter_at     = $end - $backup;
				$this->matched_delimiter_length = $backup;
				$this->open_blocks_at[]         = $after_prev_delimiter;
				$this->open_blocks_length[]     = 0;
				$this->was_void                 = true;
				return true;
			}

			/*
			 * In the case that there is the start of an HTML comment, it means that there
			 * might be a block delimiter, but it’s not possible to know, therefore it’s incomplete.
			 */
			if ( $backup > 0 ) {
				goto incomplete;
			}

			// Otherwise this is the end.
			$this->state = self::COMPLETE;
			return false;
		}

		// <!-- ⃨/wp:core/paragraph {"dropCap":true} /-->
		$opening_whitespace_at = $comment_opening_at + 4;
		if ( $opening_whitespace_at >= $end ) {
			goto incomplete;
		}

		$opening_whitespace_length = strspn( $text, " \t\f\r\n", $opening_whitespace_at );

		/*
		 * The `wp` prefix cannot come before this point, but it may come after it
		 * depending on the presence of the closer. This is detected next.
		 */
		$wp_prefix_at = $opening_whitespace_at + $opening_whitespace_length;
		if ( $wp_prefix_at >= $end ) {
			goto incomplete;
		}

		/*
		 * Whitespace is mandatory after `<!--`. Without it this cannot be a block
		 * delimiter, so resume scanning from the offset that find_html_comment_end()
		 * returns (presumably just past this HTML comment — confirm in that helper).
		 */
		if ( 0 === $opening_whitespace_length ) {
			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		// <!-- /⃨wp:core/paragraph {"dropCap":true} /-->
		$has_closer = false;
		if ( '/' === $text[ $wp_prefix_at ] ) {
			$has_closer = true;
			++$wp_prefix_at;
		}

		// <!-- /w⃨p⃨:⃨core/paragraph {"dropCap":true} /-->
		if ( $wp_prefix_at < $end && 0 !== substr_compare( $text, 'wp:', $wp_prefix_at, 3 ) ) {
			// The document may have been cut off partway through the `wp:` prefix.
			if (
				( $wp_prefix_at + 2 >= $end && str_ends_with( $text, 'wp' ) ) ||
				( $wp_prefix_at + 1 >= $end && str_ends_with( $text, 'w' ) )
			) {
				goto incomplete;
			}

			// Not a block delimiter; continue searching after this comment.
			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		/*
		 * If the block contains no namespace, this will end up masquerading with
		 * the block name. It’s easier to first detect the span and then determine
		 * if it’s a namespace or a name.
		 *
		 * <!-- /wp:c⃨o⃨r⃨e⃨/paragraph {"dropCap":true} /-->
		 */
		$namespace_at = $wp_prefix_at + 3;
		if ( $namespace_at >= $end ) {
			goto incomplete;
		}

		$start_of_namespace = $text[ $namespace_at ];

		// The namespace must start with a-z.
		if ( 'a' > $start_of_namespace || 'z' < $start_of_namespace ) {
			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		// Remaining characters may be lowercase letters, digits, `-`, or `_`.
		$namespace_length = 1 + strspn( $text, 'abcdefghijklmnopqrstuvwxyz0123456789-_', $namespace_at + 1 );
		$separator_at     = $namespace_at + $namespace_length;
		if ( $separator_at >= $end ) {
			goto incomplete;
		}

		// <!-- /wp:core/⃨paragraph {"dropCap":true} /-->
		$has_separator = '/' === $text[ $separator_at ];
		if ( $has_separator ) {
			$name_at = $separator_at + 1;

			if ( $name_at >= $end ) {
				goto incomplete;
			}

			// <!-- /wp:core/p⃨a⃨r⃨a⃨g⃨r⃨a⃨p⃨h⃨ {"dropCap":true} /-->
			$start_of_name = $text[ $name_at ];
			if ( 'a' > $start_of_name || 'z' < $start_of_name ) {
				$at = $this->find_html_comment_end( $comment_opening_at, $end );
				continue;
			}

			$name_length = 1 + strspn( $text, 'abcdefghijklmnopqrstuvwxyz0123456789-_', $name_at + 1 );
		} else {
			// Without a separator, the span found above is the block name itself.
			$name_at     = $namespace_at;
			$name_length = $namespace_length;
		}

		if ( $name_at + $name_length >= $end ) {
			goto incomplete;
		}

		/*
		 * For this next section of the delimiter, it could be the JSON attributes
		 * or it could be the end of the comment. Assume that the JSON is there and
		 * update if it’s not.
		 */

		// <!-- /wp:core/paragraph ⃨{"dropCap":true} /-->
		$after_name_whitespace_at     = $name_at + $name_length;
		$after_name_whitespace_length = strspn( $text, " \t\f\r\n", $after_name_whitespace_at );
		$json_at                      = $after_name_whitespace_at + $after_name_whitespace_length;

		if ( $json_at >= $end ) {
			goto incomplete;
		}

		// Whitespace is mandatory after the block name.
		if ( 0 === $after_name_whitespace_length ) {
			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		// <!-- /wp:core/paragraph {⃨"dropCap":true} /-->
		$has_json    = '{' === $text[ $json_at ];
		$json_length = 0;

		/*
		 * For the final span of the delimiter it's most efficient to find the end of the
		 * HTML comment and work backwards. This prevents complicated parsing inside the
		 * JSON span, which is not allowed to contain the HTML comment terminator.
		 *
		 * This also matches the behavior in the official block parser,
		 * even though it allows for matching invalid JSON content.
		 *
		 * <!-- /wp:core/paragraph {"dropCap":true} /-⃨-⃨>⃨
		 */
		$comment_closing_at = strpos( $text, '-->', $json_at );
		if ( false === $comment_closing_at ) {
			goto incomplete;
		}

		// <!-- /wp:core/paragraph {"dropCap":true} /⃨-->
		if ( '/' === $text[ $comment_closing_at - 1 ] ) {
			$has_void_flag    = true;
			$void_flag_length = 1;
		} else {
			$has_void_flag    = false;
			$void_flag_length = 0;
		}

		/*
		 * If there's no JSON, then the span of text after the name
		 * until the comment closing must be completely whitespace.
		 * Otherwise it’s a normal HTML comment.
		 */
		if ( ! $has_json ) {
			if ( $after_name_whitespace_at + $after_name_whitespace_length === $comment_closing_at - $void_flag_length ) {
				// This must be a block delimiter!
				$this->state = self::MATCHED;
				break;
			}

			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		/*
		 * There's JSON, so attempt to find its boundary.
		 *
		 * The scan walks backwards from the byte before the void flag (or before
		 * the comment closer when there is no void flag) and stops before reaching
		 * the opening “{” at `$json_at`.
		 *
		 * @todo It’s likely faster to scan forward instead of in reverse.
		 *
		 * <!-- /wp:core/paragraph {"dropCap":true}⃨ ⃨/-->
		 */
		$after_json_whitespace_length = 0;
		for ( $char_at = $comment_closing_at - $void_flag_length - 1; $char_at > $json_at; $char_at-- ) {
			$char = $text[ $char_at ];

			switch ( $char ) {
				case ' ':
				case "\t":
				case "\f":
				case "\r":
				case "\n":
					// Trailing whitespace after the JSON; keep scanning backwards.
					++$after_json_whitespace_length;
					continue 2;

				case '}':
					// Found the last “}”: the JSON span runs from `$json_at` through here.
					$json_length = $char_at - $json_at + 1;
					break 2;

				default:
					/*
					 * Any other character between the JSON and the closer means this
					 * is not a block delimiter. The search offset advances by a single
					 * byte and the outer `while` loop restarts (the `switch` counts as
					 * one nesting level for `continue`).
					 *
					 * NOTE(review): unlike the other rejection paths, this one does not
					 * jump via find_html_comment_end() — confirm that this is intended.
					 */
					++$at;
					continue 3;
			}
		}

		/*
		 * This covers cases where there is no terminating “}” or where
		 * mandatory whitespace is missing.
		 */
		if ( 0 === $json_length || 0 === $after_json_whitespace_length ) {
			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		// This must be a block delimiter!
		$this->state = self::MATCHED;
		break;
	}

	// The end of the document was reached without a match.
	if ( self::MATCHED !== $this->state ) {
		$this->state = self::COMPLETE;
		return false;
	}

	/*
	 * From this point forward, a delimiter has been matched. There
	 * might also be an HTML span that appears before the delimiter.
	 */

	$this->after_previous_delimiter = $after_prev_delimiter;

	// The delimiter spans from `<!--` through the end of `-->` (three bytes).
	$this->matched_delimiter_at     = $comment_opening_at;
	$this->matched_delimiter_length = $comment_closing_at + 3 - $comment_opening_at;

	$this->namespace_at = $namespace_at;
	$this->name_at      = $name_at;
	$this->name_length  = $name_length;

	// `$json_length` stays 0 when the delimiter carries no JSON attributes.
	$this->json_at     = $json_at;
	$this->json_length = $json_length;

	/*
	 * When delimiters contain both the void flag and the closing flag
	 * they shall be interpreted as void blocks, per the spec parser.
	 */
	if ( $has_void_flag ) {
		$this->type          = self::VOID;
		$this->next_stack_op = 'void';
	} elseif ( $has_closer ) {
		$this->type          = self::CLOSER;
		$this->next_stack_op = 'pop';

		/*
		 * @todo Check if the name matches and bail according to the spec parser.
		 *       The default parser doesn’t examine the names.
		 */
	} else {
		$this->type          = self::OPENER;
		$this->next_stack_op = 'push';
	}

	$this->has_closing_flag = $has_closer;

	/*
	 * HTML spans are visited before the delimiter that follows them.
	 * The span is pushed onto the stack with `was_void` set so that the next
	 * call pops it; the queued stack operation for the delimiter is applied then.
	 */
	if ( $comment_opening_at > $after_prev_delimiter ) {
		$this->state                = self::HTML_SPAN;
		$this->open_blocks_at[]     = $after_prev_delimiter;
		$this->open_blocks_length[] = 0;
		$this->was_void             = true;

		return true;
	}

	// If there were no HTML spans then flush the enqueued stack operations immediately.
	switch ( $this->next_stack_op ) {
		case 'void':
			// Pushed now and flagged so that the next call pops it again.
			$this->was_void             = true;
			$this->open_blocks_at[]     = $namespace_at;
			$this->open_blocks_length[] = $name_at + $name_length - $namespace_at;
			break;

		case 'push':
			$this->open_blocks_at[]     = $namespace_at;
			$this->open_blocks_length[] = $name_at + $name_length - $namespace_at;
			break;

		case 'pop':
			array_pop( $this->open_blocks_at );
			array_pop( $this->open_blocks_length );
			break;
	}

	$this->next_stack_op = null;

	return true;

	/*
	 * Reached via `goto` whenever the document ends before a potential delimiter
	 * could be fully examined: record the error and stop parsing.
	 */
	incomplete:
	$this->state      = self::COMPLETE;
	$this->last_error = self::INCOMPLETE_INPUT;
	return false;
}