WP_Block_Processor::next_token │ public │ WP 6.9.0
Advance to the next block delimiter or HTML span in a document, indicating if one was found.
This function steps through every syntactic chunk in a document. This includes explicit block comment delimiters, freeform non-block content, and inner HTML segments.
Example tokens:
<!-- wp:paragraph {"dropCap": true} -->
<!-- wp:separator /-->
<!-- /wp:paragraph -->
<p>Normal HTML content</p>
Plaintext content too!
Example:
// Find span containing wrapping HTML element surrounding inner blocks.
$processor = new WP_Block_Processor( $html );
if ( ! $processor->next_block( 'gallery' ) ) {
return null;
}
$containing_span = null;
while ( $processor->next_token() && $processor->is_html() ) {
$containing_span = $processor->get_span();
}
This method will visit all HTML spans including those forming freeform non-block content as well as those which are part of a block’s inner HTML.
Метод класса: WP_Block_Processor{}
Хуков нет.
Возвращает
true|false. Whether a token was matched or the end of the document was reached without finding any.
Использование
$processor = new WP_Block_Processor( $html ); $found = $processor->next_token(); // bool
Список изменений
| С версии 6.9.0 | Введена. |
Код WP_Block_Processor::next_token() — WP 6.9.1
/**
 * Advances to the next block delimiter or HTML span in the document.
 *
 * Tokens are visited in document order. When text sits between the end of
 * the previous delimiter and the next one, that text is reported first as
 * an HTML span; the delimiter which was found while scanning is remembered
 * and reported by the following call without scanning again.
 *
 * Delimiter anatomy, using the most complete form as a reference:
 *
 *     <!-- /wp:core/paragraph {"dropCap":true} /-->
 *
 * That is: comment opener, mandatory whitespace, optional closer "/",
 * the "wp:" prefix, optional namespace and "/", the block name, mandatory
 * whitespace, optional JSON attributes followed by mandatory whitespace,
 * optional void flag "/", and the comment closer.
 *
 * @since 6.9.0
 *
 * @return bool Whether a token was matched. False is returned when an error
 *              was previously recorded, when the end of the document was
 *              reached, or when the document ends in the middle of what
 *              might be a delimiter (recorded as INCOMPLETE_INPUT).
 */
public function next_token(): bool {
	if ( $this->last_error || self::COMPLETE === $this->state || self::INCOMPLETE_INPUT === $this->state ) {
		return false;
	}

	/*
	 * Void tokens automatically pop off the stack of open blocks.
	 * HTML spans are pushed with this flag set as well (see below),
	 * so they are removed here when moving on to the next token.
	 */
	if ( $this->was_void ) {
		array_pop( $this->open_blocks_at );
		array_pop( $this->open_blocks_length );
		$this->was_void = false;
	}

	$text = $this->source_text;
	$end  = strlen( $text );

	/*
	 * Because HTML spans are inferred after finding the next delimiter, it means that
	 * the parser must transition out of that HTML state and reuse the token boundaries
	 * it found after the HTML span. If those boundaries are before the end of the
	 * document it implies that a real delimiter was found; otherwise this must be the
	 * terminating HTML span and the parsing is complete.
	 */
	if ( self::HTML_SPAN === $this->state ) {
		if ( $this->matched_delimiter_at >= $end ) {
			$this->state = self::COMPLETE;
			return false;
		}

		// Apply the stack operation that was deferred while the HTML span was reported.
		switch ( $this->next_stack_op ) {
			case 'void':
				$this->was_void             = true;
				$this->open_blocks_at[]     = $this->namespace_at;
				$this->open_blocks_length[] = $this->name_at + $this->name_length - $this->namespace_at;
				break;

			case 'push':
				$this->open_blocks_at[]     = $this->namespace_at;
				$this->open_blocks_length[] = $this->name_at + $this->name_length - $this->namespace_at;
				break;

			case 'pop':
				array_pop( $this->open_blocks_at );
				array_pop( $this->open_blocks_length );
				break;
		}

		$this->next_stack_op = null;
		$this->state         = self::MATCHED;
		return true;
	}

	$this->state          = self::READY;
	$after_prev_delimiter = $this->matched_delimiter_at + $this->matched_delimiter_length;
	$at                   = $after_prev_delimiter;

	while ( $at < $end ) {
		/*
		 * Find the next possible start of a delimiter.
		 *
		 * This follows the behavior in the official block parser, which segments a post
		 * by the block comment delimiters. It is possible for an HTML attribute to contain
		 * what looks like a block comment delimiter but which is actually an HTML attribute
		 * value. In such a case, the parser here will break apart the HTML and create the
		 * block boundary inside the HTML attribute. In other words, the block parser
		 * isolates sections of HTML from each other, even if that leads to malformed markup.
		 *
		 * For a more robust parse, scan through the document with the HTML API and parse
		 * comments once they are matched to see if they are also block delimiters. In
		 * practice, this nuance has not caused any known problems since developing blocks.
		 */
		$comment_opening_at = strpos( $text, '<!--', $at );

		/*
		 * Even if the start of a potential block delimiter is not found, the document
		 * might end in a prefix of such, and in that case there is incomplete input.
		 */
		if ( false === $comment_opening_at ) {
			// Number of trailing bytes that could be the beginning of "<!--".
			if ( str_ends_with( $text, '<!-' ) ) {
				$backup = 3;
			} elseif ( str_ends_with( $text, '<!' ) ) {
				$backup = 2;
			} elseif ( str_ends_with( $text, '<' ) ) {
				$backup = 1;
			} else {
				$backup = 0;
			}

			// Whether or not there is a potential delimiter, there might be an HTML span.
			if ( $after_prev_delimiter < ( $end - $backup ) ) {
				$this->state                    = self::HTML_SPAN;
				$this->after_previous_delimiter = $after_prev_delimiter;
				$this->matched_delimiter_at     = $end - $backup;
				$this->matched_delimiter_length = $backup;

				// The span is pushed as a zero-length entry which is popped on the next call.
				$this->open_blocks_at[]     = $after_prev_delimiter;
				$this->open_blocks_length[] = 0;
				$this->was_void             = true;
				return true;
			}

			/*
			 * In the case that there is the start of an HTML comment, it means that there
			 * might be a block delimiter, but it's not possible to know, therefore it's incomplete.
			 */
			if ( $backup > 0 ) {
				goto incomplete;
			}

			// Otherwise this is the end.
			$this->state = self::COMPLETE;
			return false;
		}

		// Immediately after "<!--": whitespace is required before anything else.
		$opening_whitespace_at = $comment_opening_at + 4;
		if ( $opening_whitespace_at >= $end ) {
			goto incomplete;
		}

		$opening_whitespace_length = strspn( $text, " \t\f\r\n", $opening_whitespace_at );

		/*
		 * The `wp` prefix cannot come before this point, but it may come after it
		 * depending on the presence of the closer. This is detected next.
		 */
		$wp_prefix_at = $opening_whitespace_at + $opening_whitespace_length;
		if ( $wp_prefix_at >= $end ) {
			goto incomplete;
		}

		// Without the leading whitespace this cannot be a block delimiter.
		if ( 0 === $opening_whitespace_length ) {
			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		// An optional "/" before the prefix marks a closing delimiter.
		$has_closer = false;
		if ( '/' === $text[ $wp_prefix_at ] ) {
			$has_closer = true;
			++$wp_prefix_at;
		}

		/*
		 * The "wp:" prefix must follow. If the document ends with only part of the
		 * prefix then more input could still complete it; otherwise the comment is
		 * not a delimiter. When the closer was the final byte of the document this
		 * check is skipped and the bounds check on the namespace reports incompleteness.
		 */
		if ( $wp_prefix_at < $end && 0 !== substr_compare( $text, 'wp:', $wp_prefix_at, 3 ) ) {
			if (
				( $wp_prefix_at + 2 >= $end && str_ends_with( $text, 'wp' ) ) ||
				( $wp_prefix_at + 1 >= $end && str_ends_with( $text, 'w' ) )
			) {
				goto incomplete;
			}

			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		/*
		 * If the block contains no namespace, this will end up masquerading with
		 * the block name. It's easier to first detect the span and then determine
		 * if it's a namespace or a name.
		 */
		$namespace_at = $wp_prefix_at + 3;
		if ( $namespace_at >= $end ) {
			goto incomplete;
		}

		$start_of_namespace = $text[ $namespace_at ];

		// The namespace must start with a-z.
		if ( 'a' > $start_of_namespace || 'z' < $start_of_namespace ) {
			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		$namespace_length = 1 + strspn( $text, 'abcdefghijklmnopqrstuvwxyz0123456789-_', $namespace_at + 1 );
		$separator_at     = $namespace_at + $namespace_length;
		if ( $separator_at >= $end ) {
			goto incomplete;
		}

		// A "/" here separates the namespace from the block name.
		$has_separator = '/' === $text[ $separator_at ];
		if ( $has_separator ) {
			$name_at = $separator_at + 1;
			if ( $name_at >= $end ) {
				goto incomplete;
			}

			// The block name follows the same rules as the namespace.
			$start_of_name = $text[ $name_at ];
			if ( 'a' > $start_of_name || 'z' < $start_of_name ) {
				$at = $this->find_html_comment_end( $comment_opening_at, $end );
				continue;
			}

			$name_length = 1 + strspn( $text, 'abcdefghijklmnopqrstuvwxyz0123456789-_', $name_at + 1 );
		} else {
			// No separator: what was scanned as the namespace is actually the name.
			$name_at     = $namespace_at;
			$name_length = $namespace_length;
		}

		if ( $name_at + $name_length >= $end ) {
			goto incomplete;
		}

		/*
		 * For this next section of the delimiter, it could be the JSON attributes
		 * or it could be the end of the comment. Assume that the JSON is there and
		 * update if it's not.
		 */
		$after_name_whitespace_at     = $name_at + $name_length;
		$after_name_whitespace_length = strspn( $text, " \t\f\r\n", $after_name_whitespace_at );
		$json_at                      = $after_name_whitespace_at + $after_name_whitespace_length;
		if ( $json_at >= $end ) {
			goto incomplete;
		}

		// Whitespace after the block name is mandatory.
		if ( 0 === $after_name_whitespace_length ) {
			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		// A "{" at this position starts the JSON attributes.
		$has_json    = '{' === $text[ $json_at ];
		$json_length = 0;

		/*
		 * For the final span of the delimiter it's most efficient to find the end of the
		 * HTML comment and work backwards. This prevents complicated parsing inside the
		 * JSON span, which is not allowed to contain the HTML comment terminator.
		 *
		 * This also matches the behavior in the official block parser,
		 * even though it allows for matching invalid JSON content.
		 */
		$comment_closing_at = strpos( $text, '-->', $json_at );
		if ( false === $comment_closing_at ) {
			goto incomplete;
		}

		// A "/" directly before "-->" is the void flag.
		if ( '/' === $text[ $comment_closing_at - 1 ] ) {
			$has_void_flag    = true;
			$void_flag_length = 1;
		} else {
			$has_void_flag    = false;
			$void_flag_length = 0;
		}

		/*
		 * If there's no JSON, then the span of text after the name
		 * until the comment closing must be completely whitespace.
		 * Otherwise it's a normal HTML comment.
		 */
		if ( ! $has_json ) {
			if ( $after_name_whitespace_at + $after_name_whitespace_length === $comment_closing_at - $void_flag_length ) {
				// This must be a block delimiter!
				$this->state = self::MATCHED;
				break;
			}

			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		/*
		 * There's JSON, so attempt to find its boundary by scanning backwards from
		 * the comment closer (or the void flag): skip whitespace, then expect "}".
		 *
		 * @todo It's likely faster to scan forward instead of in reverse.
		 */
		$after_json_whitespace_length = 0;
		for ( $char_at = $comment_closing_at - $void_flag_length - 1; $char_at > $json_at; $char_at-- ) {
			$char = $text[ $char_at ];

			switch ( $char ) {
				case ' ':
				case "\t":
				case "\f":
				case "\r":
				case "\n":
					++$after_json_whitespace_length;
					continue 2;

				case '}':
					$json_length = $char_at - $json_at + 1;
					break 2;

				default:
					/*
					 * Something other than whitespace or "}" ends the comment text,
					 * so this is not a delimiter. Resume the search one byte past
					 * this comment opening.
					 *
					 * Advancing the cursor by a single byte from wherever it was
					 * (possibly far before the comment opening) would find and
					 * reject this same opening once for every byte in between.
					 * Jumping directly reaches the identical state without the
					 * repeated scanning.
					 *
					 * NOTE(review): unlike the other rejection paths this one does
					 * not call find_html_comment_end(), so a "<!--" located inside
					 * this comment is still examined. That behavior is preserved.
					 */
					$at = $comment_opening_at + 1;
					continue 3;
			}
		}

		/*
		 * This covers cases where there is no terminating "}" or where
		 * mandatory whitespace is missing.
		 */
		if ( 0 === $json_length || 0 === $after_json_whitespace_length ) {
			$at = $this->find_html_comment_end( $comment_opening_at, $end );
			continue;
		}

		// This must be a block delimiter!
		$this->state = self::MATCHED;
		break;
	}

	// The end of the document was reached without a match.
	if ( self::MATCHED !== $this->state ) {
		$this->state = self::COMPLETE;
		return false;
	}

	/*
	 * From this point forward, a delimiter has been matched. There
	 * might also be an HTML span that appears before the delimiter.
	 */
	$this->after_previous_delimiter = $after_prev_delimiter;
	$this->matched_delimiter_at     = $comment_opening_at;
	$this->matched_delimiter_length = $comment_closing_at + 3 - $comment_opening_at;
	$this->namespace_at             = $namespace_at;
	$this->name_at                  = $name_at;
	$this->name_length              = $name_length;
	$this->json_at                  = $json_at;
	$this->json_length              = $json_length;

	/*
	 * When delimiters contain both the void flag and the closing flag
	 * they shall be interpreted as void blocks, per the spec parser.
	 */
	if ( $has_void_flag ) {
		$this->type          = self::VOID;
		$this->next_stack_op = 'void';
	} elseif ( $has_closer ) {
		$this->type          = self::CLOSER;
		$this->next_stack_op = 'pop';

		/*
		 * @todo Check if the name matches and bail according to the spec parser.
		 *       The default parser doesn't examine the names.
		 */
	} else {
		$this->type          = self::OPENER;
		$this->next_stack_op = 'push';
	}

	$this->has_closing_flag = $has_closer;

	/*
	 * HTML spans are visited before the delimiter that follows them. The span
	 * is pushed as a zero-length entry which is popped on the next call, and
	 * the delimiter's own stack operation stays queued in `next_stack_op`.
	 */
	if ( $comment_opening_at > $after_prev_delimiter ) {
		$this->state                = self::HTML_SPAN;
		$this->open_blocks_at[]     = $after_prev_delimiter;
		$this->open_blocks_length[] = 0;
		$this->was_void             = true;

		return true;
	}

	// If there were no HTML spans then flush the enqueued stack operations immediately.
	switch ( $this->next_stack_op ) {
		case 'void':
			$this->was_void             = true;
			$this->open_blocks_at[]     = $namespace_at;
			$this->open_blocks_length[] = $name_at + $name_length - $namespace_at;
			break;

		case 'push':
			$this->open_blocks_at[]     = $namespace_at;
			$this->open_blocks_length[] = $name_at + $name_length - $namespace_at;
			break;

		case 'pop':
			array_pop( $this->open_blocks_at );
			array_pop( $this->open_blocks_length );
			break;
	}

	$this->next_stack_op = null;

	return true;

	// The document ended where a delimiter might have continued: record it and stop.
	incomplete:
	$this->state      = self::COMPLETE;
	$this->last_error = self::INCOMPLETE_INPUT;
	return false;
}