balmet.com

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

class-wp-block-parser.php (15218B)


      1 <?php
      2 /**
      3  * Block Serialization Parser
      4  *
      5  * @package WordPress
      6  */
      7 
      8 /**
      9  * Class WP_Block_Parser_Block
     10  *
     11  * Holds the block structure in memory
     12  *
     13  * @since 5.0.0
     14  */
     15 class WP_Block_Parser_Block {
     16 	/**
     17 	 * Name of block
     18 	 *
     19 	 * @example "core/paragraph"
     20 	 *
     21 	 * @since 5.0.0
     22 	 * @var string
     23 	 */
     24 	public $blockName;
     25 
     26 	/**
     27 	 * Optional set of attributes from block comment delimiters
     28 	 *
     29 	 * @example null
     30 	 * @example array( 'columns' => 3 )
     31 	 *
     32 	 * @since 5.0.0
     33 	 * @var array|null
     34 	 */
     35 	public $attrs;
     36 
     37 	/**
     38 	 * List of inner blocks (of this same class)
     39 	 *
     40 	 * @since 5.0.0
     41 	 * @var WP_Block_Parser_Block[]
     42 	 */
     43 	public $innerBlocks;
     44 
     45 	/**
     46 	 * Resultant HTML from inside block comment delimiters
     47 	 * after removing inner blocks
     48 	 *
     49 	 * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
     50 	 *
     51 	 * @since 5.0.0
     52 	 * @var string
     53 	 */
     54 	public $innerHTML;
     55 
     56 	/**
     57 	 * List of string fragments and null markers where inner blocks were found
     58 	 *
     59 	 * @example array(
     60 	 *   'innerHTML'    => 'BeforeInnerAfter',
     61 	 *   'innerBlocks'  => array( block, block ),
     62 	 *   'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
     63 	 * )
     64 	 *
     65 	 * @since 4.2.0
     66 	 * @var array
     67 	 */
     68 	public $innerContent;
     69 
     70 	/**
     71 	 * Constructor.
     72 	 *
     73 	 * Will populate object properties from the provided arguments.
     74 	 *
     75 	 * @since 5.0.0
     76 	 *
     77 	 * @param string $name         Name of block.
     78 	 * @param array  $attrs        Optional set of attributes from block comment delimiters.
     79 	 * @param array  $innerBlocks  List of inner blocks (of this same class).
     80 	 * @param string $innerHTML    Resultant HTML from inside block comment delimiters after removing inner blocks.
     81 	 * @param array  $innerContent List of string fragments and null markers where inner blocks were found.
     82 	 */
     83 	function __construct( $name, $attrs, $innerBlocks, $innerHTML, $innerContent ) {
     84 		$this->blockName    = $name;
     85 		$this->attrs        = $attrs;
     86 		$this->innerBlocks  = $innerBlocks;
     87 		$this->innerHTML    = $innerHTML;
     88 		$this->innerContent = $innerContent;
     89 	}
     90 }
     91 
     92 /**
     93  * Class WP_Block_Parser_Frame
     94  *
     95  * Holds partial blocks in memory while parsing
     96  *
     97  * @internal
     98  * @since 5.0.0
     99  */
    100 class WP_Block_Parser_Frame {
    101 	/**
    102 	 * Full or partial block
    103 	 *
    104 	 * @since 5.0.0
    105 	 * @var WP_Block_Parser_Block
    106 	 */
    107 	public $block;
    108 
    109 	/**
    110 	 * Byte offset into document for start of parse token
    111 	 *
    112 	 * @since 5.0.0
    113 	 * @var int
    114 	 */
    115 	public $token_start;
    116 
    117 	/**
    118 	 * Byte length of entire parse token string
    119 	 *
    120 	 * @since 5.0.0
    121 	 * @var int
    122 	 */
    123 	public $token_length;
    124 
    125 	/**
    126 	 * Byte offset into document for after parse token ends
    127 	 * (used during reconstruction of stack into parse production)
    128 	 *
    129 	 * @since 5.0.0
    130 	 * @var int
    131 	 */
    132 	public $prev_offset;
    133 
    134 	/**
    135 	 * Byte offset into document where leading HTML before token starts
    136 	 *
    137 	 * @since 5.0.0
    138 	 * @var int
    139 	 */
    140 	public $leading_html_start;
    141 
    142 	/**
    143 	 * Constructor
    144 	 *
    145 	 * Will populate object properties from the provided arguments.
    146 	 *
    147 	 * @since 5.0.0
    148 	 *
    149 	 * @param WP_Block_Parser_Block $block              Full or partial block.
    150 	 * @param int                   $token_start        Byte offset into document for start of parse token.
    151 	 * @param int                   $token_length       Byte length of entire parse token string.
    152 	 * @param int                   $prev_offset        Byte offset into document for after parse token ends.
    153 	 * @param int                   $leading_html_start Byte offset into document where leading HTML before token starts.
    154 	 */
    155 	function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
    156 		$this->block              = $block;
    157 		$this->token_start        = $token_start;
    158 		$this->token_length       = $token_length;
    159 		$this->prev_offset        = isset( $prev_offset ) ? $prev_offset : $token_start + $token_length;
    160 		$this->leading_html_start = $leading_html_start;
    161 	}
    162 }
    163 
    164 /**
    165  * Class WP_Block_Parser
    166  *
    167  * Parses a document and constructs a list of parsed block objects
    168  *
    169  * @since 5.0.0
    170  * @since 4.0.0 returns arrays not objects, all attributes are arrays
    171  */
    172 class WP_Block_Parser {
    173 	/**
    174 	 * Input document being parsed
    175 	 *
    176 	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
    177 	 *
    178 	 * @since 5.0.0
    179 	 * @var string
    180 	 */
    181 	public $document;
    182 
    183 	/**
    184 	 * Tracks parsing progress through document
    185 	 *
    186 	 * @since 5.0.0
    187 	 * @var int
    188 	 */
    189 	public $offset;
    190 
    191 	/**
    192 	 * List of parsed blocks
    193 	 *
    194 	 * @since 5.0.0
    195 	 * @var WP_Block_Parser_Block[]
    196 	 */
    197 	public $output;
    198 
    199 	/**
    200 	 * Stack of partially-parsed structures in memory during parse
    201 	 *
    202 	 * @since 5.0.0
    203 	 * @var WP_Block_Parser_Frame[]
    204 	 */
    205 	public $stack;
    206 
    207 	/**
    208 	 * Empty associative array, here due to PHP quirks
    209 	 *
    210 	 * @since 4.4.0
    211 	 * @var array empty associative array
    212 	 */
    213 	public $empty_attrs;
    214 
    215 	/**
    216 	 * Parses a document and returns a list of block structures
    217 	 *
    218 	 * When encountering an invalid parse will return a best-effort
    219 	 * parse. In contrast to the specification parser this does not
    220 	 * return an error on invalid inputs.
    221 	 *
    222 	 * @since 5.0.0
    223 	 *
    224 	 * @param string $document Input document being parsed.
    225 	 * @return WP_Block_Parser_Block[]
    226 	 */
    227 	function parse( $document ) {
    228 		$this->document    = $document;
    229 		$this->offset      = 0;
    230 		$this->output      = array();
    231 		$this->stack       = array();
    232 		$this->empty_attrs = json_decode( '{}', true );
    233 
    234 		do {
    235 			// twiddle our thumbs.
    236 		} while ( $this->proceed() );
    237 
    238 		return $this->output;
    239 	}
    240 
    241 	/**
    242 	 * Processes the next token from the input document
    243 	 * and returns whether to proceed eating more tokens
    244 	 *
    245 	 * This is the "next step" function that essentially
    246 	 * takes a token as its input and decides what to do
    247 	 * with that token before descending deeper into a
    248 	 * nested block tree or continuing along the document
    249 	 * or breaking out of a level of nesting.
    250 	 *
    251 	 * @internal
    252 	 * @since 5.0.0
    253 	 * @return bool
    254 	 */
    255 	function proceed() {
    256 		$next_token = $this->next_token();
    257 		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
    258 		$stack_depth = count( $this->stack );
    259 
    260 		// we may have some HTML soup before the next block.
    261 		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;
    262 
    263 		switch ( $token_type ) {
    264 			case 'no-more-tokens':
    265 				// if not in a block then flush output.
    266 				if ( 0 === $stack_depth ) {
    267 					$this->add_freeform();
    268 					return false;
    269 				}
    270 
    271 				/*
    272 				 * Otherwise we have a problem
    273 				 * This is an error
    274 				 *
    275 				 * we have options
    276 				 * - treat it all as freeform text
    277 				 * - assume an implicit closer (easiest when not nesting)
    278 				 */
    279 
    280 				// for the easy case we'll assume an implicit closer.
    281 				if ( 1 === $stack_depth ) {
    282 					$this->add_block_from_stack();
    283 					return false;
    284 				}
    285 
    286 				/*
    287 				 * for the nested case where it's more difficult we'll
    288 				 * have to assume that multiple closers are missing
    289 				 * and so we'll collapse the whole stack piecewise
    290 				 */
    291 				while ( 0 < count( $this->stack ) ) {
    292 					$this->add_block_from_stack();
    293 				}
    294 				return false;
    295 
    296 			case 'void-block':
    297 				/*
    298 				 * easy case is if we stumbled upon a void block
    299 				 * in the top-level of the document
    300 				 */
    301 				if ( 0 === $stack_depth ) {
    302 					if ( isset( $leading_html_start ) ) {
    303 						$this->output[] = (array) $this->freeform(
    304 							substr(
    305 								$this->document,
    306 								$leading_html_start,
    307 								$start_offset - $leading_html_start
    308 							)
    309 						);
    310 					}
    311 
    312 					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
    313 					$this->offset   = $start_offset + $token_length;
    314 					return true;
    315 				}
    316 
    317 				// otherwise we found an inner block.
    318 				$this->add_inner_block(
    319 					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
    320 					$start_offset,
    321 					$token_length
    322 				);
    323 				$this->offset = $start_offset + $token_length;
    324 				return true;
    325 
    326 			case 'block-opener':
    327 				// track all newly-opened blocks on the stack.
    328 				array_push(
    329 					$this->stack,
    330 					new WP_Block_Parser_Frame(
    331 						new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
    332 						$start_offset,
    333 						$token_length,
    334 						$start_offset + $token_length,
    335 						$leading_html_start
    336 					)
    337 				);
    338 				$this->offset = $start_offset + $token_length;
    339 				return true;
    340 
    341 			case 'block-closer':
    342 				/*
    343 				 * if we're missing an opener we're in trouble
    344 				 * This is an error
    345 				 */
    346 				if ( 0 === $stack_depth ) {
    347 					/*
    348 					 * we have options
    349 					 * - assume an implicit opener
    350 					 * - assume _this_ is the opener
    351 					 * - give up and close out the document
    352 					 */
    353 					$this->add_freeform();
    354 					return false;
    355 				}
    356 
    357 				// if we're not nesting then this is easy - close the block.
    358 				if ( 1 === $stack_depth ) {
    359 					$this->add_block_from_stack( $start_offset );
    360 					$this->offset = $start_offset + $token_length;
    361 					return true;
    362 				}
    363 
    364 				/*
    365 				 * otherwise we're nested and we have to close out the current
    366 				 * block and add it as a new innerBlock to the parent
    367 				 */
    368 				$stack_top                        = array_pop( $this->stack );
    369 				$html                             = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );
    370 				$stack_top->block->innerHTML     .= $html;
    371 				$stack_top->block->innerContent[] = $html;
    372 				$stack_top->prev_offset           = $start_offset + $token_length;
    373 
    374 				$this->add_inner_block(
    375 					$stack_top->block,
    376 					$stack_top->token_start,
    377 					$stack_top->token_length,
    378 					$start_offset + $token_length
    379 				);
    380 				$this->offset = $start_offset + $token_length;
    381 				return true;
    382 
    383 			default:
    384 				// This is an error.
    385 				$this->add_freeform();
    386 				return false;
    387 		}
    388 	}
    389 
    390 	/**
    391 	 * Scans the document from where we last left off
    392 	 * and finds the next valid token to parse if it exists
    393 	 *
    394 	 * Returns the type of the find: kind of find, block information, attributes
    395 	 *
    396 	 * @internal
    397 	 * @since 5.0.0
    398 	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
    399 	 * @return array
    400 	 */
    401 	function next_token() {
    402 		$matches = null;
    403 
    404 		/*
    405 		 * aye the magic
    406 		 * we're using a single RegExp to tokenize the block comment delimiters
    407 		 * we're also using a trick here because the only difference between a
    408 		 * block opener and a block closer is the leading `/` before `wp:` (and
    409 		 * a closer has no attributes). we can trap them both and process the
    410 		 * match back in PHP to see which one it was.
    411 		 */
    412 		$has_match = preg_match(
    413 			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
    414 			$this->document,
    415 			$matches,
    416 			PREG_OFFSET_CAPTURE,
    417 			$this->offset
    418 		);
    419 
    420 		// if we get here we probably have catastrophic backtracking or out-of-memory in the PCRE.
    421 		if ( false === $has_match ) {
    422 			return array( 'no-more-tokens', null, null, null, null );
    423 		}
    424 
    425 		// we have no more tokens.
    426 		if ( 0 === $has_match ) {
    427 			return array( 'no-more-tokens', null, null, null, null );
    428 		}
    429 
    430 		list( $match, $started_at ) = $matches[0];
    431 
    432 		$length    = strlen( $match );
    433 		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
    434 		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
    435 		$namespace = $matches['namespace'];
    436 		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
    437 		$name      = $namespace . $matches['name'][0];
    438 		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];
    439 
    440 		/*
    441 		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
    442 		 * are associative arrays. If we use `array()` we get a JSON `[]`
    443 		 */
    444 		$attrs = $has_attrs
    445 			? json_decode( $matches['attrs'][0], /* as-associative */ true )
    446 			: $this->empty_attrs;
    447 
    448 		/*
    449 		 * This state isn't allowed
    450 		 * This is an error
    451 		 */
    452 		if ( $is_closer && ( $is_void || $has_attrs ) ) {
    453 			// we can ignore them since they don't hurt anything.
    454 		}
    455 
    456 		if ( $is_void ) {
    457 			return array( 'void-block', $name, $attrs, $started_at, $length );
    458 		}
    459 
    460 		if ( $is_closer ) {
    461 			return array( 'block-closer', $name, null, $started_at, $length );
    462 		}
    463 
    464 		return array( 'block-opener', $name, $attrs, $started_at, $length );
    465 	}
    466 
    467 	/**
    468 	 * Returns a new block object for freeform HTML
    469 	 *
    470 	 * @internal
    471 	 * @since 3.9.0
    472 	 *
    473 	 * @param string $innerHTML HTML content of block.
    474 	 * @return WP_Block_Parser_Block freeform block object.
    475 	 */
    476 	function freeform( $innerHTML ) {
    477 		return new WP_Block_Parser_Block( null, $this->empty_attrs, array(), $innerHTML, array( $innerHTML ) );
    478 	}
    479 
    480 	/**
    481 	 * Pushes a length of text from the input document
    482 	 * to the output list as a freeform block.
    483 	 *
    484 	 * @internal
    485 	 * @since 5.0.0
    486 	 * @param null $length how many bytes of document text to output.
    487 	 */
    488 	function add_freeform( $length = null ) {
    489 		$length = $length ? $length : strlen( $this->document ) - $this->offset;
    490 
    491 		if ( 0 === $length ) {
    492 			return;
    493 		}
    494 
    495 		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
    496 	}
    497 
    498 	/**
    499 	 * Given a block structure from memory pushes
    500 	 * a new block to the output list.
    501 	 *
    502 	 * @internal
    503 	 * @since 5.0.0
    504 	 * @param WP_Block_Parser_Block $block        The block to add to the output.
    505 	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
    506 	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
    507 	 * @param int|null              $last_offset  Last byte offset into document if continuing form earlier output.
    508 	 */
    509 	function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
    510 		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
    511 		$parent->block->innerBlocks[] = (array) $block;
    512 		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );
    513 
    514 		if ( ! empty( $html ) ) {
    515 			$parent->block->innerHTML     .= $html;
    516 			$parent->block->innerContent[] = $html;
    517 		}
    518 
    519 		$parent->block->innerContent[] = null;
    520 		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
    521 	}
    522 
    523 	/**
    524 	 * Pushes the top block from the parsing stack to the output list.
    525 	 *
    526 	 * @internal
    527 	 * @since 5.0.0
    528 	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
    529 	 */
    530 	function add_block_from_stack( $end_offset = null ) {
    531 		$stack_top   = array_pop( $this->stack );
    532 		$prev_offset = $stack_top->prev_offset;
    533 
    534 		$html = isset( $end_offset )
    535 			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
    536 			: substr( $this->document, $prev_offset );
    537 
    538 		if ( ! empty( $html ) ) {
    539 			$stack_top->block->innerHTML     .= $html;
    540 			$stack_top->block->innerContent[] = $html;
    541 		}
    542 
    543 		if ( isset( $stack_top->leading_html_start ) ) {
    544 			$this->output[] = (array) $this->freeform(
    545 				substr(
    546 					$this->document,
    547 					$stack_top->leading_html_start,
    548 					$stack_top->token_start - $stack_top->leading_html_start
    549 				)
    550 			);
    551 		}
    552 
    553 		$this->output[] = (array) $stack_top->block;
    554 	}
    555 }