wpdb::strip_invalid_text() protected WP 4.2.0

Strips any invalid characters based on value/charset pairs.

Метод класса: wpdb{}

Хуков нет.

Возвращает

Массив|WP_Error. The $data parameter, with invalid characters removed from each value. This works as a passthrough: any additional keys such as 'field' are retained in each value array. If we cannot remove invalid characters, a WP_Error object is returned.

Использование

// protected - в коде основного (родительского) или дочернего класса
$result = $this->strip_invalid_text( $data );
$data (массив) (обязательный)
Array of value arrays. Each value array has the keys 'value', 'charset', and 'length'. An optional 'ascii' key can be set to false to avoid redundant ASCII checks.

Список изменений

С версии 4.2.0 Введена.

Код wpdb::strip_invalid_text() WP 6.4.3

/**
 * Strips any invalid characters based on value/charset pairs.
 *
 * Values are handled locally when possible (latin1, pure ASCII, or UTF-8 via
 * regex); everything else is collected and converted by the database in a
 * single SELECT.
 *
 * @since 4.2.0
 *
 * @param array $data Array of value arrays. Each value array has the keys 'value',
 *                    'charset', and 'length'. An optional 'ascii' key can be set
 *                    to false to avoid redundant ASCII checks.
 * @return array|WP_Error The $data parameter, with invalid characters removed from
 *                        each value. This works as a passthrough: any additional keys
 *                        such as 'field' are retained in each value array. If we cannot
 *                        remove invalid characters, a WP_Error object is returned.
 */
protected function strip_invalid_text( $data ) {
	// Set to true as soon as at least one value has to be converted by the database.
	$db_check_string = false;

	foreach ( $data as &$value ) {
		$charset = $value['charset'];

		if ( is_array( $value['length'] ) ) {
			$length                  = $value['length']['length'];
			$truncate_by_byte_length = 'byte' === $value['length']['type'];
		} else {
			$length = false;
			/*
			 * Since we have no length, we'll never truncate. Initialize the variable to false.
			 * True would take us through an unnecessary (for this case) codepath below.
			 */
			$truncate_by_byte_length = false;
		}

		// There's no charset to work with.
		if ( false === $charset ) {
			continue;
		}

		// Column isn't a string.
		if ( ! is_string( $value['value'] ) ) {
			continue;
		}

		$needs_validation = true;
		if (
			// latin1 can store any byte sequence.
			'latin1' === $charset
		||
			// ASCII is always OK.
			( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) )
		) {
			// Every character is a single byte here, so byte truncation is sufficient.
			$truncate_by_byte_length = true;
			$needs_validation        = false;
		}

		if ( $truncate_by_byte_length ) {
			mbstring_binary_safe_encoding();
			if ( false !== $length && strlen( $value['value'] ) > $length ) {
				$value['value'] = substr( $value['value'], 0, $length );
			}
			reset_mbstring_encoding();

			if ( ! $needs_validation ) {
				continue;
			}
		}

		// utf8 can be handled by regex, which is a bunch faster than a DB lookup.
		if ( ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) && function_exists( 'mb_strlen' ) ) {
			$regex = '/
				(
					(?: [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
					|   [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
					|   \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
					|   [\xE1-\xEC][\x80-\xBF]{2}
					|   \xED[\x80-\x9F][\x80-\xBF]
					|   [\xEE-\xEF][\x80-\xBF]{2}';

			// Four-byte sequences are only accepted for utf8mb4.
			if ( 'utf8mb4' === $charset ) {
				$regex .= '
					|    \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
					|    [\xF1-\xF3][\x80-\xBF]{3}
					|    \xF4[\x80-\x8F][\x80-\xBF]{2}
				';
			}

			$regex   .= '){1,40}                          # ...one or more times
				)
				| .                                  # anything else
				/x';
			$stripped = preg_replace( $regex, '$1', $value['value'] );

			/*
			 * preg_replace() returns null when PCRE fails (for example when a limit
			 * such as pcre.backtrack_limit is exceeded). Only accept the result on
			 * success; otherwise keep the value untouched and fall through so the
			 * database performs the conversion instead of storing null.
			 */
			if ( null !== $stripped ) {
				$value['value'] = $stripped;

				if ( false !== $length && mb_strlen( $value['value'], 'UTF-8' ) > $length ) {
					$value['value'] = mb_substr( $value['value'], 0, $length, 'UTF-8' );
				}
				continue;
			}
		}

		// We couldn't use any local conversions, send it to the DB.
		$value['db']     = true;
		$db_check_string = true;
	}
	unset( $value ); // Remove by reference.

	if ( $db_check_string ) {
		// The connection charset is identical for every column, so resolve it once.
		if ( $this->charset ) {
			$connection_charset = $this->charset;
		} else {
			$connection_charset = mysqli_character_set_name( $this->dbh );
		}

		$queries = array();
		foreach ( $data as $col => $value ) {
			if ( ! empty( $value['db'] ) ) {
				// We're going to need to truncate by characters or bytes, depending on the length value we have.
				if ( isset( $value['length']['type'] ) && 'byte' === $value['length']['type'] ) {
					// Using binary causes LEFT() to truncate by bytes.
					$charset = 'binary';
				} else {
					$charset = $value['charset'];
				}

				if ( is_array( $value['length'] ) ) {
					// Format the length as a plain integer string before it is placed in the SQL.
					$length          = sprintf( '%.0f', $value['length']['length'] );
					$queries[ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING $charset ), $length ) USING $connection_charset )", $value['value'] );
				} elseif ( 'binary' !== $charset ) {
					// If we don't have a length, there's no need to convert binary - it will always return the same result.
					$queries[ $col ] = $this->prepare( "CONVERT( CONVERT( %s USING $charset ) USING $connection_charset )", $value['value'] );
				}

				// The 'db' marker is internal only; don't leak it to the caller.
				unset( $data[ $col ]['db'] );
			}
		}

		// Alias every expression as x_<column> so results can be mapped back below.
		$sql = array();
		foreach ( $queries as $column => $query ) {
			if ( ! $query ) {
				continue;
			}

			$sql[] = $query . " AS x_$column";
		}

		$this->check_current_query = false;
		$row                       = $this->get_row( 'SELECT ' . implode( ', ', $sql ), ARRAY_A );
		if ( ! $row ) {
			return new WP_Error( 'wpdb_strip_invalid_text_failure', __( 'Could not strip invalid text.' ) );
		}

		foreach ( array_keys( $data ) as $column ) {
			if ( isset( $row[ "x_$column" ] ) ) {
				$data[ $column ]['value'] = $row[ "x_$column" ];
			}
		}
	}

	return $data;
}