-
Notifications
You must be signed in to change notification settings - Fork 4.2k
/
parser.js
224 lines (199 loc) · 6.9 KB
/
parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/**
* External dependencies
*/
import * as query from 'hpq';
import { escape, unescape } from 'lodash';
/**
* Internal dependencies
*/
import { parse as grammarParse } from './post.pegjs';
import { getBlockSettings, getUnknownTypeHandler } from './registration';
import { createBlock } from './factory';
/**
 * Derives a block's attributes from its raw content.
 *
 * The block settings may describe attributes in one of two ways:
 *  - as a function, which is invoked with the raw content and whose return
 *    value is used as-is;
 *  - as any other truthy value, which is handed to `query.parse` together
 *    with the raw content.
 * When the settings declare no attributes, an empty object is returned.
 *
 * @param  {String} rawContent    Raw block content
 * @param  {Object} blockSettings Block settings
 * @return {Object}               Block attributes
 */
export function parseBlockAttributes( rawContent, blockSettings ) {
	const source = blockSettings.attributes;

	// Function form: the block implementation extracts its own attributes.
	if ( typeof source === 'function' ) {
		return blockSettings.attributes( rawContent );
	}

	// Nothing declared: there is nothing to extract from the content.
	if ( ! source ) {
		return {};
	}

	// Declarative form: delegate the extraction to the query library.
	return query.parse( rawContent, source );
}
/**
 * Returns the block attributes of a registered block node given its settings.
 *
 * When block settings are available, three sources are merged, listed here
 * from lowest to highest precedence:
 *  1. the block's `defaultAttributes`;
 *  2. the attributes known from the comment delimiters;
 *  3. the attributes parsed from the raw content.
 * Defaults only fill in keys that neither of the other sources provides.
 *
 * Without block settings, the delimiter attributes are returned unchanged
 * (or an empty object when none were given).
 *
 * @param  {?Object} blockSettings Block settings
 * @param  {string}  rawContent    Raw block content
 * @param  {?Object} attributes    Known block attributes (from delimiters)
 * @return {Object}                All block attributes
 */
export function getBlockAttributes( blockSettings, rawContent, attributes ) {
	const delimiterAttributes = attributes || {};

	if ( ! blockSettings ) {
		return delimiterAttributes;
	}

	// Later spreads win, so defaults must come first: an explicit value from
	// the delimiters or from the content must never be replaced by a default.
	return {
		...blockSettings.defaultAttributes,
		...delimiterAttributes,
		...parseBlockAttributes( rawContent, blockSettings ),
	};
}
/**
 * Builds a block of the requested type, falling back to the unknown type
 * handler when no type is given or when the type has no settings.
 *
 * If settings cannot be found even for the unknown type handler, no block is
 * created and the function returns undefined.
 *
 * @param  {?String} blockType  Block type slug
 * @param  {String}  rawContent Raw block content
 * @param  {?Object} attributes Attributes obtained from block delimiters
 * @return {?Object}            An initialized block object (if possible)
 */
export function createBlockWithFallback( blockType, rawContent, attributes ) {
	// Prefer the type declared by the content; otherwise use the handler
	// for unknown types.
	let resolvedType = blockType || getUnknownTypeHandler();
	let settings = getBlockSettings( resolvedType );

	// The declared type is not known: fall back once more.
	if ( ! settings ) {
		resolvedType = getUnknownTypeHandler();
		settings = getBlockSettings( resolvedType );
	}

	// TODO do we ever expect there to not be an unknown type handler?
	if ( ! settings ) {
		return undefined;
	}

	// TODO allow blocks to opt-in to receiving a tree instead of a string.
	// Gradually convert all blocks to this new format, then remove the
	// string serialization.
	return createBlock(
		resolvedType,
		getBlockAttributes( settings, rawContent, attributes )
	);
}
/**
 * Parses the post content with TinyMCE and returns a list of blocks.
 *
 * Block comment delimiters are first rewritten into temporary <wp-block>
 * elements so that the TinyMCE DOM parser can handle them. The top-level
 * nodes of the resulting tree are then visited in order:
 *  - a <wp-block> node becomes a block of the type named by its `slug`;
 *  - any other node is collected, and each run of such nodes is turned into
 *    a single block through the unknown type handler.
 *
 * Content that produces a tree without children (for instance an empty
 * string) yields an empty list.
 *
 * NOTE: `tinymce` is not imported here; it is referenced as a global.
 *
 * @param  {String} content The post content
 * @return {Array}          Block list
 */
export function parseWithTinyMCE( content ) {
	// First, convert comment delimiters into temporary <wp-block> "tags" so
	// that TinyMCE can parse them. Examples:
	//   In  : <!-- wp:core/text -->
	//   Out : <wp-block slug="core/text">
	//   In  : <!-- /wp:core/text -->
	//   Out : </wp-block>
	//   In  : <!-- wp:core/embed url:youtube.com/xxx& -->
	//   Out : <wp-block slug="core/embed" attributes="url:youtube.com/xxx&amp;">
	const markup = content.replace(
		/<!--\s*(\/?)wp:([a-z0-9/-]+)((?:\s+[a-z0-9_-]+:[^\s]+)*)\s*-->/g,
		function( match, closingSlash, slug, attributes ) {
			if ( closingSlash ) {
				return '</wp-block>';
			}

			// The raw attribute string is escaped so that it can be carried
			// safely inside an HTML attribute; it is unescaped again below.
			const attributesMarkup = attributes ?
				' attributes="' + escape( attributes.trim() ) + '"' :
				'';

			return '<wp-block slug="' + slug + '"' + attributesMarkup + '>';
		}
	);

	// Create a custom HTML schema
	const schema = new tinymce.html.Schema();
	// Add <wp-block> "tags" to our schema
	schema.addCustomElements( 'wp-block' );
	// Add valid <wp-block> "attributes" also
	schema.addValidElements( 'wp-block[slug|attributes]' );

	// Initialize the parser with our custom schema
	const parser = new tinymce.html.DomParser( { validate: true }, schema );

	// Parse the content into an object tree
	const tree = parser.parse( markup );

	// Create a serializer that we will use to pass strings to blocks.
	// TODO: pass parse trees instead, and verify them against the markup
	// shapes that each block can accept.
	const serializer = new tinymce.html.Serializer( { validate: true }, schema );

	// Blocks found so far, in document order
	const blocks = [];

	// Container for the markup found in between blocks
	let contentBetweenBlocks = null;

	// Turns any markup collected since the last block into a block of the
	// unknown type, then starts a fresh, empty container.
	function flushContentBetweenBlocks() {
		if ( contentBetweenBlocks && contentBetweenBlocks.firstChild ) {
			const block = createBlockWithFallback(
				null, // default: unknown type handler
				serializer.serialize( contentBetweenBlocks ),
				null // no known attributes
			);
			if ( block ) {
				blocks.push( block );
			}
		}
		// Type 11 is a document fragment, so the container itself produces
		// no markup when serialized.
		contentBetweenBlocks = new tinymce.html.Node( 'body', 11 );
	}

	// Create the initial container
	flushContentBetweenBlocks();

	// Walk the top-level nodes. The condition is checked before the first
	// iteration so that a tree without children is simply skipped instead of
	// dereferencing a missing node.
	let currentNode = tree.firstChild;
	while ( currentNode ) {
		if ( currentNode.name === 'wp-block' ) {
			// Set node type to document fragment so that the TinyMCE
			// serializer doesn't output its markup
			currentNode.type = 11;

			// Serialize the content
			const rawContent = serializer.serialize( currentNode );

			// Retrieve the attributes from the <wp-block> tag
			const nodeAttributes = currentNode.attributes.reduce( ( memo, attr ) => {
				memo[ attr.name ] = attr.value;
				return memo;
			}, {} );

			// Retrieve the block attributes from the original delimiters:
			// whitespace-separated `name:value` pairs
			const blockAttributes = unescape( nodeAttributes.attributes || '' )
				.split( /\s+/ )
				.reduce( ( memo, attrString ) => {
					const pieces = attrString.match( /^([a-z0-9_-]+):(.*)$/ );
					if ( pieces ) {
						memo[ pieces[ 1 ] ] = pieces[ 2 ];
					}
					return memo;
				}, {} );

			// Try to create the block
			const block = createBlockWithFallback(
				nodeAttributes.slug,
				rawContent,
				blockAttributes
			);
			if ( block ) {
				// Emit the markup that preceded this block first, to keep
				// the document order
				flushContentBetweenBlocks();
				blocks.push( block );
			}

			currentNode = currentNode.next;
		} else {
			// We have some HTML content outside of block delimiters. Save it
			// so that we can initialize it using `getUnknownTypeHandler`.
			const toAppend = currentNode;
			// Advance the DOM tree pointer before calling `append` because
			// this is a destructive operation.
			currentNode = currentNode.next;
			contentBetweenBlocks.append( toAppend );
		}
	}

	// Emit whatever markup followed the last block
	flushContentBetweenBlocks();

	return blocks;
}
/**
 * Parses the post content with a PegJS grammar and returns a list of blocks.
 *
 * Every node produced by the grammar is passed to `createBlockWithFallback`;
 * nodes for which no block could be created are left out of the result.
 *
 * @param  {String} content The post content
 * @return {Array}          Block list
 */
export function parseWithGrammar( content ) {
	const blockNodes = grammarParse( content );

	return blockNodes
		.map( ( { blockType, rawContent, attrs } ) =>
			createBlockWithFallback( blockType, rawContent, attrs )
		)
		.filter( ( block ) => Boolean( block ) );
}
// Default export of this module: the TinyMCE-based parser.
export default parseWithTinyMCE;