@@ -302,6 +302,9 @@ class Parser {
302
302
/** @var SplObjectStorage */
303
303
protected $ parsed ;
304
304
305
+ /**
306
+ * @var bool
307
+ */
305
308
public $ jsonMode ;
306
309
307
310
/** @var boolean Whether to include experimental language parsing in the result */
@@ -316,6 +319,11 @@ class Parser {
316
319
*/
317
320
protected $ upgraded ;
318
321
322
+ /**
323
+ * Whether to convert classic microformats
324
+ * @var bool
325
+ */
326
+ public $ convertClassic ;
319
327
320
328
/**
321
329
* Constructor
@@ -931,74 +939,23 @@ public function parseH(\DOMElement $e, $is_backcompat = false) {
931
939
$ return = array ();
932
940
$ children = array ();
933
941
$ dates = array ();
942
+ $ prefixes = array ();
934
943
$ impliedTimezone = null ;
935
944
936
- // each rel-bookmark with an href attribute
937
- foreach ( $ this ->xpath ->query ('.//a[contains(concat(" ",normalize-space(@rel)," ")," bookmark ") and @href] ' , $ e ) as $ el )
938
- {
939
- $ class = 'u-url ' ;
940
- // rel-bookmark already has class attribute; append current value
941
- if ($ el ->hasAttribute ('class ' )) {
942
- $ class .= ' ' . $ el ->getAttribute ('class ' );
943
- }
944
- $ el ->setAttribute ('class ' , $ class );
945
- }
946
-
947
- $ subMFs = $ this ->getRootMF ($ e );
948
-
949
- // Handle nested microformats (h-*)
950
- foreach ( $ subMFs as $ subMF ) {
951
-
952
- // Parse
953
- $ result = $ this ->parseH ($ subMF );
954
-
955
- // If result was already parsed, skip it
956
- if (null === $ result ) {
957
- continue ;
958
- }
959
-
960
- // Does this µf have any property names other than h-*?
961
- $ properties = nestedMfPropertyNamesFromElement ($ subMF );
962
-
963
- if (!empty ($ properties )) {
964
- // Yes! It’s a nested property µf
965
- foreach ($ properties as $ property => $ prefixes ) {
966
- // Note: handling microformat nesting under multiple conflicting prefixes is not currently specified by the mf2 parsing spec.
967
- $ prefixSpecificResult = $ result ;
968
- if (in_array ('p- ' , $ prefixes )) {
969
- $ prefixSpecificResult ['value ' ] = $ prefixSpecificResult ['properties ' ]['name ' ][0 ];
970
- } elseif (in_array ('e- ' , $ prefixes )) {
971
- $ eParsedResult = $ this ->parseE ($ subMF );
972
- $ prefixSpecificResult ['html ' ] = $ eParsedResult ['html ' ];
973
- $ prefixSpecificResult ['value ' ] = $ eParsedResult ['value ' ];
974
- } elseif (in_array ('u- ' , $ prefixes )) {
975
- $ prefixSpecificResult ['value ' ] = (empty ($ result ['properties ' ]['url ' ])) ? $ this ->parseU ($ subMF ) : reset ($ result ['properties ' ]['url ' ]);
976
- }
977
- $ return [$ property ][] = $ prefixSpecificResult ;
978
- }
979
- } else {
980
- // No, it’s a child µf
981
- $ children [] = $ result ;
982
- }
983
-
984
- // Make sure this sub-mf won’t get parsed as a µf or property
985
- // TODO: Determine if clearing this is required?
986
- $ this ->elementPrefixParsed ($ subMF , 'h ' );
987
- $ this ->elementPrefixParsed ($ subMF , 'p ' );
988
- $ this ->elementPrefixParsed ($ subMF , 'u ' );
989
- $ this ->elementPrefixParsed ($ subMF , 'dt ' );
990
- $ this ->elementPrefixParsed ($ subMF , 'e ' );
991
- }
992
-
993
945
if ($ e ->tagName == 'area ' ) {
994
946
$ coords = $ e ->getAttribute ('coords ' );
995
947
$ shape = $ e ->getAttribute ('shape ' );
996
948
}
997
949
998
950
// Handle p-*
999
951
foreach ($ this ->xpath ->query ('.//*[contains(concat(" ", @class) ," p-")] ' , $ e ) as $ p ) {
952
+ // element is already parsed
1000
953
if ($ this ->isElementParsed ($ p , 'p ' )) {
1001
954
continue ;
955
+ // backcompat parsing and element was not upgraded; skip it
956
+ } else if ( $ is_backcompat && empty ($ this ->upgraded [$ p ]) ) {
957
+ $ this ->elementPrefixParsed ($ p , 'p ' );
958
+ continue ;
1002
959
}
1003
960
1004
961
$ pValue = $ this ->parseP ($ p );
@@ -1016,8 +973,13 @@ public function parseH(\DOMElement $e, $is_backcompat = false) {
1016
973
1017
974
// Handle u-*
1018
975
foreach ($ this ->xpath ->query ('.//*[contains(concat(" ", @class)," u-")] ' , $ e ) as $ u ) {
976
+ // element is already parsed
1019
977
if ($ this ->isElementParsed ($ u , 'u ' )) {
1020
978
continue ;
979
+ // backcompat parsing and element was not upgraded; skip it
980
+ } else if ( $ is_backcompat && empty ($ this ->upgraded [$ u ]) ) {
981
+ $ this ->elementPrefixParsed ($ u , 'u ' );
982
+ continue ;
1021
983
}
1022
984
1023
985
$ uValue = $ this ->parseU ($ u );
@@ -1035,8 +997,13 @@ public function parseH(\DOMElement $e, $is_backcompat = false) {
1035
997
1036
998
// Handle dt-*
1037
999
foreach ($ this ->xpath ->query ('.//*[contains(concat(" ", @class), " dt-")] ' , $ e ) as $ dt ) {
1000
+ // element is already parsed
1038
1001
if ($ this ->isElementParsed ($ dt , 'dt ' )) {
1039
1002
continue ;
1003
+ // backcompat parsing and element was not upgraded; skip it
1004
+ } else if ( $ is_backcompat && empty ($ this ->upgraded [$ dt ]) ) {
1005
+ $ this ->elementPrefixParsed ($ dt , 'dt ' );
1006
+ continue ;
1040
1007
}
1041
1008
1042
1009
$ dtValue = $ this ->parseDT ($ dt , $ dates , $ impliedTimezone );
@@ -1064,8 +1031,13 @@ public function parseH(\DOMElement $e, $is_backcompat = false) {
1064
1031
1065
1032
// Handle e-*
1066
1033
foreach ($ this ->xpath ->query ('.//*[contains(concat(" ", @class)," e-")] ' , $ e ) as $ em ) {
1034
+ // element is already parsed
1067
1035
if ($ this ->isElementParsed ($ em , 'e ' )) {
1068
1036
continue ;
1037
+ // backcompat parsing and element was not upgraded; skip it
1038
+ } else if ( $ is_backcompat && empty ($ this ->upgraded [$ em ]) ) {
1039
+ $ this ->elementPrefixParsed ($ em , 'e ' );
1040
+ continue ;
1069
1041
}
1070
1042
1071
1043
$ eValue = $ this ->parseE ($ em );
@@ -1333,32 +1305,16 @@ public function parseRelsAndAlternates() {
1333
1305
return array ($ rels , $ rel_urls , $ alternates );
1334
1306
}
1335
1307
1308
+
1336
1309
/**
1337
1310
* Kicks off the parsing routine
1338
- *
1339
- * If `$htmlSafe` is set, any angle brackets in the results from non e-* properties
1340
- * will be HTML-encoded, bringing all output to the same level of encoding.
1341
- *
1342
- * If a DOMElement is set as the $context, only descendants of that element will
1343
- * be parsed for microformats.
1344
- *
1345
- * @param bool $htmlSafe whether or not to html-encode non e-* properties. Defaults to false
1346
- * @param DOMElement $context optionally an element from which to parse microformats
1347
- * @return array An array containing all the µfs found in the current document
1311
+ * @param bool $convertClassic whether to do backcompat parsing on microformats1. Defaults to true.
1312
+ * @param DOMElement $context optionally specify an element from which to parse microformats
1313
+ * @return array An array containing all the microformats found in the current document
1348
1314
*/
1349
1315
public function parse ($ convertClassic = true , DOMElement $ context = null ) {
1350
- $ mfs = array ();
1351
- $ mfElements = $ this ->getRootMF ($ context );
1352
-
1353
- foreach ($ mfElements as $ node ) {
1354
- $ is_backcompat = !$ this ->hasRootMf2 ($ node );
1355
-
1356
- if ( $ convertClassic && $ is_backcompat ) {
1357
- $ this ->backcompat ($ node );
1358
- }
1359
-
1360
- $ mfs [] = $ this ->parseH ($ node , $ is_backcompat );
1361
- }
1316
+ $ this ->convertClassic = $ convertClassic ;
1317
+ $ mfs = $ this ->parse_recursive ($ context );
1362
1318
1363
1319
// Parse rels
1364
1320
list ($ rels , $ rel_urls , $ alternates ) = $ this ->parseRelsAndAlternates ();
@@ -1376,6 +1332,122 @@ public function parse($convertClassic = true, DOMElement $context = null) {
1376
1332
return $ top ;
1377
1333
}
1378
1334
1335
+
1336
+ /**
1337
+ * Parse microformats recursively
1338
+ * Keeps track of whether inside a backcompat root or not
1339
+ * @param DOMElement $context: node to start with
1340
+ * @param int $depth: recusion depth
1341
+ * @return array
1342
+ */
1343
+ public function parse_recursive (DOMElement $ context = null , $ depth = 0 ) {
1344
+ $ mfs = array ();
1345
+ $ children = array ();
1346
+ $ properties = array ();
1347
+ $ mfElements = $ this ->getRootMF ($ context );
1348
+ $ result = array ();
1349
+
1350
+ foreach ($ mfElements as $ node ) {
1351
+ $ merge_properties = array ();
1352
+ $ children = array ();
1353
+
1354
+ $ is_backcompat = !$ this ->hasRootMf2 ($ node );
1355
+
1356
+ if ( $ this ->convertClassic && $ is_backcompat ) {
1357
+ $ this ->backcompat ($ node );
1358
+ }
1359
+
1360
+ $ recurse = $ this ->parse_recursive ($ node , ++$ depth );
1361
+
1362
+ // recursion returned parsed result
1363
+ if ( !empty ($ recurse ) ) {
1364
+
1365
+ // parsed result is an mf root
1366
+ if ( is_numeric (key ($ recurse )) ) {
1367
+
1368
+ // nested mf
1369
+ if ( $ depth > 0 ) {
1370
+ $ children = $ recurse ;
1371
+ // top-level mf
1372
+ } else {
1373
+ $ mfs = array_merge_recursive ($ mfs , $ recurse );
1374
+ }
1375
+
1376
+ // parsed result is an mf property
1377
+ } else {
1378
+ $ merge_properties = $ recurse ;
1379
+ }
1380
+
1381
+ }
1382
+
1383
+ // parse for root mf
1384
+ $ result = $ this ->parseH ($ node , $ is_backcompat );
1385
+
1386
+ // merge nested mf properties
1387
+ if ( $ merge_properties && isset ($ result ['properties ' ]) ) {
1388
+ $ result ['properties ' ] = array_merge ($ result ['properties ' ], $ merge_properties );
1389
+ }
1390
+
1391
+ // parseH returned a parsed result
1392
+ if ( $ result ) {
1393
+
1394
+ // currently a nested mf; check if node is an mf property of parent
1395
+ if ( $ depth > 0 ) {
1396
+ $ temp_properties = nestedMfPropertyNamesFromElement ($ node );
1397
+
1398
+ // properties found; set up parsed result in $properties
1399
+ if ( !empty ($ temp_properties ) ) {
1400
+
1401
+ foreach ($ temp_properties as $ property => $ prefixes ) {
1402
+ // Note: handling microformat nesting under multiple conflicting prefixes is not currently specified by the mf2 parsing spec.
1403
+ $ prefixSpecificResult = $ result ;
1404
+ if (in_array ('p- ' , $ prefixes )) {
1405
+ $ prefixSpecificResult ['value ' ] = $ prefixSpecificResult ['properties ' ]['name ' ][0 ];
1406
+ } elseif (in_array ('e- ' , $ prefixes )) {
1407
+ $ eParsedResult = $ this ->parseE ($ node );
1408
+ $ prefixSpecificResult ['html ' ] = $ eParsedResult ['html ' ];
1409
+ $ prefixSpecificResult ['value ' ] = $ eParsedResult ['value ' ];
1410
+ } elseif (in_array ('u- ' , $ prefixes )) {
1411
+ $ prefixSpecificResult ['value ' ] = (empty ($ result ['properties ' ]['url ' ])) ? $ this ->parseU ($ node ) : reset ($ result ['properties ' ]['url ' ]);
1412
+ }
1413
+
1414
+ if ( $ children ) {
1415
+ $ prefixSpecificResult ['children ' ] = $ children ;
1416
+ }
1417
+
1418
+ $ properties [$ property ][] = $ prefixSpecificResult ;
1419
+ }
1420
+
1421
+ }
1422
+
1423
+ // TODO: Determine if clearing this is required?
1424
+ $ this ->elementPrefixParsed ($ node , 'h ' );
1425
+ $ this ->elementPrefixParsed ($ node , 'p ' );
1426
+ $ this ->elementPrefixParsed ($ node , 'u ' );
1427
+ $ this ->elementPrefixParsed ($ node , 'dt ' );
1428
+ $ this ->elementPrefixParsed ($ node , 'e ' );
1429
+ }
1430
+
1431
+ // add children mf from recursion
1432
+ if ( $ children ) {
1433
+ $ result ['children ' ] = $ children ;
1434
+ }
1435
+
1436
+ $ mfs [] = $ result ;
1437
+ }
1438
+
1439
+ }
1440
+
1441
+ // node is an mf property of parent, return $properties which has property name(s) as array indices
1442
+ if ( $ properties && ($ depth > 1 ) ) {
1443
+ return $ properties ;
1444
+ }
1445
+
1446
+ // otherwise, return $mfs which has numeric array indices
1447
+ return $ mfs ;
1448
+ }
1449
+
1450
+
1379
1451
/**
1380
1452
* Parse From ID
1381
1453
*
@@ -1413,7 +1485,7 @@ public function getRootMF(DOMElement $context = null) {
1413
1485
1414
1486
// add mf1 root class names
1415
1487
foreach ( $ this ->classicRootMap as $ old => $ new ) {
1416
- $ xpaths [] = '( contains(concat(" ",normalize-space(@class), " "), " ' . $ old . ' ") and not(ancestor::*[contains(concat(" ",normalize-space(@class)), " h-")]) ) ' ;
1488
+ $ xpaths [] = '( contains(concat(" ",normalize-space(@class), " "), " ' . $ old . ' ") ) ' ;
1417
1489
}
1418
1490
1419
1491
// final xpath with OR
@@ -1448,6 +1520,17 @@ public function backcompat(DOMElement $el, $context = '', $isParentMf2 = false)
1448
1520
// special handling for specific properties
1449
1521
switch ( $ classname )
1450
1522
{
1523
+ case 'hentry ' :
1524
+ $ rel_bookmark = $ this ->xpath ->query ('.//a[contains(concat(" ",normalize-space(@rel)," ")," bookmark ") and @href] ' , $ el );
1525
+
1526
+ if ( $ rel_bookmark ->length ) {
1527
+ foreach ( $ rel_bookmark as $ tempEl ) {
1528
+ $ this ->addMfClasses ($ tempEl , 'u-url ' );
1529
+ $ this ->addUpgraded ($ tempEl , array ('bookmark ' ));
1530
+ }
1531
+ }
1532
+ break ;
1533
+
1451
1534
case 'hreview ' :
1452
1535
$ item_and_vcard = $ this ->xpath ->query ('.//*[contains(concat(" ", normalize-space(@class), " "), " item ") and contains(concat(" ", normalize-space(@class), " "), " vcard ")] ' , $ el );
1453
1536
0 commit comments