PHP — PHP 5.4.6, pasted 4 months ago
<?php
/**
* How can I extract structured text from an HTML list in PHP?
* @link http://stackoverflow.com/a/14085173/367456
*/
// Fixture: a nested <ul>/<li> tree. Every <li> carries a numeric id attribute
// and has its label as its first text node. A nowdoc (quoted marker) is used so
// the markup is taken literally and never subject to variable interpolation.
$html = <<<'HTML'
<ul>
<li id="1">Page 1</li>
<li id="2">Page 2
<ul>
<li id="3">Sub Page A</li>
<li id="4">Sub Page B</li>
<li id="5">Sub Page C
<ul>
<li id="6">Sub Sub Page I</li>
</ul>
</li>
</ul>
</li>
<li id="7">Page 3
<ul>
<li id="8">Sub Page D</li>
</ul>
</li>
<li id="9">Page 4</li>
</ul>
HTML;

// Parse the fragment and set up the XPath context used by the rest of the script.
$doc = new DOMDocument();
if (!$doc->loadHTML($html)) {
    // loadHTML() reports failure through its return value; stop here instead of
    // carrying on with an empty document and printing an empty table.
    throw new RuntimeException('Unable to parse the HTML fixture.');
}
$xp = new DOMXPath($doc);
// Bookkeeping per parent id (0 stands for "no enclosing <li>", i.e. top level):
//   [0] => how many children have been seen so far,
//   [1] => list of the ids of those children.
$parents = [];

/**
 * Turn one <li> element into a table row.
 *
 * Also updates $parents: the element is counted and recorded under its parent,
 * and an (initially empty) child list is created for the element itself.
 *
 * @param DOMElement $li the list item to describe
 *
 * @return array [id, label, order among siblings, parent id, children];
 *               the last entry is a REFERENCE to $parents[id][1], so children
 *               registered by later calls show up in this row as well.
 */
$map = function (DOMElement $li) use ($xp, &$parents) {
    $nodeId  = (int)$xp->evaluate('string(./@id)', $li);
    // Nearest enclosing <li>; an empty string (no ancestor) casts to 0.
    $ownerId = (int)$xp->evaluate('string(./ancestor::li[1]/@id)', $li);
    // Only the first text node, so nested list content is not part of the label.
    $caption = $xp->evaluate('normalize-space(./text()[1])', $li);

    // 1-based position of this item among the children of its parent.
    if (isset($parents[$ownerId][0])) {
        $parents[$ownerId][0]++;
    } else {
        $parents[$ownerId][0] = 1;
    }
    $position = $parents[$ownerId][0];

    // Register this item as a child of its parent.
    $parents[$ownerId][1][] = $nodeId;

    // Make sure the item has a child list of its own to hand out by reference.
    if (!isset($parents[$nodeId][1])) {
        $parents[$nodeId][1] = [];
    }

    return [$nodeId, $caption, $position, $ownerId, &$parents[$nodeId][1]];
};
// Map every <li> in the document to a row and index the rows by id
// (the id is the first entry of each row).
$result = [];
foreach ($xp->query('//li') as $li) {
    $row = $map($li);
    $result[$row[0]] = $row;
}

// Collapse each child-id list into a comma-separated string. The fifth entry
// of every row is a reference to the matching $parents[id][1] slot, so writing
// to that slot here updates the row in $result too.
foreach (array_keys($parents) as $key) {
    $parents[$key][1] = implode(',', $parents[$key][1]);
}
// The bookkeeping is no longer needed once the rows hold their final strings.
unset($key, $parents);
// Print $result as a fixed-width ASCII table.
//
// The column widths used in the two formats (2, 14, 5, 6, 8) mirror the dash
// runs in $border: each run is the column width plus one space of padding on
// either side. Keeping them in step is what makes header, rows and borders
// line up; labels longer than 14 or child lists longer than 8 characters will
// still overflow their column.
$border     = "+----+----------------+-------+--------+----------+\n";
$headFormat = "| %-2s | %-14s | %-5s | %-6s | %-8s |\n";
$rowFormat  = "| %2d | %-14s | %5d | %6d | %-8s |\n";

echo $border;
printf($headFormat, 'ID', 'LABEL', 'ORDER', 'PARENT', 'CHILDREN');
echo $border;
foreach ($result as $line) {
    // Each $line is array(id, label, order, parent id, children string).
    vprintf($rowFormat, $line);
}
echo $border;
|
+----+----------------+-------+--------+----------+
| ID | LABEL          | ORDER | PARENT | CHILDREN |
+----+----------------+-------+--------+----------+
|  1 | Page 1         |     1 |      0 |          |
|  2 | Page 2         |     2 |      0 | 3,4,5    |
|  3 | Sub Page A     |     1 |      2 |          |
|  4 | Sub Page B     |     2 |      2 |          |
|  5 | Sub Page C     |     3 |      2 | 6        |
|  6 | Sub Sub Page I |     1 |      5 |          |
|  7 | Page 3         |     3 |      0 | 8        |
|  8 | Sub Page D     |     1 |      7 |          |
|  9 | Page 4         |     4 |      0 |          |
+----+----------------+-------+--------+----------+
OK (0.008 sec real, 0.071 sec wall, 13 MB, 54 syscalls)