eval.in

Paste #5544

PHP — PHP 5.4.6, pasted 1 year ago

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
<?php
/**
 * How can I extract structured text from an HTML list in PHP?
 * @link http://stackoverflow.com/a/14085173/367456
 */

// Sample input: a nested <ul> tree whose <li> ids run 1..9 in document order.
// A nowdoc is used because the markup needs no variable interpolation.
$html = <<<'HTML'
<ul>
  <li id="1">Page 1</li>
  <li id="2">Page 2
    <ul>
      <li id="3">Sub Page A</li>
      <li id="4">Sub Page B</li>
      <li id="5">Sub Page C
        <ul>
          <li id="6">Sub Sub Page I</li>
        </ul>
      </li>
    </ul>
  </li>
  <li id="7">Page 3
    <ul>
      <li id="8">Sub Page D</li>
    </ul>
  </li>
  <li id="9">Page 4</li>
</ul>
HTML;

// Parse the markup and prepare an XPath context for querying the <li> nodes.
$doc = new DOMDocument;
$doc->loadHTML($html);
$xp = new DOMXPath($doc);

// Per-id bookkeeping: [0] => number of children seen, [1] => list of child ids.
$parents = [];

/**
 * Describe one <li> element as a row and register it under its parent.
 *
 * Running state lives in $parents, captured by reference:
 * $parents[<id>][0] counts the children seen so far for that id and
 * $parents[<id>][1] collects their ids. An item with no <li> ancestor gets
 * parent id 0, because the empty XPath string casts to 0.
 *
 * @param DOMElement $li list item to describe
 *
 * @return array [id, label, order among siblings, parent id, children];
 *               the last slot is a reference into $parents, so children
 *               registered by later calls also appear in rows returned earlier
 */
$map = function (DOMElement $li) use ($xp, &$parents) {
    // A missing attribute yields '', which casts to 0.
    $itemId  = (int)$li->getAttribute('id');
    $ownerId = (int)$xp->evaluate('string(./ancestor::li[1]/@id)', $li);
    // Label is the first text-node child only, with whitespace normalized.
    $caption = $xp->evaluate('normalize-space(./text()[1])', $li);

    // Advance the owner's child counter; the first child starts it at 1.
    if (isset($parents[$ownerId][0])) {
        $parents[$ownerId][0]++;
    } else {
        $parents[$ownerId][0] = 1;
    }
    $position = $parents[$ownerId][0];

    // Record this item as a child of its owner.
    $parents[$ownerId][1][] = $itemId;

    // Make sure this item has a children list of its own to hand out.
    if (!isset($parents[$itemId][1])) {
        $parents[$itemId][1] = [];
    }

    return [$itemId, $caption, $position, $ownerId, &$parents[$itemId][1]];
};

// Map every <li> in document order and index the resulting row by its id
// (the first slot of the row).
$result = [];
foreach ($xp->query('//li') as $li) {
    $row = $map($li);
    $result[$row[0]] = $row;
}

// Collapse each child-id list into a comma-separated string. The children
// slot of every row is a reference into $parents, so writing here updates
// the rows already stored in $result as well.
foreach (array_keys($parents) as $key) {
    $parents[$key][1] = implode(',', $parents[$key][1]);
}
unset($key, $parents);

// Table header.
echo '+----+----------------+-------+--------+----------+
| ID |     LABEL      | ORDER | PARENT | CHILDREN |
+----+----------------+-------+--------+----------+
';

// One fixed-width line per row; slots are id, label, order, parent id, children.
foreach ($result as $row) {
    echo vsprintf("| %' 2d | %' -14s |  %' 2d   |   %' 2d   | %-8s |\n", $row);
}

// Table footer.
echo '+----+----------------+-------+--------+----------+
';

Program Output

+----+----------------+-------+--------+----------+
| ID |     LABEL      | ORDER | PARENT | CHILDREN |
+----+----------------+-------+--------+----------+
|  1 | Page 1         |   1   |    0   |          |
|  2 | Page 2         |   2   |    0   | 3,4,5    |
|  3 | Sub Page A     |   1   |    2   |          |
|  4 | Sub Page B     |   2   |    2   |          |
|  5 | Sub Page C     |   3   |    2   | 6        |
|  6 | Sub Sub Page I |   1   |    5   |          |
|  7 | Page 3         |   3   |    0   | 8        |
|  8 | Sub Page D     |   1   |    7   |          |
|  9 | Page 4         |   4   |    0   |          |
+----+----------------+-------+--------+----------+

OK (0.008 sec real, 0.071 sec wall, 13 MB, 54 syscalls)

Fork