@@ -58,3 +58,108 @@ function sglobal(string $key)
58
58
}
59
59
60
60
}
61
+
62
+ if (! function_exists ('parse_landmarks ' )) {
63
+
64
/**
 * Extract every heading that carries an `id` attribute from an HTML fragment.
 *
 * Only headings written as `<hN id="...">` (the `id` directly after the tag
 * name) are matched, because that is the shape the pattern looks for.
 *
 * @param  string  $html  HTML to scan.
 * @return array  List of stdClass objects in document order, each with:
 *                - title  (string) heading content with in-page anchor links unwrapped and trimmed
 *                - anchor (string) the heading id prefixed with "#"
 *                - level  (int)    heading level, 1 through 6
 */
function _parse_html_headings(string $html): array
{
    // [1-6] restricts matches to real heading levels, and the \1 backreference
    // makes the closing tag agree with the opening one.
    $pattern = '/<h([1-6]) id="([^"]+)"[^>]*>(.+?)<\/h\1>/sm';

    // preg_match_all() returns 0 when nothing matches and false on a PCRE
    // error; in both cases there is nothing to report.
    if (! preg_match_all($pattern, $html, $matches, PREG_SET_ORDER)) {
        return [];
    }

    return array_map(
        function (array $match) {
            // $match[0] is the whole tag; 1..3 are level, id and inner HTML.
            return (object) [
                'title' => trim(_remove_anchor_from_title($match[3])),
                'anchor' => "#$match[2]",
                'level' => intval($match[1]),
            ];
        },
        $matches
    );
}
79
+
80
/**
 * Unwrap in-page anchor links from a heading title.
 *
 * Every `<a href="#...">text</a>` is replaced by its inner `text`; links whose
 * href does not start with "#" are left untouched.
 *
 * @param  string  $title  Raw inner HTML of a heading.
 * @return string  The title with in-page anchors unwrapped, or the original
 *                 title if the replacement could not be performed.
 */
function _remove_anchor_from_title(string $title): string
{
    $stripped = preg_replace('/<a href="#[^"]+"[^>]*>(.*?)<\/a>/sm', '$1', $title);

    // preg_replace() yields null on a PCRE runtime error; returning that would
    // violate the declared string return type, so fall back to the input.
    return $stripped ?? $title;
}
84
+
85
/**
 * Attach a header as a sibling of the nearest landmark at the same level.
 *
 * Starting from $previousLandmark, the parent chain is climbed until a
 * landmark with the header's level is found; the new landmark is then appended
 * to that landmark's parent.
 *
 * @param  object  $previousLandmark  Landmark to start climbing from; reads `level` and `parent`.
 * @param  object  $header            Heading to insert; reads `level`, `title` and `anchor`.
 * @return object  The newly attached landmark, or the top-most landmark reached
 *                 (unchanged) when no sibling position exists for the header.
 */
function _create_ancestor_landmark($previousLandmark, $header) {
    // Stop climbing once the chain runs out of parents instead of
    // dereferencing a missing one.
    while ($previousLandmark->level !== $header->level && isset($previousLandmark->parent)) {
        $previousLandmark = $previousLandmark->parent;
    }

    // A parentless landmark is the top of the tree: there is no container to
    // append a sibling to, so the header adds no node and the caller continues
    // from the top.
    if (! isset($previousLandmark->parent)) {
        return $previousLandmark;
    }

    $landmark = (object) [
        'level' => $header->level,
        'title' => $header->title,
        'anchor' => $header->anchor,
        'parent' => $previousLandmark->parent,
        'children' => [],
    ];

    // Objects are shared by handle, so this appends to the real parent node.
    $previousLandmark->parent->children[] = $landmark;

    return $landmark;
}
102
+
103
/**
 * Attach a header as a descendant of the given landmark.
 *
 * When the header is more than one level deeper than $previousLandmark,
 * untitled filler landmarks (level, parent and children only) are chained
 * underneath until a landmark one level above the header exists.
 *
 * @param  object  $previousLandmark  Landmark to hang the header under; reads `level`, appends to `children`.
 * @param  object  $header            Heading to insert; reads `level`, `title` and `anchor`.
 * @return object  Whatever tap() returns for the new landmark.
 */
function _create_descendant_landmark($previousLandmark, $header) {
    $container = $previousLandmark;
    $containerLevel = $header->level - 1;

    // Fill each skipped level with a placeholder so every level between the
    // starting landmark and the header is present in the parent chain.
    while ($container->level !== $containerLevel) {
        $filler = (object) [
            'level' => $container->level + 1,
            'parent' => $container,
            'children' => [],
        ];

        $container->children[] = $filler;
        $container = $filler;
    }

    $created = (object) [
        'level' => $header->level,
        'title' => $header->title,
        'anchor' => $header->anchor,
        'parent' => $container,
        'children' => [],
    ];

    // NOTE(review): tap() is defined outside this chunk; presumably it runs
    // the callback with $created and returns $created — confirm.
    return tap($created, function ($node) use ($container) {
        $container->children[] = $node;
    });
}
127
+
128
/**
 * Climb from any landmark to the first level-1 landmark above it and clean
 * the tree hanging off that landmark.
 *
 * @param  object  $landmark  Any landmark in the tree; reads `level` and `parent`.
 * @return object  The level-1 landmark that was reached, after _clean_landmark() ran on it.
 */
function _clean_landmark_tree($landmark) {
    // Walk upwards until the level-1 landmark is reached (possibly at once).
    for ($top = $landmark; $top->level !== 1; $top = $top->parent) {
        // The loop header does all the work.
    }

    _clean_landmark($top);

    return $top;
}
137
+
138
/**
 * Strip bookkeeping properties from a landmark and, recursively, its children.
 *
 * Removes `parent` from every landmark and removes `children` from landmarks
 * that have none. The landmark objects are modified in place.
 *
 * @param  object  $landmark  Landmark to clean.
 * @return void
 */
function _clean_landmark($landmark) {
    unset($landmark->parent);

    // Leaf: drop the empty children list and stop.
    if (empty($landmark->children)) {
        unset($landmark->children);

        return;
    }

    foreach ($landmark->children as $child) {
        _clean_landmark($child);
    }
}
147
+
148
/**
 * Build a nested landmark tree from the headings found in an HTML fragment.
 *
 * Headings are read in document order and nested by level beneath a virtual
 * level-1 root. Headings at or above the root's level add no node of their
 * own; they only move the insertion point back to the root.
 *
 * @param  string  $html  HTML to scan for headings.
 * @return array  Landmarks directly under the root, as cleaned by
 *                _clean_landmark_tree(); an empty array when there are none.
 */
function parse_landmarks(string $html): array
{
    $root = (object) [
        'level' => 1,
        'children' => [],
    ];
    $currentLandmark = $root;

    foreach (_parse_html_headings($html) as $heading) {
        // The root has no parent, so a heading at its level cannot be
        // inserted as a sibling; restart nesting from the root instead.
        if ($heading->level <= $root->level) {
            $currentLandmark = $root;

            continue;
        }

        $currentLandmark = $currentLandmark->level >= $heading->level
            ? _create_ancestor_landmark($currentLandmark, $heading)
            : _create_descendant_landmark($currentLandmark, $heading);
    }

    // Cleaning unsets `children` on childless landmarks, hence the fallback.
    return _clean_landmark_tree($currentLandmark)->children ?? [];
}
164
+
165
+ }
0 commit comments