;; -*- coding: utf-8; lexical-binding: t; -*-
;; for each html file in a dir, for each html marker
;; <span class="xn-marker240">duenna</span>
;; find the target
;; <b class="xn-target561">duenna</b>
;; with matching innertext.
;; they must always come in pairs.
;; report if not
;; traverse dir
;; open each html file
;; find all the marker markup, put them in a hashtable or association list
;; find all the target markup, put them in a hashtable or association list
;; then pair them up, check if any is missing
;; if so, report e.g. print.
;; c:/Users/xah/web/xahlee_org/wordy/arabian_nights/an3.html
(require 'subr-x)
(defun my--collect-ids (Regex Kind FPath)
  "Return a hash table whose keys are the ids matched by Regex in the current buffer.
The whole buffer is scanned from its beginning.  Regex must have one capture
group; the text of that group is the id.
Kind is a word such as \"marker\" or \"target\", and FPath is the path of the
file being checked.  Both are used only in the warning that is issued when
the same id is found more than once."
  (let ((xtable (make-hash-table :test 'equal)))
    (goto-char (point-min))
    (while (re-search-forward Regex nil t)
      (let ((xid (match-string 1)))
        (if (gethash xid xtable)
            (warn "duplicate %s %s in file %s" Kind xid FPath)
          (puthash xid t xtable))))
    xtable))

(defun my--ids-missing-from (Ids Table)
  "Return the strings in list Ids that are not keys of hash table Table.
The result is sorted with `string<' so that reports are stable."
  (let (xmissing)
    (dolist (xid Ids)
      (unless (gethash xid Table)
        (push xid xmissing)))
    ;; Two-argument `sort' works on every Emacs version; the one-argument
    ;; form exists only in Emacs 30 and later.
    (sort xmissing #'string<)))

(defun my-process-file (FPath)
  "Check that the markers and targets in the HTML file at FPath pair up.
A marker is <span class=\"xn-marker240\">ID</span> and a target is
<b class=\"xn-target561\">ID</b>.  Every marker ID must have a target
with the same ID, and every target ID must have a marker.
Warn about each ID that occurs more than once as a marker or as a target,
and warn, listing the unpaired IDs, when the two sets of IDs differ.
Return non-nil if the two sets of IDs are the same, nil otherwise.
Created: 2025-04-17
Version: 2025-04-17"
  (let (xmarker-table xtarget-table)
    ;; Read the file into a scratch buffer and collect both kinds of id.
    (with-temp-buffer
      (insert-file-contents FPath)
      (setq xmarker-table
            (my--collect-ids
             "<span class=\"xn-marker240\">\\([^<]+\\)</span>" "marker" FPath))
      (setq xtarget-table
            (my--collect-ids
             "<b class=\"xn-target561\">\\([^<]+\\)</b>" "target" FPath)))
    (message "this is all marker id found [%s]" xmarker-table)
    ;; The two key sets are equal exactly when neither side has an id
    ;; that the other side lacks.
    (let ((xlone-markers
           (my--ids-missing-from (hash-table-keys xmarker-table) xtarget-table))
          (xlone-targets
           (my--ids-missing-from (hash-table-keys xtarget-table) xmarker-table)))
      (if (or xlone-markers xlone-targets)
          (progn
            (warn "you got a problem, in file %s\nmarkers without target: %S\ntargets without marker: %S"
                  FPath xlone-markers xlone-targets)
            nil)
        (message "fantastic")))))
;; HHHH------------------------------
;; (my-process-file "c:/Users/xah/web/xahlee_org/wordy/arabian_nights/an3.html" )
;; HHHH------------------------------
;; list dir and all subdirectories
;; of file extension .html
;; ignore dirs whose name start with dot, e.g. .git
;; Declare the variable so that assigning it in this lexical-binding file
;; does not draw a "free variable" warning from the byte compiler.
(defvar xfilelist nil
  "List of full paths of the .html files to check.")

;; Collect every .html file under the root directory.  The last argument is
;; a predicate that decides whether to descend into a subdirectory: skip any
;; directory whose path has a component starting with a dot, e.g. .git.
;; \\' anchors at the end of the file name; $ would also match before a newline.
(setq xfilelist
      (directory-files-recursively
       "c:/Users/xah/web/xahlee_org/wordy/"
       "\\.html\\'"
       nil
       (lambda (xdir) (not (string-match-p "/\\." xdir)))))

;; Check each file in turn; the return values are not used.
(mapc #'my-process-file xfilelist)