parsing html xml parsing expression grammar

By Xah Lee. Date: .

parsing expression grammar

;; -*- coding: utf-8; lexical-binding: t; -*-

(require 'peg)

;; (define-peg-ruleset 'number-grammar
;;         '((number sign digit (* digit))
;;           digit  ;; A reference to the definition above.
;;           (sign (or "+" "-" ""))))

;; Match a signed integer at point in the current buffer.
;; The grammar rules are bound with `with-peg-rules' and the entry rule
;; `number' is run explicitly; `peg-signal-failure' makes a failed match
;; signal an error instead of returning nil.
(with-peg-rules
    ((number sign digit (* digit))
     (sign   (or "+" "-" ""))
     (digit  [0-9]))
  (peg-run (peg number) #'peg-signal-failure))

;; -206
;; -*- coding: utf-8; lexical-binding: t; -*-

(require 'peg)

;; Skeleton for a PEG parsing experiment: binds a sample HTML fragment and a
;; fresh output buffer, then displays that buffer.  The parsing step itself
;; has not been written yet (see the placeholder comment in the body).
(let ((xhtml
"<section class=\"word88\">
<h3 class=\"wd\">sanctimonious</h3>
<div class=\"ex\">
<div class=\"bdy\">
Trump's victory was fueled by fatigue with <b class=\"w\">sanctimonious</b> identity politics, which became state religion under Democratic administrations.</div>
<div class=\"src\">
Camille Paglia</div>
</div>

</section>"
)
      ;; Bound to nil and never assigned here; presumably reserved for the
      ;; parse result -- TODO confirm once the parsing code is added.
      xtree
      ;; `generate-new-buffer' always creates a new buffer, so each
      ;; evaluation of this form yields another output buffer.
      (xoutbuf (generate-new-buffer "*peg parse out*")))

  ;; code

  ;; Show the (currently empty) output buffer.
  (pop-to-buffer xoutbuf))
xtodo

parsing html xml

xtodo
(libxml-available-p)
;; t

;; Parse a complete HTML document with libxml's HTML parser and render the
;; resulting DOM into a fresh buffer with shr.
(let ((xhtml "<!DOCTYPE html>
<html>
<head>
<meta charset=\"utf-8\" />
<meta name=viewport content=\"width=device-width, initial-scale=1\" />

<title>untitled</title>
</head>
<body>

<main>

<h1>untitled</h1>

<p>
some
</p>

</main>

</body>
</html>"))
  ;; Fail early with a clear message when this Emacs cannot use libxml2,
  ;; rather than failing obscurely inside the parse call.
  (unless (libxml-available-p)
    (user-error "libxml2 support is not available in this Emacs"))
  (let ((xtree
         ;; The libxml parsers work on a buffer region, so the string is
         ;; parsed from a temporary buffer.
         (with-temp-buffer
           (insert xhtml)
           (libxml-parse-html-region (point-min) (point-max))))
        ;; Created only after parsing, so an error during parsing does not
        ;; leave a stray empty output buffer behind.
        (xoutbuf (generate-new-buffer "*html parse out*")))
    (with-current-buffer xoutbuf (shr-insert-document xtree))
    (pop-to-buffer xoutbuf)))
(libxml-available-p)
;; t

;; Parse well-formed markup with libxml's XML parser (rather than the HTML
;; parser) and render the resulting DOM into a fresh buffer with shr.
(let ((xhtml "<html>
<head>
<meta charset=\"utf-8\" />

<title>untitled</title>
</head>
<body>

<main>

<h1>untitled</h1>

<p>
some
</p>

</main>

</body>
</html>"))
  ;; Fail early with a clear message when this Emacs cannot use libxml2,
  ;; rather than failing obscurely inside the parse call.
  (unless (libxml-available-p)
    (user-error "libxml2 support is not available in this Emacs"))
  (let ((xtree
         ;; The libxml parsers work on a buffer region, so the string is
         ;; parsed from a temporary buffer.
         (with-temp-buffer
           (insert xhtml)
           (libxml-parse-xml-region (point-min) (point-max))))
        ;; Created only after parsing, so an error during parsing does not
        ;; leave a stray empty output buffer behind.
        (xoutbuf (generate-new-buffer "*html parse out*")))
    (with-current-buffer xoutbuf (shr-insert-document xtree))
    (pop-to-buffer xoutbuf)))
xtodo

When done studying HTML parsing, reformat my site's citations.

〔<cite>...</cite> <time>...</time> @ <a ...>...</a>〕
<cite>...</cite> <time>...</time> @ <a ...>...</a>