Emacs Lisp: Replace HTML Entities
Here are commands to replace HTML entities with the characters they stand for, or the reverse.
Escape HTML Chars to Entities
Here's a command to replace
<
>
&
to entities
&lt;
&gt;
&amp;
or reverse.
(defun xah-html-escape-char-to-entity (@begin @end &optional @entity-to-char-p)
  "Replace HTML chars & < > to HTML entities on current text block or selection.
The string replaced are:
 & ⇒ &amp;
 < ⇒ &lt;
 > ⇒ &gt;

Highlight changed places.
If `universal-argument' is called first, the replacement direction is reversed.

When called in lisp code, @begin @end are region begin/end positions.
If @entity-to-char-p is true, reverse change direction.

URL `http://xahlee.info/emacs/emacs/elisp_replace_html_entities_command.html'
Version 2020-08-30"
  (interactive
   ;; save-excursion: computing the text block boundaries must not move the cursor.
   (save-excursion
     (list
      (if (use-region-p)
          (region-beginning)
        ;; Start of current text block: just after the previous blank line,
        ;; or beginning of buffer when there is none (the failed search
        ;; already moved point there because of the "move" argument).
        (when (re-search-backward "\n[ \t]*\n" nil "move")
          (goto-char (match-end 0)))
        (point))
      (if (use-region-p)
          (region-end)
        ;; End of current text block: just before the next blank line,
        ;; or end of buffer when there is none.
        (when (re-search-forward "\n[ \t]*\n" nil "move")
          (goto-char (match-beginning 0)))
        (point))
      (if current-prefix-arg t nil))))
  (let (($map (if @entity-to-char-p
                  '(("&amp;" . "&") ("&lt;" . "<") ("&gt;" . ">"))
                '(("&" . "&amp;") ("<" . "&lt;") (">" . "&gt;"))))
        ($regex (if @entity-to-char-p
                    "&\\(?:amp\\|lt\\|gt\\);"
                  "[&<>]")))
    (save-excursion
      (save-restriction
        (narrow-to-region @begin @end)
        (goto-char (point-min))
        ;; Entity names are case sensitive.
        (let ((case-fold-search nil))
          ;; A single left-to-right pass: text produced by a replacement is
          ;; never rescanned, so "&" ⇒ "&amp;" cannot be escaped twice and
          ;; "&amp;lt;" decodes to "&lt;" (not "<") without needing
          ;; temporary placeholder strings.
          (while (re-search-forward $regex nil t)
            (let (($new (cdr (assoc (match-string 0) $map))))
              ;; FIXEDCASE and LITERAL: insert replacement text verbatim.
              (replace-match $new t t)
              ;; Highlight the text just inserted.
              (overlay-put (make-overlay (- (point) (length $new)) (point))
                           'font-lock-face '(:foreground "red")))))))))
Named Entities to Unicode
Replace HTML named entities such as &eacute;
with the Unicode characters they stand for, such as é.
(defun xah-html-named-entity-to-char (@begin @end)
  "Replace HTML named entity to Unicode character in current text block or selection.
Changed places are highlighted.
For example, “&copy;” becomes “©”.
The following HTML Entities are not replaced:
 &amp; &lt; &gt;

When called in lisp code, @begin @end are region begin/end positions.

URL `http://xahlee.info/emacs/emacs/elisp_replace_html_entities_command.html'
Version 2020-08-30"
  (interactive
   (if (use-region-p)
       (list (region-beginning) (region-end))
     ;; save-excursion: computing the text block boundaries must not move the cursor.
     (save-excursion
       (list
        ;; Start of current text block: just after the previous empty line,
        ;; or beginning of buffer when there is none (the failed search
        ;; already moved point there).
        (progn
          (when (search-backward "\n\n" nil "NOERROR")
            (goto-char (match-end 0)))
          (point))
        ;; End of current text block: just before the next empty line,
        ;; or end of buffer when there is none.
        (progn
          (when (search-forward "\n\n" nil "NOERROR")
            (goto-char (match-beginning 0)))
          (point))))))
  (let (($replaceMap
         ;; Each element is [ENTITY REPLACEMENT].
         ;; &amp; &lt; &gt; are deliberately absent.
         [
          ;; spaces, written as escapes because they are invisible
          ["&nbsp;" "\u00A0"] ["&ensp;" "\u2002"] ["&emsp;" "\u2003"] ["&thinsp;" "\u2009"]
          ;; invisible formatting characters are removed
          ["&rlm;" ""] ["&lrm;" ""] ["&zwj;" ""] ["&zwnj;" ""]
          ["&iexcl;" "¡"] ["&cent;" "¢"] ["&pound;" "£"] ["&curren;" "¤"] ["&yen;" "¥"]
          ["&brvbar;" "¦"] ["&sect;" "§"] ["&uml;" "¨"] ["&copy;" "©"] ["&ordf;" "ª"]
          ["&laquo;" "«"] ["&not;" "¬"]
          ;; soft hyphen is removed
          ["&shy;" ""]
          ["&reg;" "®"] ["&macr;" "¯"] ["&deg;" "°"] ["&plusmn;" "±"] ["&sup2;" "²"]
          ["&sup3;" "³"] ["&acute;" "´"] ["&micro;" "µ"] ["&para;" "¶"] ["&middot;" "·"]
          ["&cedil;" "¸"] ["&sup1;" "¹"] ["&ordm;" "º"] ["&raquo;" "»"] ["&frac14;" "¼"]
          ["&frac12;" "½"] ["&frac34;" "¾"] ["&iquest;" "¿"]
          ["&Agrave;" "À"] ["&Aacute;" "Á"] ["&Acirc;" "Â"] ["&Atilde;" "Ã"] ["&Auml;" "Ä"]
          ["&Aring;" "Å"] ["&AElig;" "Æ"] ["&Ccedil;" "Ç"] ["&Egrave;" "È"] ["&Eacute;" "É"]
          ["&Ecirc;" "Ê"] ["&Euml;" "Ë"] ["&Igrave;" "Ì"] ["&Iacute;" "Í"] ["&Icirc;" "Î"]
          ["&Iuml;" "Ï"] ["&ETH;" "Ð"] ["&Ntilde;" "Ñ"] ["&Ograve;" "Ò"] ["&Oacute;" "Ó"]
          ["&Ocirc;" "Ô"] ["&Otilde;" "Õ"] ["&Ouml;" "Ö"] ["&times;" "×"] ["&Oslash;" "Ø"]
          ["&Ugrave;" "Ù"] ["&Uacute;" "Ú"] ["&Ucirc;" "Û"] ["&Uuml;" "Ü"] ["&Yacute;" "Ý"]
          ["&THORN;" "Þ"] ["&szlig;" "ß"]
          ["&agrave;" "à"] ["&aacute;" "á"] ["&acirc;" "â"] ["&atilde;" "ã"] ["&auml;" "ä"]
          ["&aring;" "å"] ["&aelig;" "æ"] ["&ccedil;" "ç"] ["&egrave;" "è"] ["&eacute;" "é"]
          ["&ecirc;" "ê"] ["&euml;" "ë"] ["&igrave;" "ì"] ["&iacute;" "í"] ["&icirc;" "î"]
          ["&iuml;" "ï"] ["&eth;" "ð"] ["&ntilde;" "ñ"] ["&ograve;" "ò"] ["&oacute;" "ó"]
          ["&ocirc;" "ô"] ["&otilde;" "õ"] ["&ouml;" "ö"] ["&divide;" "÷"] ["&oslash;" "ø"]
          ["&ugrave;" "ù"] ["&uacute;" "ú"] ["&ucirc;" "û"] ["&uuml;" "ü"] ["&yacute;" "ý"]
          ["&thorn;" "þ"] ["&yuml;" "ÿ"]
          ["&fnof;" "ƒ"]
          ["&Alpha;" "Α"] ["&Beta;" "Β"] ["&Gamma;" "Γ"] ["&Delta;" "Δ"] ["&Epsilon;" "Ε"]
          ["&Zeta;" "Ζ"] ["&Eta;" "Η"] ["&Theta;" "Θ"] ["&Iota;" "Ι"] ["&Kappa;" "Κ"]
          ["&Lambda;" "Λ"] ["&Mu;" "Μ"] ["&Nu;" "Ν"] ["&Xi;" "Ξ"] ["&Omicron;" "Ο"]
          ["&Pi;" "Π"] ["&Rho;" "Ρ"] ["&Sigma;" "Σ"] ["&Tau;" "Τ"] ["&Upsilon;" "Υ"]
          ["&Phi;" "Φ"] ["&Chi;" "Χ"] ["&Psi;" "Ψ"] ["&Omega;" "Ω"]
          ["&alpha;" "α"] ["&beta;" "β"] ["&gamma;" "γ"] ["&delta;" "δ"] ["&epsilon;" "ε"]
          ["&zeta;" "ζ"] ["&eta;" "η"] ["&theta;" "θ"] ["&iota;" "ι"] ["&kappa;" "κ"]
          ["&lambda;" "λ"] ["&mu;" "μ"] ["&nu;" "ν"] ["&xi;" "ξ"] ["&omicron;" "ο"]
          ["&pi;" "π"] ["&rho;" "ρ"] ["&sigmaf;" "ς"] ["&sigma;" "σ"] ["&tau;" "τ"]
          ["&upsilon;" "υ"] ["&phi;" "φ"] ["&chi;" "χ"] ["&psi;" "ψ"] ["&omega;" "ω"]
          ["&thetasym;" "ϑ"] ["&upsih;" "ϒ"] ["&piv;" "ϖ"]
          ["&bull;" "•"] ["&hellip;" "…"] ["&prime;" "′"] ["&Prime;" "″"] ["&oline;" "‾"]
          ["&frasl;" "⁄"] ["&weierp;" "℘"] ["&image;" "ℑ"] ["&real;" "ℜ"] ["&trade;" "™"]
          ["&alefsym;" "ℵ"]
          ["&larr;" "←"] ["&uarr;" "↑"] ["&rarr;" "→"] ["&darr;" "↓"] ["&harr;" "↔"]
          ["&crarr;" "↵"] ["&lArr;" "⇐"] ["&uArr;" "⇑"] ["&rArr;" "⇒"] ["&dArr;" "⇓"]
          ["&hArr;" "⇔"]
          ["&forall;" "∀"] ["&part;" "∂"] ["&exist;" "∃"] ["&empty;" "∅"] ["&nabla;" "∇"]
          ["&isin;" "∈"] ["&notin;" "∉"] ["&ni;" "∋"] ["&prod;" "∏"] ["&sum;" "∑"]
          ["&minus;" "−"] ["&lowast;" "∗"] ["&radic;" "√"] ["&prop;" "∝"] ["&infin;" "∞"]
          ["&ang;" "∠"] ["&and;" "∧"] ["&or;" "∨"] ["&cap;" "∩"] ["&cup;" "∪"]
          ["&int;" "∫"] ["&there4;" "∴"] ["&sim;" "∼"] ["&cong;" "≅"] ["&asymp;" "≈"]
          ["&ne;" "≠"] ["&equiv;" "≡"] ["&le;" "≤"] ["&ge;" "≥"] ["&sub;" "⊂"]
          ["&sup;" "⊃"] ["&nsub;" "⊄"] ["&sube;" "⊆"] ["&supe;" "⊇"] ["&oplus;" "⊕"]
          ["&otimes;" "⊗"] ["&perp;" "⊥"] ["&sdot;" "⋅"] ["&lceil;" "⌈"] ["&rceil;" "⌉"]
          ["&lfloor;" "⌊"] ["&rfloor;" "⌋"] ["&lang;" "〈"] ["&rang;" "〉"] ["&loz;" "◊"]
          ["&spades;" "♠"] ["&clubs;" "♣"] ["&hearts;" "♥"] ["&diams;" "♦"]
          ["&quot;" "\""]
          ["&OElig;" "Œ"] ["&oelig;" "œ"] ["&Scaron;" "Š"] ["&scaron;" "š"] ["&Yuml;" "Ÿ"]
          ["&circ;" "ˆ"] ["&tilde;" "˜"] ["&ndash;" "–"] ["&mdash;" "—"]
          ["&lsquo;" "‘"] ["&rsquo;" "’"] ["&sbquo;" "‚"] ["&ldquo;" "“"] ["&rdquo;" "”"]
          ["&bdquo;" "„"] ["&dagger;" "†"] ["&Dagger;" "‡"] ["&permil;" "‰"]
          ["&lsaquo;" "‹"] ["&rsaquo;" "›"] ["&euro;" "€"]
          ]))
    ;; save-excursion: the search loop below moves point; restore it for the caller.
    (save-excursion
      (save-restriction
        (narrow-to-region @begin @end)
        ;; Entity names are case sensitive, e.g. &Alpha; versus &alpha;.
        (let ((case-fold-search nil))
          (mapc
           (lambda ($x)
             (goto-char (point-min))
             (while (search-forward (elt $x 0) nil t)
               ;; FIXEDCASE and LITERAL: insert the replacement verbatim.
               (replace-match (elt $x 1) t t)
               ;; Highlight the text just inserted.
               (overlay-put (make-overlay (- (point) (length (elt $x 1))) (point))
                            'font-lock-face '(:foreground "red"))))
           $replaceMap))))))
Escape HTML Chars to Unicode
Sometimes it's useful to convert the HTML special characters < > & to their fullwidth Unicode lookalikes ＜ ＞ ＆, to avoid the whole encoding/decoding/entities complexity.
(defun xah-html-escape-char-to-unicode (@begin @end &optional @fullwidth-to-ascii-p)
  "Replace chars < > & to fullwidth version ＜ ＞ ＆ in current text block or selection.

Highlight changed places.
If `universal-argument' is called first, the replacement direction is reversed.

When called in lisp code, @begin @end are region begin/end positions.
If @fullwidth-to-ascii-p is true, reverse change direction.

URL `http://xahlee.info/emacs/emacs/elisp_replace_html_entities_command.html'
Version 2020-08-30"
  (interactive
   ;; save-excursion: computing the text block boundaries must not move the cursor.
   (save-excursion
     (list
      (if (use-region-p)
          (region-beginning)
        ;; Start of current text block: just after the previous blank line,
        ;; or beginning of buffer when there is none (the failed search
        ;; already moved point there because of the "move" argument).
        (when (re-search-backward "\n[ \t]*\n" nil "move")
          (goto-char (match-end 0)))
        (point))
      (if (use-region-p)
          (region-end)
        ;; End of current text block: just before the next blank line,
        ;; or end of buffer when there is none.
        (when (re-search-forward "\n[ \t]*\n" nil "move")
          (goto-char (match-beginning 0)))
        (point))
      (if current-prefix-arg t nil))))
  (let (($findReplaceMap
         ;; Each element is [FROM TO].  The fullwidth forms are
         ;; U+FF06 ＆, U+FF1C ＜, U+FF1E ＞.
         (if @fullwidth-to-ascii-p
             [ ["＆" "&"] ["＜" "<"] ["＞" ">"] ]
           [ ["&" "＆"] ["<" "＜"] [">" "＞"] ])))
    (save-excursion
      (save-restriction
        (narrow-to-region @begin @end)
        (mapc
         (lambda ($x)
           (goto-char (point-min))
           (while (search-forward (elt $x 0) nil t)
             ;; FIXEDCASE and LITERAL: insert the replacement verbatim.
             (replace-match (elt $x 1) t t)
             ;; Highlight the character just inserted.
             (overlay-put (make-overlay (- (point) (length (elt $x 1))) (point))
                          'font-lock-face '(:foreground "red"))))
         $findReplaceMap)))))