Emacs: Replace Invisible Unicode Chars 📜

By Xah Lee. Date: . Last updated: .

Here's a command that finds invisible Unicode characters in the current buffer and deletes them, asking for confirmation at each one.

Such characters often sneak in when you copy text from websites such as Twitter.

Put this in your Emacs init file:

(defvar xah-replace-invisible-char-list nil
  "An alist of the characters `xah-replace-invisible-char' searches for.
Each element has the form (CODEPOINT . NAME-STRING).
CODEPOINT is an integer.
NAME-STRING is there for documentation purposes.")

;; The value is assigned with `setq', separately from the `defvar', so that
;; evaluating this code again replaces a previously loaded value.
;; Codepoints are written in hex read syntax: #x200B is U+200B.
(setq
 xah-replace-invisible-char-list
 '(
   ;; emoji skin tone modifiers
   (#x1F3FB . "EMOJI MODIFIER FITZPATRICK TYPE-1-2")
   (#x1F3FC . "EMOJI MODIFIER FITZPATRICK TYPE-3")
   (#x1F3FD . "EMOJI MODIFIER FITZPATRICK TYPE-4")
   (#x1F3FE . "EMOJI MODIFIER FITZPATRICK TYPE-5")
   (#x1F3FF . "EMOJI MODIFIER FITZPATRICK TYPE-6")

   ;; zero width chars, directional marks, separators
   (#xFEFF . "ZERO WIDTH NO-BREAK SPACE")
   (#x200B . "ZERO WIDTH SPACE")
   (#x200E . "LEFT-TO-RIGHT MARK")
   (#x200F . "RIGHT-TO-LEFT MARK")
   (#x2028 . "LINE SEPARATOR")
   (#x2029 . "PARAGRAPH SEPARATOR")
   (#x202E . "RIGHT-TO-LEFT OVERRIDE")
   (#x202F . "NARROW NO-BREAK SPACE")
   (#x2060 . "WORD JOINER")

   ;; object replacement char and variation selectors
   (#xFFFC . "OBJECT REPLACEMENT CHARACTER")
   (#xFE00 . "VARIATION SELECTOR-1")
   (#xFE01 . "VARIATION SELECTOR-2")
   (#xFE02 . "VARIATION SELECTOR-3")
   (#xFE03 . "VARIATION SELECTOR-4")
   (#xFE04 . "VARIATION SELECTOR-5")
   (#xFE05 . "VARIATION SELECTOR-6")
   (#xFE06 . "VARIATION SELECTOR-7")
   (#xFE07 . "VARIATION SELECTOR-8")
   (#xFE08 . "VARIATION SELECTOR-9")
   (#xFE09 . "VARIATION SELECTOR-10")
   (#xFE0A . "VARIATION SELECTOR-11")
   (#xFE0B . "VARIATION SELECTOR-12")
   (#xFE0C . "VARIATION SELECTOR-13")
   (#xFE0D . "VARIATION SELECTOR-14")
   (#xFE0E . "VARIATION SELECTOR-15")
   (#xFE0F . "VARIATION SELECTOR-16")))
(defun xah-replace-invisible-char (&optional Confirm-p)
  "Delete invisible Unicode chars in the current buffer.
The chars searched for are the codepoints in `xah-replace-invisible-char-list'.

Search begins at beginning of buffer. (respects `narrow-to-region')

If Confirm-p is non-nil (always the case when called interactively), ask before deleting each char found. Else, delete without asking.

Each position where a char was found is pushed to the mark ring, and the text from there to the end of the following word is highlighted with `font-lock-warning-face'.

When the command is done, the mark is at the original cursor position. Call `exchange-point-and-mark' to go back to it.

Return a list of vectors [NAME CODEPOINT FILE-OR-BUFFER-NAME POSITION], one for each char found (including those you chose not to delete), last found first.

URL `http://xahlee.info/emacs/emacs/elisp_unicode_replace_invisible_chars.html'
Created: 2018-09-07
Version: 2024-12-07"
  (interactive (list t))
  (let ((case-replace nil)
        (case-fold-search nil)
        (xregex
         (regexp-opt
          (mapcar (lambda (x) (char-to-string (car x))) xah-replace-invisible-char-list)))
        ;; A marker, not an integer, so the saved position stays right
        ;; when chars before it get deleted.
        (xorigin (point-marker))
        xresult)
    (goto-char (point-min))
    (while (re-search-forward xregex nil t)
      (let* ((xcharId (string-to-char (match-string 0)))
             (xname (get-char-code-property xcharId 'name)))
        (when (or (not Confirm-p)
                  (y-or-n-p (format "found 「%s」 codepoint 「%s」, position 「%s」, replace?" xname xcharId (point))))
          (replace-match ""))
        (push (vector xname xcharId (or buffer-file-name (buffer-name)) (point)) xresult)
        (push-mark)
        ;; `save-excursion' keeps point where the match ended. Without it,
        ;; `forward-word' moves point and the next search skips any
        ;; invisible char lying between here and the end of the next word.
        (overlay-put
         (make-overlay (point) (save-excursion (forward-word) (point)))
         'face 'font-lock-warning-face)))
    ;; Leave the mark at the starting position, so that
    ;; `exchange-point-and-mark' goes back there as the docstring says.
    (push-mark xorigin t)
    (set-marker xorigin nil)
    (message "Done replace invisible chars or none.")
    xresult))