Emacs: Replace Invisible Unicode Chars 🚀
Here's a command that replaces invisible characters.
Such characters often sneak in when you copy text from websites such as Twitter.
(defvar xah-replace-invisible-char-list nil
  "An alist used by `xah-replace-invisible-char'.
Each element is (CODEPOINT . NAME-STRING).
CODEPOINT is an integer.
NAME-STRING is for documentation purposes.")

;; `setq' always assigns, so re-evaluating this form refreshes the list
;; even when the variable is already bound (a `defvar' initial value
;; would be ignored in that case).
(setq xah-replace-invisible-char-list
      '(
        ;; (65279 . "ZERO WIDTH NO-BREAK SPACE")
        (8203 . "ZERO WIDTH SPACE")
        (8206 . "LEFT-TO-RIGHT MARK")
        (8207 . "RIGHT-TO-LEFT MARK")
        (8232 . "LINE SEPARATOR")
        (8233 . "PARAGRAPH SEPARATOR")
        (8238 . "RIGHT-TO-LEFT OVERRIDE")
        (8239 . "NARROW NO-BREAK SPACE")
        (65532 . "OBJECT REPLACEMENT CHARACTER")
        (65024 . "VARIATION SELECTOR-1")
        (65025 . "VARIATION SELECTOR-2")
        (65026 . "VARIATION SELECTOR-3")
        (65027 . "VARIATION SELECTOR-4")
        (65028 . "VARIATION SELECTOR-5")
        (65029 . "VARIATION SELECTOR-6")
        (65030 . "VARIATION SELECTOR-7")
        (65031 . "VARIATION SELECTOR-8")
        (65032 . "VARIATION SELECTOR-9")
        (65033 . "VARIATION SELECTOR-10")
        (65034 . "VARIATION SELECTOR-11")
        (65035 . "VARIATION SELECTOR-12")
        (65036 . "VARIATION SELECTOR-13")
        (65037 . "VARIATION SELECTOR-14")
        (65038 . "VARIATION SELECTOR-15")
        (65039 . "VARIATION SELECTOR-16")))

;; "\ufeff\\|\u200b\\|\u200f\\|\u202e\\|\u200e\\|\ufffc\\|\ufe0f"

(defun xah-replace-invisible-char ()
  "Query replace some invisible Unicode chars.
The chars replaced are from `xah-replace-invisible-char-list'.
Search begins at cursor position. (respects `narrow-to-region')

Each occurrence found is deleted only if you answer yes to the
prompt.  The starting position is pushed as the mark, so when the
command is done, call `exchange-point-and-mark' to go back to the
original cursor position.

Return the completion message string.

URL `http://xahlee.info/emacs/emacs/elisp_unicode_replace_invisible_chars.html'
Version: 2018-09-07 2023-06-22 2023-07-12"
  (interactive)
  (let ((case-replace nil)
        (case-fold-search nil)
        ;; One regexp matching any single character whose codepoint is
        ;; listed in `xah-replace-invisible-char-list'.
        (xregex (regexp-opt
                 (mapcar (lambda (x) (char-to-string (car x)))
                         xah-replace-invisible-char-list))))
    (push-mark)
    (while (re-search-forward xregex nil t)
      ;; The name shown in the prompt comes from Emacs's Unicode data,
      ;; not from the NAME-STRING stored in the alist.
      (let* ((xcharId (string-to-char (match-string 0)))
             (xname (get-char-code-property xcharId 'name)))
        (when (y-or-n-p (format "found 「%s」 codepoint 「%s」, replace?" xname xcharId))
          (replace-match "")))))
  ;; `message' shows plain text in the echo area; `print' would emit the
  ;; quoted Lisp form surrounded by newlines.
  (message "Done replace invisible chars or none."))
See also: Emacs: Insert Unicode Character