Emacs+Htmlize syntax-coloring Example


The following is syntax-colored code generated with Emacs htmlize.

;-*- coding: utf-8 -*-

;; emacs lisp. emacs 22.

;; started: 2008-01-03.

;; generate a report of wikipedia links.

;; This program traverses a given dir, visits every HTML file, finds links to Wikipedia in those files, collects them, generates a nice HTML report of these links and the files they are from, and writes it to a given file.

;;   Xah Lee
;; ∑ http://xahlee.org/

;;;; user level global parameters

(defconst dirpath
  (expand-file-name "../" default-directory)
  "Absolute path of the directory to process.
It is the parent of `default-directory' at the time this form is evaluated.")

(defconst root-path-char-count (length dirpath)
  "Number of leading characters to take off a file's full path.
What remains is used as the relative path for the link url.
For example, if the file path is “/Users/xah/web/emacs/emacs.html”
and `root-path-char-count' is 15, then its url in the link is
“emacs/emacs.html”.
This number is not necessarily the length of `dirpath'.  It can be
smaller for flexibility.")

(defconst output-file
  (expand-file-name "wikipedia_links.html" (expand-file-name "../"))
  "Path of the file the generated report is written to.
An existing file of that name is first backed up under the same name
with “~” appended.")

;;;; loading package. global vars.

;; Name of the work buffer: " xahtemp" followed by a random integer.
;; `(random t)' also re-seeds the random number generator.
(setq tmpBufName
      (concat " xahtemp"
              (number-to-string (random t))))

(require 'find-lisp)

;; Hash table that collects the links found.
;; Key: a Wikipedia url.  Value: the list of paths of the files linking to it.
;; Each entry thus looks like: "Wikipedia url" → ("file1" "file2" …)
(setq wpdata-hash
      (make-hash-table :size 4000 :test 'equal))

;; List version of `wpdata-hash', used for sorting and for the report.
(setq wpdata-list nil)

;; Header text for the generated HTML file: doctype, head elements
;; (including the inline style sheet) and the page heading.
(setq header-text "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">
<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html;charset=utf-8\">
<link rel=\"stylesheet\" type=\"text/css\" href=\"mybasic.css\">
<title>Links To Wikipedia from XahLee.org</title>
<style type=\"text/css\">
li > a:link:active {text-decoration:none;}
li > a:link:hover {text-decoration:none; background-color:yellow}
li > a:visited:hover {text-decoration:none;background-color:yellow}
li > a:visited {text-decoration:none;}
li > a:link {text-decoration:none;}
</style>

<h1>Links To Wikipedia from XahLee.org</h1>

")

;; Footer text for the generated HTML file: date stamp, copyright line
;; and signature block.
(setq footer-text "
<div class=\"dstp\">2008-01</div>
<div class=\"cpr\">© 2008 by <a href=\"http://xahlee.org/PageTwo_dir/more.html\">Xah Lee</a>.</div>

<div class=\"xsig\"></div>
")


;;;; subroutines

(defun insert-date ()
  "Insert the current date at point, formatted like 2008-01-03.
If the region is active and `delete-selection-mode' or `cua-mode' is
on, delete the region first, so that the date replaces it."
  (interactive)
  ;; `bound-and-true-p' guards against the mode variables being unbound
  ;; when the corresponding libraries are not loaded.
  (when (and mark-active
             (or (bound-and-true-p delete-selection-mode)
                 (bound-and-true-p cua-mode)))
    (delete-region (region-beginning) (region-end)))
  ;; The date is inserted unconditionally, not only when no region was deleted.
  (insert (format-time-string "%Y-%m-%d")))

(defun hash-to-list (hashtable)
  "Return a list that represents HASHTABLE.
Each element is a two-element list (KEY VALUE).  The order of the
elements is whatever order `maphash' visits the entries in."
  (let (mylist)
    (maphash (lambda (kk vv) (setq mylist (cons (list kk vv) mylist)))
             hashtable)
    ;; `maphash' itself returns nil, so the accumulated list must be
    ;; returned explicitly.
    mylist))

(defun add-wplink-to-hash (filePath)
  "Get the Wikipedia links in the file FILEPATH and add them to `wpdata-hash'.
Each link url is a key of `wpdata-hash'; FILEPATH is prepended to the
list of file paths stored under that key.  Urls containing “=” are
skipped.
The file is read into a temporary buffer, so the current buffer is not
modified."
  (with-temp-buffer
    (insert-file-contents filePath)
    (goto-char (point-min))
    ;; NOTE(review): this search pattern is a reconstruction; confirm it
    ;; matches the link markup actually used in the scanned files.
    (while (re-search-forward
            "<a href=\"\\(https?://[^/\"]+\\.wikipedia\\.org/[^\"]+\\)\""
            nil t)
      ;; Capture the url before calling `string-match', which overwrites
      ;; the match data set by the search above.
      (let ((url (match-string 1)))
        (unless (string-match "=" url) ; not some history page
          ;; `gethash' returns nil for a new url, so this either starts a
          ;; new list or prepends to the existing one.
          (puthash url (cons filePath (gethash url wpdata-hash)) wpdata-hash))))))

(defun prt-each (ele)
  "Insert one report item for ELE at point.
ELE is of the form (url (filepath1 filepath2 …)).
The inserted text looks like this:
<li><link to url> — <link to file1>, <link to file2>, ….</li>
Each file link uses the file path with its first
`root-path-char-count' characters removed as the href, and the file's
title as the link text."
  (let ((wplink (car ele))
        (files (cadr ele)))
    (insert "<li>"
            (wikipedia-url-to-link wplink)
            " —"
            ;; Joining with "," gives the same text as appending a comma
            ;; after every link and then deleting the last one.
            (mapconcat
             (lambda (x)
               (concat " <a href=\"" (substring x root-path-char-count) "\">"
                       (get-html-file-title x) "</a>"))
             files ",")
            "."
            "</li>\n")))


(defun wikipedia-url-to-link (url)
  "Return URL as an HTML link string.
The link text is the part of URL after the last “/”, with
percent-escapes decoded (as UTF-8), “&” escaped as “&amp;” and
underscores replaced by spaces.  For example,
\"http://en.wikipedia.org/wiki/Emacs\" gives
<a href=\"http://en.wikipedia.org/wiki/Emacs\">Emacs</a>."
  (require 'url-util)
  (let* ((unhexed (decode-coding-string (url-unhex-string url nil) 'utf-8))
         (linktext (car (last (split-string unhexed "/")))))
    ;; Escape “&” so the link text is valid HTML.  The last two arguments
    ;; make the replacement literal.
    (setq linktext (replace-regexp-in-string "&" "&amp;" linktext t t))
    (setq linktext (replace-regexp-in-string "_" " " linktext))
    (concat "<a href=\"" url "\">" linktext "</a>")))

(defun get-html-file-title (fname)
  "Return the text of the <title> tag of the file FNAME.
Assumes that the file contains the string “<title>…</title>”; an error
is signaled by `search-forward' if it does not.
The file is read into a temporary buffer, so the current buffer is not
modified."
  (with-temp-buffer
    (insert-file-contents fname)
    (goto-char (point-min))
    (let (x1 x2)
      ;; `search-forward' returns the position just after the match,
      ;; i.e. where the title text starts.
      (setq x1 (search-forward "<title>"))
      (search-forward "</title>")
      ;; The title text ends where the closing tag begins.
      (setq x2 (match-beginning 0))
      (buffer-substring-no-properties x1 x2))))

;;;; main

;; backup: keep a copy of an existing report under the same name with
;; “~” appended (overwriting an older backup), then remove the report.
(when (file-exists-p output-file)
  (copy-file output-file (concat output-file "~") t)
  (delete-file output-file))

;; get links from files, put to hash
;; The work buffer is made current while scanning, so that any file
;; contents read along the way do not land in the user's buffer.
(with-current-buffer (get-buffer-create tmpBufName)
  (let (filePaths)
    ;; get files ending in “.html” whose path has no component starting
    ;; with “xx”.
    (dolist (x (find-lisp-find-files dirpath "\\.html$"))
      (unless (string-match "/xx" x)
        (setq filePaths (cons x filePaths))))
    (mapc 'add-wplink-to-hash filePaths))
  ;; list version of the hash, sorted by url, ignoring case
  (setq wpdata-list
        (sort (hash-to-list wpdata-hash)
              (lambda (a b) (string< (downcase (car a)) (downcase (car b)))))))

;; print it out in a temp buffer and save to file
(switch-to-buffer tmpBufName)
;; Start from an empty buffer: it may still hold text read while the
;; files were being scanned.
(erase-buffer)
(insert header-text)
(insert "<p>This page contains all existing links from XahLee.org to Wikipedia, as of ")
(insert-date)
(insert ". There are a total of " (number-to-string (length wpdata-list)) " links.</p>\n\n")
(insert "<ul>")
;; `mapc' rather than `mapcar': `prt-each' is called only for its side
;; effect of inserting text.
(mapc 'prt-each wpdata-list)
(insert "</ul>")
(insert footer-text)
(write-file output-file)

;; release the collected data
(clrhash wpdata-hash)
(setq wpdata-list '())
blog comments powered by Disqus