# count the occurrences of words in a file
# prints words with their counts
# to use, save this file as word_freq.py, edit the line filePath, then run “python word_freq.py”
# 2006-10-27 http://xahlee.info/python/python_word_frequency.html
import re, operator, os
# Path of the file to analyse; edit this line before running the script.
filePath = os.path.expanduser("~/web/xahlee_info/python/python_word_frequency.html")

# keys are words, vals are occurrence frequency
freqlist = {}

# Read the whole file as UTF-8 text. The with-block closes the handle
# even if read() raises.
with open(filePath, "r", encoding="utf-8") as inF:
    s = inF.read()

# Lowercase so that "Python" and "python" are counted as the same word.
s = s.lower()

# Split on every single non-word character. Two adjacent separators produce
# an empty string between them, and that empty string is counted under the
# key "" like any other word.
wordlist = re.split(r"\W", s)

for wd in wordlist:
    freqlist[wd] = freqlist.get(wd, 0) + 1

# Print one "count → word" line per distinct word, highest count first.
for k, v in sorted(freqlist.items(), key=operator.itemgetter(1), reverse=True):
    print(str(v) + " → " + k)
1605 →
157 → li
154 → span
150 → a
86 → class
81 → html
75 → href
42 → ul
39 → python
38 → h4
28 → comment
24 → name
...
Python 2, Count Word Frequency
# -*- coding: utf-8 -*-
# Python
# count the occurrences of words in a file
# prints words with their counts
# to use, save this file as word_freq.py, edit the line filePath, then run “python word_freq.py”
# 2006-10-27 http://xahlee.info/python/python_word_frequency.html
import re, operator
# Path of the file to analyse; edit this line before running the script.
filePath = "/Users/xah/web/xahlee_info/python/python_word_frequency.html"

# keys are words, vals are occurrence frequency
freqlist = {}

# Read the raw bytes and decode them explicitly as UTF-8.
# try/finally guarantees the handle is closed even if reading or decoding fails.
inF = open(filePath, 'rb')
try:
    s = unicode(inF.read(), 'utf-8')
finally:
    inF.close()

# Lowercase so that "Python" and "python" are counted as the same word.
s = s.lower()

# Split on every single non-word character. Two adjacent separators produce
# an empty string between them, and that empty string is counted under the
# key u"" like any other word.
wordlist = re.split(r'\W', s)

for wd in wordlist:
    # "wd in freqlist" is the newer spelling of freqlist.has_key(wd)
    if wd in freqlist:
        freqlist[wd] = freqlist[wd] + 1
    else:
        freqlist[wd] = 1

# Print one "count → word" line per distinct word, highest count first.
for k, v in sorted(freqlist.items(), key=operator.itemgetter(1), reverse=True):
    # Build the whole line as unicode and encode it once. Printing the
    # encoded bytes also works when stdout has no encoding (for example
    # when the output is piped to a file), where printing a unicode string
    # containing the arrow would raise UnicodeEncodeError.
    print (unicode(v) + u" → " + k).encode('utf-8')