# count the occurrence of words in a file
# prints words with their counts
# to use, save this file as word_freq.py, edit the line filePath, then run “python word_freq.py”
# 2006-10-27 http://xahlee.info/python/python_word_frequency.html
import operator
import os
import re
from collections import Counter
# Path of the file whose words are counted; edit this line before running.
filePath = os.path.expanduser("~/web/xahlee_info/python/python_word_frequency.html")


def count_words(text):
    r"""Return a Counter mapping each word in *text* to its number of occurrences.

    Keys are words, values are occurrence frequency. The text is lowercased
    first, then split on every single non-word character (``\W``).

    Because the split is on *single* characters, two adjacent separators
    (for example ``", "``) yield an empty string between them. Those empty
    strings are counted as well, under the key ``""``.
    """
    return Counter(re.split(r"\W", text.lower()))


def by_descending_count(freqlist):
    """Return the (word, count) pairs of *freqlist*, highest count first.

    The sort is stable, so words with equal counts keep the order in which
    they were first inserted into *freqlist*.
    """
    return sorted(freqlist.items(), key=operator.itemgetter(1), reverse=True)


def main():
    """Read the file at ``filePath`` and print one line per distinct word.

    Each line is the count, a separator, then the word; lines are ordered
    from most to least frequent.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # ``with`` closes the file even if read() raises.
    with open(filePath, "r", encoding="utf-8") as inF:
        s = inF.read()

    for k, v in by_descending_count(count_words(s)):
        # NOTE(review): the separator character may be a mis-encoded symbol
        # from wherever this script was copied -- confirm the intended one.
        print(str(v) + " β " + k)


if __name__ == "__main__":
    main()
1605 β
157 β li
154 β span
150 β a
86 β class
81 β html
75 β href
42 β ul
39 β python
38 β h4
28 β comment
24 β name
...
Python 2, Count Word Frequency
# -*- coding: utf-8 -*-
# Python
# count the occurrence of words in a file
# prints words with their counts
# to use, save this file as word_freq.py, edit the line filePath, then run “python word_freq.py”
# 2006-10-27 http://xahlee.info/python/python_word_frequency.html
import re, operator
# Path of the file whose words are counted; edit this line before running.
filePath = "/Users/xah/web/xahlee_info/python/python_word_frequency.html"


def count_words(text):
    r"""Return a dict mapping each word in *text* to its number of occurrences.

    Keys are words, vals are occurrence frequency. The text is lowercased
    first, then split on every single non-word character (``\W``).

    Because the split is on *single* characters, two adjacent separators
    yield an empty string between them; those empty strings are counted
    too, under the empty-string key.
    """
    freqlist = {}
    for wd in re.split(r'\W', text.lower()):
        freqlist[wd] = freqlist.get(wd, 0) + 1
    return freqlist


def main():
    """Read the file at ``filePath`` and print one line per distinct word.

    Each line is the count, a separator, then the word; lines are ordered
    from most to least frequent. This function targets Python 2: it relies
    on the ``unicode`` builtin.
    """
    inF = open(filePath, 'rb')
    try:
        # Decode the raw bytes as UTF-8 so the text is handled as unicode.
        s = unicode(inF.read(), 'utf-8')
    finally:
        # Close the file even if reading or decoding fails.
        inF.close()

    freqlist = count_words(s)
    for k, v in sorted(freqlist.items(), key=operator.itemgetter(1), reverse=True):
        # The parentheses only group the expression; under Python 2 this is
        # still the print statement.
        print(str(v) + u" β " + k.encode('utf-8'))


if __name__ == '__main__':
    main()