Python: Count Word Frequency

By Xah Lee. Date: . Last updated: .

Python 3, Count Word Frequency

# count the occurrences of words in a file
# prints words with their counts

# to use, save this file as word_freq.py, edit the line filePath, then run “python word_freq.py”

# 2006-10-27 http://xahlee.info/python/python_word_frequency.html

import re, operator, os

# Path of the file to analyze; "~" is expanded to the current user's home directory.
filePath = os.path.expanduser("~/web/xahlee_info/python/python_word_frequency.html")

# keys are words, vals are occurrence frequency
freqlist = {}

# "with" closes the file even if read() or UTF-8 decoding raises.
with open(filePath, "r", encoding="utf-8") as inF:
    s = inF.read()

# Fold case so that e.g. "Python" and "python" are counted as the same word.
s = s.lower()

# Split on every single non-word character. Two adjacent separators produce an
# empty string between them; those empty strings are tallied under the "" key
# like any other token.
wordlist = re.split(r"\W", s)
for wd in wordlist:
    freqlist[wd] = freqlist.get(wd, 0) + 1

# Print one "count → word" line per distinct token, highest count first.
# sorted() is stable, so tokens with equal counts stay in first-seen order.
for k, v in sorted(freqlist.items(), key=operator.itemgetter(1), reverse=True):
    print(str(v) + " → " + k)
1605 →
157 → li
154 → span
150 → a
86 → class
81 → html
75 → href
42 → ul
39 → python
38 → h4
28 → comment
24 → name
...

Python 2, Count Word Frequency

# -*- coding: utf-8 -*-
# Python

# count the occurrences of words in a file
# prints words with their counts

# to use, save this file as word_freq.py, edit the line filePath, then run “python word_freq.py”

# 2006-10-27 http://xahlee.info/python/python_word_frequency.html

import re, operator

# Path of the file to analyze; edit this before running.
filePath = "/Users/xah/web/xahlee_info/python/python_word_frequency.html"

# keys are words, vals are occurrence frequency
freqlist = {}

# Read raw bytes and decode them explicitly as UTF-8. "with" closes the file
# even if reading or decoding raises.
with open(filePath, 'rb') as inF:
    s = unicode(inF.read(), 'utf-8')

# Fold case so that e.g. "Python" and "python" are counted as the same word.
s = s.lower()

# In Python 2, \W without re.UNICODE treats every non-ASCII letter as a
# separator, which chops words such as "café" apart. The flag makes the split
# follow Unicode word characters instead.
# Adjacent separators produce empty strings; they are tallied under the u""
# key like any other token.
wordlist = re.compile(r'\W', re.UNICODE).split(s)
for wd in wordlist:
    freqlist[wd] = freqlist.get(wd, 0) + 1

# Print one "count → word" line per distinct token, highest count first.
for k, v in sorted(freqlist.items(), key=operator.itemgetter(1), reverse=True):
    # Build the whole line as unicode and encode it exactly once. Mixing a
    # unicode string with already-encoded bytes triggers an implicit ASCII
    # decode, and printing unicode fails when stdout has no UTF-8 encoding
    # (for example when the output is piped).
    print((str(v) + u" → " + k).encode('utf-8'))

Python, Read Write File