Python: Convert File Encoding

By Xah Lee. Date: . Last updated: .

Convert File Encoding of One File

Here's a script to convert a file's encoding.

# Convert a text file from one character encoding to another (Python 3).

path1 = "/Users/xah/xxtest1"  # file to read
path2 = "/Users/xah/xxtest2"  # file to write

coding1 = "utf-8"  # encoding used to decode path1
coding2 = "gb18030"  # encoding used to encode path2

# `with` closes each file even if decoding or encoding raises.
with open(path1, "r", encoding=coding1) as f:
    content = f.read()

with open(path2, "w", encoding=coding2) as f:
    f.write(content)

print("done")

Here's the same script for Python 2.

# -*- coding: utf-8 -*-
# python 2

path1 = "/Users/xah/web/xahlee_info/python/xxtest"
path2 = "/Users/xah/web/xahlee_info/python/xxtest2"

coding1 = "utf-8"
coding2 = "gb18030"

f= open(path1, 'rb')
content= unicode(f.read(), coding1 )
f.close()
f= open(path2, 'wb')
f.write(content.encode(coding2))
f.close()

print "done"

Convert File Encoding of All Files in a Dir

Here's a Python program that converts the character encoding of all .html files in a directory and its subdirectories.

# -*- coding: utf-8 -*-
# python 2

# a script to convert file encoding

import os

# Root directory passed to os.path.walk at the end of this script.
inputDir = '/Users/t/web/p/monkey_king'

def changeEncoding(filePath):
    '''take a full path to a file as input, and change its encoding from gb18030 to utf-16'''
    print filePath

    tempName = filePath+'~-~'

    inputFile = open(filePath,'rb')
    content = unicode(inputFile.read(),'gb18030')
    inputFile.close()

    outputFile = open(tempName,'wb')
    outputFile.write(content.encode('utf-16'))
    outputFile.close()

    os.rename(tempName,filePath)

def fileFilter(dummyArg, thisDir, dirChildrenList):
    '''Call changeEncoding on every regular file in thisDir named *.html.

    Has the (arg, dirname, names) signature of an os.path.walk visitor.

    dummyArg -- unused.
    thisDir -- path of the directory being visited.
    dirChildrenList -- names of the entries inside thisDir.
    '''
    for child in dirChildrenList:
        if os.path.splitext(child)[1] != '.html':
            continue
        # Build the path once, with the platform's separator.
        childPath = os.path.join(thisDir, child)
        # Skip directories (or anything else that is not a regular file).
        if os.path.isfile(childPath):
            changeEncoding(childPath)
# Traverse the tree rooted at inputDir; os.path.walk (a Python 2 API) calls
# fileFilter once for each directory it visits.
os.path.walk(inputDir, fileFilter, None)

(thanks to Andrew Clover for help.)

Convert File Encoding

Python, Unicode