Python: Convert File Encoding
Convert File Encoding of One File
Here's a script to convert a file's encoding.
# Convert one text file from one character encoding to another (Python 3).

path1 = "/Users/xah/xxtest1"  # input file
path2 = "/Users/xah/xxtest2"  # output file

coding1 = "utf-8"  # encoding of the input file
coding2 = "gb18030"  # encoding to write the output file in

# newline="" turns off newline translation on both read and write, so the
# file's original line endings (LF or CRLF) are preserved; only the
# character encoding changes.
# The "with" blocks close each file even if decoding or encoding fails.
with open(path1, "r", encoding=coding1, newline="") as f:
    content = f.read()

with open(path2, "w", encoding=coding2, newline="") as f:
    f.write(content)

print("done")
Here's the Python 2 version.
# -*- coding: utf-8 -*-
# python 2.6+ (this version also runs on python 3)

# Convert one file from one character encoding to another.

path1 = "/Users/xah/web/xahlee_info/python/xxtest"  # input file
path2 = "/Users/xah/web/xahlee_info/python/xxtest2"  # output file

coding1 = "utf-8"  # encoding of the input file
coding2 = "gb18030"  # encoding to write the output file in

# Files are opened in binary mode and decoded/encoded explicitly, so the
# bytes are never touched by newline translation.
# The "with" blocks close each file even if decoding or encoding fails.
with open(path1, 'rb') as f:
    content = f.read().decode(coding1)

with open(path2, 'wb') as f:
    f.write(content.encode(coding2))

# Parenthesized form prints the same text on python 2 and python 3.
print("done")
Convert File Encoding of All Files in a Dir
Here's a Python program that converts the character encoding of every .html file in a directory and its subdirectories.
# -*- coding: utf-8 -*-
# python 2.6+ (this version also runs on python 3)

# a script to convert file encoding

import os

inputDir = '/Users/t/web/p/monkey_king'


def changeEncoding(filePath):
    '''Take a full path to a file as input, and change its encoding
    from gb18030 to utf-16.

    The converted data is written to a temporary file next to the
    original (filePath + '~-~'), which is then renamed to filePath.
    Raises UnicodeDecodeError if the file content is not valid gb18030.
    '''
    print(filePath)
    tempName = filePath + '~-~'

    with open(filePath, 'rb') as inputFile:
        content = inputFile.read().decode('gb18030')

    # Convert before creating the temp file, so a conversion failure
    # does not leave a stray temp file behind.
    encoded = content.encode('utf-16')

    with open(tempName, 'wb') as outputFile:
        outputFile.write(encoded)

    os.rename(tempName, filePath)


def fileFilter(dummyArg, thisDir, dirChildrenList):
    '''Convert each regular file with a .html extension in one directory.

    dummyArg is unused; it is kept so the signature stays compatible
    with the (arg, dirname, names) callback form.
    thisDir is the directory path.
    dirChildrenList is the list of entry names inside thisDir.
    '''
    for child in dirChildrenList:
        childPath = os.path.join(thisDir, child)
        if os.path.splitext(child)[1] == '.html' and os.path.isfile(childPath):
            changeEncoding(childPath)


# Walk inputDir recursively. os.walk is used instead of os.path.walk,
# which does not exist in python 3. Directory names are passed along with
# file names; fileFilter itself skips anything that is not a regular file.
for thisDir, dirNames, fileNames in os.walk(inputDir):
    fileFilter(None, thisDir, dirNames + fileNames)
(thanks to Andrew Clover for help.)