This page shows Python scripts for doing find/replace on Unicode-encoded files.
Here's how to do it for a single file encoded in UTF-16.
# -*- coding: utf-8 -*-
# python

# Find and replace many pairs of strings, in sequence, in a UTF-16 file.
# The result is written to a separate file (outFile); the input file itself
# is not modified.

filePath = "/Users/t/web/p/x/x001.html"
outFile = filePath + "~-~"

# (find, replace) pairs, applied in list order: a later pair operates on the
# text already rewritten by the earlier pairs.
findreplace = [
    (u"<title>西游记</title>", u"<title>西游记 (Monkey King)</title>"),
    # more pair here
]

# Read raw bytes and decode explicitly. Binary mode avoids any newline
# translation, and bytes.decode works the same on Python 2 and Python 3
# (the Python-2-only unicode() builtin is not needed).
with open(filePath, "rb") as inF:
    fContent = inF.read().decode("utf-16")

for findStr, repStr in findreplace:
    fContent = fContent.replace(findStr, repStr)

# Bound after the loop so the name exists even when findreplace is empty
# (binding it only inside the loop raised NameError in that case).
outText = fContent

with open(outFile, "wb") as outF:
    outF.write(outText.encode("utf-16"))
Here's a script that does multi-pair find & replace on all HTML files in a directory and its subdirectories, assuming the encoding is UTF-8.
# -*- coding: utf-8 -*-
# python

# Multi-pair find & replace on every ".html" file under mydir (walked
# recursively), reading and writing each file as UTF-8.

import os
import shutil
import sys

mydir = '/Users/t/web/p/xyz'

# (find, replace) pairs, applied in list order: a later pair operates on the
# text already rewritten by the earlier pairs.
findreplace = [
    ('find1', 'replace1'),
    ('find2', 'replace2'),
]


def replaceStringInFile(filePath):
    """Apply every (find, replace) pair in findreplace to the file filePath.

    The file is decoded and re-encoded as UTF-8. The new text is first
    written to filePath + '~x~'; the original is then copied to
    filePath + '~~' as a backup, removed, and the new file is renamed to
    filePath.

    Raises UnicodeDecodeError if the file content is not valid UTF-8.
    """
    print(filePath)
    tempName = filePath + '~x~'
    backupName = filePath + '~~'

    # Binary mode + explicit decode: no newline translation, and identical
    # behavior on Python 2 and Python 3.
    with open(filePath, 'rb') as inF:
        s = inF.read().decode('utf-8')

    # Rebind s directly so the result is defined even when findreplace is
    # empty (a separate name bound only inside the loop raised NameError).
    for findStr, repStr in findreplace:
        s = s.replace(findStr, repStr)

    with open(tempName, 'wb') as outF:
        outF.write(s.encode('utf-8'))

    # Keep a copy of the original (with metadata) before swapping in the
    # rewritten file.
    shutil.copy2(filePath, backupName)
    # Remove first: os.rename fails on Windows if the destination exists.
    os.remove(filePath)
    os.rename(tempName, filePath)


def myfun(dummy, thisDir, dirChildrenList):
    """Run replaceStringInFile on each regular '.html' file in thisDir.

    dummy is unused; it is kept so the function retains the
    (arg, dirname, names) callback signature. dirChildrenList holds the
    entry names (not full paths) found in thisDir.
    """
    for child in dirChildrenList:
        childPath = os.path.join(thisDir, child)
        # isfile() skips directories that happen to be named "*.html".
        if os.path.splitext(child)[1] == '.html' and os.path.isfile(childPath):
            replaceStringInFile(childPath)
            print(child)


# os.path.walk was removed in Python 3; os.walk visits the same directories
# top-down. Files and subdirectory names are passed together because
# os.path.walk handed the callback the full directory listing.
for thisDir, subDirNames, fileNames in os.walk(mydir):
    myfun('dummy', thisDir, subDirNames + fileNames)
See also: Perl: Find/Replace on Multiple Files.