The following is code prettified using google-code-prettify. Note that it botched the line of regex that parses the URL (the line as displayed is missing some 60 characters). It also damages the HTML code for the whole page; for example, the navigation link “∑ Back to google-code-prettify Tutorial.” at the bottom no longer works.
# -*- coding: utf-8 -*-
# Python
# find and replace in a dir by multiple pairs of regex
import os, sys,shutil,re
# Root of the directory tree that the walk at the bottom of the file visits.
mydir = '/Users/xah/some'

# Ordered list of (compiled regex, replacement template) pairs.  Each pair is
# applied to the output of the previous one.
#
# The literals avoid the Python-2-only ``ur''`` prefix: the pattern is pure
# ASCII, so a plain raw string has the same value, and the one non-ASCII
# character of the replacement (U+2197, the north-east arrow) is spelled as an
# escape in a separate ``u''`` piece.
findreplace = [
    (
        # Matches a link whose visible text is the raw wikipedia URL and
        # captures the last path component as ``title``.
        #  - ``wikipedia\.org``: the dot is escaped so that only a literal
        #    "." is accepted (previously any character matched there).
        #  - ``([\w.]+)/``: was ``([\w\.]+)+/``; the nested quantifier matched
        #    exactly the same text but backtracked exponentially on link text
        #    that contains no "/".  Group numbering is unchanged.
        re.compile(
            r'''<a href="http://(?P<urlpart>(?:[^.]+\.)?wikipedia\.org[^"]+)">http://([\w.]+)/(\w+/)+(?P<title>[^<]+)</a>''',
            re.U | re.M,
        ),
        r'''<a href="http://\g<urlpart>">\g<title>''' + u'\u2197</a>',
    ),  # wikipedia
    # more regex pairs here
]
def replaceStringInFile(filePath, pairs=None):
    """Apply a list of regex substitutions to one UTF-8 file, in place.

    Every (regex, replacement) pair is applied in order, each one to the
    output of the previous one.  The file is rewritten only if at least one
    pattern matched; before rewriting, the current file is copied to
    ``filePath + '~re~'`` as a backup.

    Args:
        filePath: Path of the file to process.
        pairs: Optional sequence of (regex, replacement) pairs.  When
            omitted, the module-level ``findreplace`` list is used.

    Raises:
        UnicodeDecodeError: If the file content is not valid UTF-8.
        IOError, OSError: If the file cannot be read, copied or written.
    """
    if pairs is None:
        pairs = findreplace
    backupName = filePath + '~re~'

    print('reading: %s' % filePath)
    # "with" closes the handle even when reading or decoding fails.
    with open(filePath, 'rb') as inF:
        text = inF.read().decode('utf-8')

    matched = False
    for pattern, replacement in pairs:
        # subn returns the number of replacements made, so a single pass both
        # rewrites the text and tells us whether this pattern matched.
        text, hits = re.subn(pattern, replacement, text)
        if hits:
            matched = True

    if not matched:
        return

    print(' writing: %s' % filePath)
    shutil.copy2(filePath, backupName)
    # Overwrite the existing file through an 'r+b' handle instead of
    # recreating it; the original author's note says this is done to preserve
    # the file creation date.  A fresh 'r+b' handle starts at offset 0, so no
    # preliminary read/seek is needed.
    with open(filePath, 'r+b') as outF:
        outF.write(text.encode('utf-8'))
        outF.truncate()  # drop leftover bytes when the new text is shorter
def myfun(dummy, curdir, filess):
    """Directory-visit callback: process every ``*.html`` file in one directory.

    Args:
        dummy: Ignored.  It is the extra user argument that the caller at the
            bottom of the file passes through to this callback.
        curdir: Path of the directory being visited.
        filess: Names of the entries inside ``curdir``.
    """
    for child in filess:
        # Name filter: at least one character followed by a trailing ".html".
        if not re.search(r'.+\.html$', child, re.U):
            continue
        # Build the path once with os.path.join instead of concatenating a
        # hard-coded "/" twice.  The only visible difference is that a curdir
        # already ending in a separator no longer yields a doubled slash.
        childPath = os.path.join(curdir, child)
        # Directories (or anything else that is not a regular file) whose
        # name happens to end in ".html" are skipped.
        if os.path.isfile(childPath):
            replaceStringInFile(childPath)
# Visit every directory under mydir and hand its entries to myfun.
# os.walk is used because os.path.walk is deprecated and no longer exists in
# Python 3.  The literal 3 is the placeholder user argument that the old call
# passed through; myfun ignores it.  Directory and file names are
# concatenated so that myfun receives every entry of the directory, as before.
for dirPath, dirNames, fileNames in os.walk(mydir):
    myfun(3, dirPath, dirNames + fileNames)
blog comments powered by Disqus