Python: Find/Replace Multi-line Strings in a Dir

By Xah Lee. Date: . Last updated: .

Here's a Python script that does find/replace on all HTML files in a dir. The find string can span multiple lines.

Suppose in your HTML file, you want to replace the text:

<body>

<table>

by this text:

<body>
<h1>New Pricing!</h1>
<table>

One solution is to read the entire file content at once as a single string, so that the find string can match across line breaks. Here's the script:

# -*- coding: utf-8 -*-
# python

# example of find/replace multi-line string for all html files in a dir
# warning: this example does not deal with Unicode encoded files well

import os, sys, shutil

# root directory; every .html file under it is processed
inputDir = "/home/jane/web"

# text to search for; it spans several lines, including the blank line
findStr = '''<body>

<table>'''

# replacement text: the same markup with the new heading inserted
repStr = '''<body>
<h1>New Pricing!</h1>
<table>'''

def replaceStringInFile(findStr, repStr, filePath):
    """Replace every occurrence of findStr by repStr in the file filePath.

    The whole file is read into memory as one string, so findStr may span
    multiple lines. The new content is first written to a temporary file
    (filePath + '~~'); the original is then copied to a backup
    (filePath + '~') and the temporary file is moved over the original.
    The file is rewritten and backed up even when findStr does not occur.

    Args:
        findStr: the text to search for.
        repStr: the text that replaces each occurrence of findStr.
        filePath: path of the file to modify in place.

    Raises:
        IOError/OSError: if the file cannot be read, written, copied or moved.
    """
    tempName = filePath + '~~'
    backupName = filePath + '~'

    # "with" closes the handle even if read() raises
    with open(filePath) as inputFile:
        textContent = inputFile.read()

    try:
        with open(tempName, 'w') as outputFile:
            outputFile.write(textContent.replace(findStr, repStr))
    except Exception:
        # do not leave a partially written temp file next to the original
        if os.path.exists(tempName):
            os.remove(tempName)
        raise

    shutil.copy2(filePath, backupName)
    # shutil.move tries os.rename first and falls back to copy-and-delete
    # when the rename is refused (os.rename on Windows will not overwrite
    # an existing file)
    shutil.move(tempName, filePath)
    # a single parenthesised argument prints the same on Python 2 and 3
    print("file processed: {}".format(filePath))

def filterFile(dummyArg, thisDir, dirChildrenList):
    """Run the find/replace on every '.html' file listed for one directory.

    Args:
        dummyArg: unused; present only to fit the walk-callback signature.
        thisDir: path of the directory being visited.
        dirChildrenList: names of the entries inside thisDir.
    """
    for childName in dirChildrenList:
        # the extension match is exact and case-sensitive
        if os.path.splitext(childName)[1] != '.html':
            continue
        childPath = thisDir + '/' + childName
        # skip directories whose name happens to end in .html
        if os.path.isfile(childPath):
            replaceStringInFile(findStr, repStr, childPath)

# os.path.walk was removed in Python 3; os.walk visits the same directories
# and works on both Python 2 and 3. Only the file names are passed on,
# because filterFile ignores anything that is not a regular file anyway.
for thisDir, _subDirNames, fileNames in os.walk(inputDir):
    filterFile(None, thisDir, fileNames)

Ask me questions on Patreon