Python: Find Replace in a Dir

By Xah Lee.

Here's a script that does Find and Replace on all files in a dir.

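Features:

- Replaces multiple find/replace string pairs in one run.
- Walks a dir recursively, limited by a min and max depth level.
- Processes only files whose name matches a regex.
- Skips any dir whose full path matches a regex.
- Can process an explicit list of files instead of walking a dir.
- Optionally makes a backup of each changed file.
- Prints each changed file with its number of replacements.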

For regex version, see Python: Find Replace by Regex

# Python 3

# find and replace multiple pairs of strings in a dir or list of files

# home page http://xahlee.info/python/find_replace_dir.html

# version 2019-02-14

import os, sys, shutil, re
import datetime

# If this list is not empty, only these files are processed and INPUT_DIR is
# not walked. Each entry must be the full path to a file, not a dir.
file_list = [

]

# Root dir to walk. Must be a full path; a trailing slash is optional.
INPUT_DIR = "/Users/xah/web/"
MIN_LEVEL = 1  # files and dirs directly inside INPUT_DIR are level 1
MAX_LEVEL = 5  # inclusive
FILE_NAME_REGEX = r"\.html$"  # only file names matching this are processed
PRINT_FILENAME_WHEN_NO_CHANGE = False
BACKUP_FNAME_EXT = '~bk~'  # appended to the file path to name the backup
DO_BACKUP = False

# A regex string. Any dir whose full path matches is skipped.
#
# The alternatives are joined with "|" instead of being written as one raw
# string continued with backslash-newline: inside a raw string literal a
# backslash-newline is NOT a line continuation, it stays in the string, so
# every alternative after the first would begin with an escaped newline and
# could never match a path.
DIRPATH_SKIP_REGEX = "|".join([
    r"REC-SVG11-20110816",
    r"clojure-doc-1\.8",
    r"ocaml_doc",
    r"css3_spec_bg",
    r"javascript_ecma-262_5\.1_2011",
    r"python_doc_2\.7\.6",
    r"python_doc_3\.3\.3",
])

# Each entry is a (find_string, replace_string) pair of plain strings,
# not regexes. Pairs are applied in the order listed.
FIND_REPLACE_LIST = [

    (
        '''<div id="x44506"></div>''',
        '<!-- comment_6b83f -->',
    ),

    # more pair here

]

##################################################
# code begin

# Normalize once so the walk below works from a path with no trailing slash
# or doubled separators.
INPUT_DIR = os.path.normpath(INPUT_DIR)

# Fail fast on a malformed FIND_REPLACE_LIST before touching any file.
# A missing comma between the two strings makes Python concatenate them into
# one, so the "pair" ends up with fewer than 2 elements (or is a bare string);
# an extra string gives more than 2. Both are rejected here.
for pair in FIND_REPLACE_LIST:
    if not isinstance(pair, (tuple, list)) or len(pair) != 2:
        sys.exit(
            "Error: each replacement pair must have exactly 2 elements "
            "(find string, replace string). Probably a missing or extra comma in: {!r}".format(pair)
        )
    if not all(isinstance(s, str) for s in pair):
        sys.exit("Error: both elements of a replacement pair must be strings: {!r}".format(pair))
    if pair[0] == "":
        # An empty find string matches between every character, so the
        # replacement would be inserted throughout every file.
        sys.exit("Error: find string must not be empty: {!r}".format(pair))

def replace_string_in_file(file_path):
    """Apply every find/replace pair in FIND_REPLACE_LIST to the file at file_path.

    The file is read as UTF-8. Pairs are applied in list order, each on the
    result of the previous one. If at least one occurrence was replaced, the
    file is rewritten in place as UTF-8 and a line with the replacement count
    and path is printed. If DO_BACKUP is true, a copy of the original is first
    saved at file_path + BACKUP_FNAME_EXT.

    A file that is not valid UTF-8 is left untouched and reported on stderr.
    Returns None.
    """
    try:
        # "with" closes the handle even when decoding fails part-way through.
        with open(file_path, "r", encoding="utf-8") as input_file:
            file_content = input_file.read()
    except UnicodeDecodeError:
        print("skipped (not valid UTF-8):", file_path, file=sys.stderr)
        return

    num_replaced = 0
    for find_str, replace_str in FIND_REPLACE_LIST:
        # Count before replacing: str.replace does not report how many it did.
        num_replaced += file_content.count(find_str)
        file_content = file_content.replace(find_str, replace_str)

    if num_replaced == 0:
        if PRINT_FILENAME_WHEN_NO_CHANGE:
            print("no change:", file_path)
        return

    print("◆ ", num_replaced, " ", file_path.replace(os.sep, "/"))

    if DO_BACKUP:
        # Copy rather than rename, so the file being rewritten keeps its
        # permissions and an existing backup is simply overwritten.
        shutil.copy2(file_path, file_path + BACKUP_FNAME_EXT)

    # Write with the same encoding used for reading; the platform default
    # encoding may not be UTF-8 and could corrupt or reject non-ASCII text.
    with open(file_path, "w", encoding="utf-8") as output_file:
        output_file.write(file_content)

##################################################

# Report the run parameters, then process either the explicit file_list or
# every matching file under INPUT_DIR.
print(datetime.datetime.now())
print("Input Dir:", INPUT_DIR)
for find_str, replace_str in FIND_REPLACE_LIST:
    print("Find string:\n「{}」".format(find_str))
    print("Replace string:\n「{}」".format(replace_str))
    print("\n")

if file_list:
    for ff in file_list:
        replace_string_in_file(os.path.normpath(ff))
else:
    # Compile once instead of handing the pattern strings to re.search
    # for every dir and file.
    file_name_re = re.compile(FILE_NAME_REGEX)
    dir_skip_re = re.compile(DIRPATH_SKIP_REGEX)

    for dirPath, subdirList, fileList in os.walk(INPUT_DIR):
        # Depth of this dir below INPUT_DIR (0 for INPUT_DIR itself).
        # Derived from the relative path so it is also right when INPUT_DIR
        # is the filesystem root, where counting separators is off by one.
        relPath = os.path.relpath(dirPath, INPUT_DIR)
        curDirLevel = 0 if relPath == os.curdir else relPath.count(os.sep) + 1
        curFileLevel = curDirLevel + 1  # files directly in INPUT_DIR are level 1

        if curFileLevel >= MAX_LEVEL:
            # Anything deeper would exceed MAX_LEVEL; emptying the list in
            # place stops os.walk from descending any further.
            subdirList[:] = []

        if not (MIN_LEVEL <= curFileLevel <= MAX_LEVEL):
            continue
        if dir_skip_re.search(dirPath):
            continue

        for fName in fileList:
            # File names containing "#" are skipped as well.
            if file_name_re.search(fName) and "#" not in fName:
                replace_string_in_file(os.path.join(dirPath, fName))

print("Done.")

Sample output:

2019-02-15 21:11:38.944800
Input Dir: /Users/xah/web
Find string:
「actually a file」
Replace string:
「not a string」

◆  1   /Users/xah/web/ergoemacs_org/emacs_manual/elisp/Abbrev-Files.html
◆  1   /Users/xah/web/ergoemacs_org/emacs_manual/emacs/Init-Examples.html
Done.

Here's a screenshot of the script running in Emacs.

xah python replace 2020-05-23 vcknj
xah python replace 2020-05-23

Python Text Processing

Find Replace Scripts

If you have a question, put $5 at patreon and message me.

Python

Regex

Text Processing

Web

Misc