Python: Find Replace in a Dir

By Xah Lee. Date: . Last updated: .

Here's a script that does Find and Replace on all files in a dir.

Features:

For a regex version, see "Python: Find Replace by Regex".

# -*- coding: utf-8 -*-
# Python 3

# file name: xah_replace.py3
# find and replace multiple pairs of strings in a dir or list of files
# home page http://xahlee.info/python/find_replace_dir.html
# version 2005-01-30 2019-02-14 2022-01-11

import os, sys, shutil, re
import datetime

# Explicit file list. When non-empty, only these files are processed and
# INPUT_DIR is not walked. Each entry must be the full path of a file (not a dir).
FILE_LIST = []

# Root dir to process; must be a full path. A trailing slash is optional.
INPUT_DIR = "c:/Users/xah/web/xahlee_info/xx/"

# Depth range of the files to process, both ends inclusive.
# Files and dirs directly inside INPUT_DIR are level 1.
MIN_LEVEL = 1
MAX_LEVEL = 9

# Regex searched in each file name (not the full path);
# only files whose name matches are processed.
FILE_NAME_REGEX = r"\.html$"

# When True, files in which nothing was replaced are reported too.
PRINT_FILENAME_WHEN_NO_CHANGE = False

# When True, a copy of each changed file is saved before it is rewritten.
DO_BACKUP = True
# Suffix that ends the backup file name.
BACKUP_FNAME_EXT = "~bk~"

# Pairs of (find string, replace string).
# Triple quotes let a string span several lines.
FIND_REPLACE_LIST = [
    (
        '''commands''',
        '''commands''',
    ),
    # add more pairs here
]

# A regex string. Any directory whose full path matches it is skipped.
# The alternatives are joined by adjacent-literal concatenation. A backslash
# at the end of a line inside a raw string is NOT a line continuation: the
# backslash and the newline both stay in the pattern, so every alternative
# after the first would require a newline and never match a path.
DIRPATH_SKIP_REGEX = (
    r"emacs_manual|"
    r"REC-SVG11-20110816|"
    r"node_api"
)

#HHH___________________________________________________________________
# code begin

# Timestamp taken once at startup, so the report header and every backup
# file name of this run share the same value.
DATETIME_STR = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Normalize the root dir (drops a trailing slash, uses the OS separator).
INPUT_DIR = os.path.normpath(INPUT_DIR)

# Validate the find/replace table before touching any file.
# A missing comma between two adjacent string literals silently merges them
# into one string, leaving an entry with fewer than 2 elements. A bare
# string would also pass a length-only check if it has 2 characters, so the
# entry type is checked as well.
for a_pair in FIND_REPLACE_LIST:
    if not isinstance(a_pair, (tuple, list)) or len(a_pair) != 2:
        sys.exit("Error: each find/replace entry must be a pair of exactly 2 strings. Probably missing a comma.")

def replace_string_in_file(file_path):
    """Apply every pair in FIND_REPLACE_LIST to the file at file_path.

    The file is read as UTF-8. The pairs are applied in list order, so a
    later pair sees the text produced by the earlier ones. When at least
    one occurrence was found, the count and the path are printed, a backup
    copy is made if DO_BACKUP is true, and the file is rewritten in place
    as UTF-8. Otherwise the file is left untouched and is reported only
    when PRINT_FILENAME_WHEN_NO_CHANGE is true.

    A file that cannot be decoded as UTF-8 is skipped silently.

    Returns None.
    """
    try:
        # "with" closes the handle even when decoding fails.
        with open(file_path, "r", encoding="utf-8") as input_file:
            file_content = input_file.read()
    except UnicodeDecodeError:
        # Not UTF-8 text (e.g. a binary file): leave it alone.
        return

    num_replaced = 0
    for find_str, replace_str in FIND_REPLACE_LIST:
        # Count before replacing: the occurrences are gone afterwards.
        num_replaced += file_content.count(find_str)
        file_content = file_content.replace(find_str, replace_str)

    if num_replaced > 0:
        print(num_replaced, file_path.replace(os.sep, "/"))

        if DO_BACKUP:
            # The backup must be taken before the original is overwritten.
            backup_fname = file_path + "~" + DATETIME_STR + BACKUP_FNAME_EXT
            shutil.copy2(file_path, backup_fname)

        with open(file_path, "w", encoding="utf-8") as output_file:
            output_file.write(file_content)
    elif PRINT_FILENAME_WHEN_NO_CHANGE:
        print("no change:", file_path)

#HHH___________________________________________________________________

# Report header: records the settings of this run.
print("-*- coding: utf-8; mode: xah-find-output -*-")
print("date time:", DATETIME_STR)
print("python intepreter:", sys.executable)
print("python version:", sys.version)
print("Result of:", os.path.realpath(__file__))
print("INPUT_DIR:", INPUT_DIR)
print("FILE_NAME_REGEX:", FILE_NAME_REGEX)
print("DO_BACKUP:" + str(DO_BACKUP))

for x in FIND_REPLACE_LIST:
    print("Find string:\n{}".format(x[0]))
    print("Replace string:\n{}".format(x[1]))
    print("\n")

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

if FILE_LIST:
    # An explicit file list takes precedence; INPUT_DIR is not walked.
    for ff in FILE_LIST:
        replace_string_in_file(os.path.normpath(ff))
else:
    for dirPath, subdirList, fileList in os.walk(INPUT_DIR):
        # Depth of dirPath below INPUT_DIR (0 for INPUT_DIR itself).
        # Derived from the relative path rather than from separator counts,
        # which are off by one when INPUT_DIR is a filesystem root.
        relPath = os.path.relpath(dirPath, INPUT_DIR)
        curDirLevel = 0 if relPath == os.curdir else relPath.count(os.sep) + 1
        # Files sit one level below the dir that contains them.
        curFileLevel = curDirLevel + 1

        if curFileLevel >= MAX_LEVEL:
            # Everything further down is deeper than MAX_LEVEL.
            # Emptying the list in place stops os.walk from descending.
            subdirList[:] = []

        if curFileLevel < MIN_LEVEL or curFileLevel > MAX_LEVEL:
            continue
        if re.search(DIRPATH_SKIP_REGEX, dirPath):
            continue

        for fName in fileList:
            # File names containing "#" are never processed.
            if re.search(FILE_NAME_REGEX, fName) and "#" not in fName:
                replace_string_in_file(os.path.join(dirPath, fName))

print("Done.")

Here's a screenshot running in emacs.

xah python replace 2020-05-23 vcknj
xah python replace 2020-05-23

Python Text Processing

Find Replace Scripts

Python

Regex

Text Processing

Web

Misc