Python: Find Replace in a Dir

By Xah Lee. Date: . Last updated: .

Here's a script that does Find and Replace on all files in a dir.

Features:

  1. Can process a whole directory, limit processing by directory depth, or process only a given list of files.
  2. Filter files by file extension or regex.
  3. Automatic backup. Can be turned off.
  4. Supports more than one find/replace pair.
  5. Prints the number of replacements for each changed file.

For a regex version, see Python: Find Replace by Regex

# Python 3

# find and replace multiple pairs of strings in a dir or list of files

# home page http://xahlee.info/python/find_replace_dir.html

# version 2019-02-14

import os, sys, shutil, re
import datetime

# If this list is not empty, only these files are processed and INPUT_DIR is
# not walked. Each entry must be the full path to a file, not a dir.
file_list = [

]

# Must be a full path. A trailing slash is optional.
INPUT_DIR = "/Users/xah/web/"
MIN_LEVEL = 1  # files and dirs directly inside INPUT_DIR are level 1
MAX_LEVEL = 5  # inclusive
# When walking INPUT_DIR, only file names matching this regex are processed.
FILE_NAME_REGEX = r"\.html$"
PRINT_FILENAME_WHEN_NO_CHANGE = False
# Appended to the original file name to form the backup file name.
BACKUP_FNAME_EXT = '~bk~'
DO_BACKUP = False

# A regex string. Any dir whose full path matches it is skipped.
#
# The alternatives are joined by adjacent string literals inside parentheses.
# Do NOT use a backslash at end of line inside a raw string to continue it:
# in a raw literal the backslash and the newline both stay in the string, so
# every alternative after the first would begin with a literal newline and
# could never match a path.
DIRPATH_SKIP_REGEX = (
    r"REC-SVG11-20110816"
    r"|clojure-doc-1\.8"
    r"|ocaml_doc"
    r"|css3_spec_bg"
    r"|javascript_ecma-262_5\.1_2011"
    r"|python_doc_2\.7\.6"
    r"|python_doc_3\.3\.3"
)

# Each entry is a (find string, replace string) pair. Pairs are applied in
# order, as plain strings (not regex).
FIND_REPLACE_LIST = [

    (
        '''<div id="disqus_thread"></div>''',
        '<!-- comment_6b83f -->',
    ),

    # more pairs here

]

##################################################
# code begin

# Normalize the path: removes any trailing slash and redundant separators.
INPUT_DIR = os.path.normpath(INPUT_DIR)

# Validate every pair up front so that a malformed entry stops the script
# before any file is modified.
for x in FIND_REPLACE_LIST:
    if len(x) != 2:
        sys.exit(
            "Error: each replacement pair must have exactly 2 elements, "
            "but one has {}. Probably a missing or extra comma.".format(len(x))
        )
    if x[0] == "":
        # str.replace("", s) inserts s between every character, which would
        # corrupt every processed file.
        sys.exit("Error: a find string is empty.")

def replace_string_in_file(file_path):
    """Apply every find/replace pair in FIND_REPLACE_LIST to the file at file_path.

    The file is read and written as UTF-8. A file that cannot be decoded as
    UTF-8 is skipped and left untouched. The file is rewritten only when at
    least one occurrence was found. If DO_BACKUP is true, the original file is
    first renamed to file_path + BACKUP_FNAME_EXT (replacing any older backup
    of that name).

    Prints one line (replacement count and path) for each changed file, and,
    when PRINT_FILENAME_WHEN_NO_CHANGE is true, one line for each unchanged
    file. Returns None.
    """
    try:
        # "with" closes the handle even when decoding fails.
        with open(file_path, "r", encoding="utf-8") as input_file:
            file_content = input_file.read()
    except UnicodeDecodeError:
        # Not valid UTF-8 text: skip without modifying the file.
        return

    num_replaced = 0
    # Pairs are applied in order, so a later pair sees the result of earlier ones.
    for find_str, replace_str in FIND_REPLACE_LIST:
        num_replaced += file_content.count(find_str)
        file_content = file_content.replace(find_str, replace_str)

    if num_replaced > 0:
        print("◆ ", num_replaced, " ", file_path.replace(os.sep, "/"))
        if DO_BACKUP:
            # os.replace overwrites an existing backup on every platform;
            # os.rename raises on Windows if the target already exists.
            os.replace(file_path, file_path + BACKUP_FNAME_EXT)
        # Write with the same encoding used for reading; the platform default
        # may not be UTF-8 and would corrupt or fail on non-ASCII text.
        with open(file_path, "w", encoding="utf-8") as output_file:
            output_file.write(file_content)
    elif PRINT_FILENAME_WHEN_NO_CHANGE:
        print("no change:", file_path)

##################################################

# Report the run parameters before touching any file.
print(datetime.datetime.now())
print("Input Dir:", INPUT_DIR)
for x in FIND_REPLACE_LIST:
    print("Find string:\n「{}」".format(x[0]))
    print("Replace string:\n「{}」".format(x[1]))
    print("\n")

if file_list:
    # Explicit file list: process exactly these files. The level, file-name
    # and skip-dir filters below are not applied to them.
    for ff in file_list:
        replace_string_in_file(os.path.normpath(ff))
else:
    # Compile the filters once instead of looking them up for every file.
    file_name_re = re.compile(FILE_NAME_REGEX, re.U)
    dirpath_skip_re = re.compile(DIRPATH_SKIP_REGEX, re.U)
    input_dir_depth = INPUT_DIR.count(os.sep)

    for dirPath, subdirList, fileList in os.walk(INPUT_DIR):
        # Files directly inside INPUT_DIR are level 1.
        curDirLevel = dirPath.count(os.sep) - input_dir_depth
        curFileLevel = curDirLevel + 1

        if curFileLevel >= MAX_LEVEL:
            # Everything below this dir is deeper than MAX_LEVEL and would be
            # filtered out anyway; clearing the list in place stops os.walk
            # from descending further.
            del subdirList[:]

        if curFileLevel < MIN_LEVEL or curFileLevel > MAX_LEVEL:
            continue
        if dirpath_skip_re.search(dirPath):
            continue

        for fName in fileList:
            # File names containing "#" are skipped.
            if file_name_re.search(fName) and "#" not in fName:
                replace_string_in_file(os.path.join(dirPath, fName))

print("Done.")

Sample output:

2019-02-15 21:11:38.944800
Input Dir: /Users/xah/web
Find string:
「actually a file」
Replace string:
「not a string」

◆  1   /Users/xah/web/ergoemacs_org/emacs_manual/elisp/Abbrev-Files.html
◆  1   /Users/xah/web/ergoemacs_org/emacs_manual/emacs/Init-Examples.html
Done.

Python Text Processing

Find Replace Scripts

If you have a question, put $5 at patreon and message me.

Python

Regex

Text Processing

Web

Misc