#!/usr/bin/env python3
# This project is licensed under the Blue Oak Model License 1.0.0.
# See: https://blueoakcouncil.org/license/1.0.0
#
# Version 1.0 : initial release (10may2025)
#
# texrep : a text replacement utility inspired by version 1.2 of TEXREP,
#          a 1995 DOS-based text replacement utility by Stan Peters.
#          Nice, simple, effective. I always loved the DOS program and its
#          easy to remember syntax. This is a python variant.
#
# Help Flags: -h --h -help --help -? /?
#
# install:   chmod +x texrep && sudo install texrep /usr/local/bin
# uninstall: sudo rm /usr/local/bin/texrep
#
# .----------------------------------------------------.
# | Selected ASCII Escape Codes:                       |
# |                                                    |
# | NNN | Character            | Built-in Alternative  |
# | ----|----------------------|---------------------  |
# | 008 | BACKSPACE (BS)       |                       |
# | 009 | HORIZONTAL TAB (TAB) | \TAB                  |
# | 010 | LINE FEED (LF)       | \LF                   |
# | 012 | FORM FEED (FF)       |                       |
# | 013 | CARRIAGE RETURN (CR) | \CR                   |
# | 027 | ESCAPE (ESC)         |                       |
# | 032 | SPACE (SP)           | \SPACE                |
# | 058 | :                    |                       |
# '----------------------------------------------------'
#
# Tip: if you wish to replace ":7" (ie; colon7) with "TEXT", enter:
#          "\0587:TEXT"
#    : The escape codes above should be wrapped within quotes because
#      bash uses the backslash as an escape character too!
#      example: texrep "\SPACE:\SPACE\SPACE" infile

import os
import re
import sys

VERSION = "1.0"

# Named escape sequences accepted inside 'old:new' pairs.
ESCAPE_MAP = {
    '\\LF': '\n',
    '\\CR': '\r',
    '\\TAB': '\t',
    '\\SPACE': ' ',
}

HELP_FLAGS = {'-h', '--h', '-help', '--help', '-?', '/?'}
VERSION_FLAGS = {'-v', '--v', '-ver', '--ver', '-version', '--version'}


def decode_escape(s):
    """Expand texrep escape sequences in *s* and return the result.

    Named escapes from ESCAPE_MAP (\\LF, \\CR, \\TAB, \\SPACE) are expanded
    first; afterwards every backslash followed by two or three decimal
    digits is replaced by the character with that code point.
    """
    for name, char in ESCAPE_MAP.items():
        s = s.replace(name, char)
    return re.sub(r'\\(\d{2,3})', lambda m: chr(int(m.group(1))), s)


def show_help():
    """Print the usage text to stdout."""
    print(f"""
TEXREP v{VERSION} — A Text Replacement Utility
 - inspired by TEXREP (DOS, 1995)

Usage:
  texrep [-i] [-w] old1:new1 [-w] old2:new2 .. infile [-o outfile]
  texrep -h | --help | -v | --version

Each 'old:new' pair is a replacement operation.
Replacements are applied in order.
Default output filename: 'infile.NN.ext' (NN is an incremental number).

Flags:
  -i        Case-insensitive matching (global)
  -w        The string must be a fully formed word. Matches next pair only.
  -o FILE   Output filename (default "infile.NN"; infile never overwritten)

Supported Escape Sequences:
  Use ASCII Value 0-127 (\\NNN) and/or
  (\\LF Line Feed)  (\\CR Carriage Return)
  (\\TAB Tab character)  (\\SPACE Space character)

Note: Wrap arguments containing backslashes (above) within quotes:

Examples:
  texrep "old1:new1" input.txt  (input.txt never overwritten)
  texrep -i "cat:dog" -w "fire:cold" input.txt
  texrep -h  (show this help)
""")


def _usage_error(message):
    """Report *message* on stderr, show the help text and exit with status 1."""
    print(f"Error: {message}\n", file=sys.stderr)
    show_help()
    sys.exit(1)


def validate_pairs(pairs):
    """Reject empty search strings and warn about duplicated searches.

    *pairs* is a list of ``(old, new, flags)`` tuples as built by
    parse_args(). An empty ``old`` terminates the program with status 1
    (an empty pattern would match between every character). A repeated
    search string only produces a warning on stderr. An empty ``new`` is
    allowed: it deletes the matched text.
    """
    seen_pairs = set()
    for old, new, flags in pairs:
        if not old:
            print(f"Error: Empty search string in pair '{old}:{new}'",
                  file=sys.stderr)
            sys.exit(1)

        pair_key = (old.lower() if flags['insensitive'] else old,
                    flags['word'])
        if pair_key in seen_pairs:
            sensitivity = 'in' if flags['insensitive'] else ''
            print(f"Warning: Duplicate replacement for '{old}' "
                  f"(case {sensitivity}sensitive)", file=sys.stderr)
        seen_pairs.add(pair_key)


def parse_args():
    """Parse sys.argv and return ``(pairs, input_file, output_file)``.

    ``pairs`` is a list of ``(old, new, flags)`` tuples with escape
    sequences already decoded; ``flags`` is a dict with the boolean keys
    ``'word'`` and ``'insensitive'``. ``output_file`` is None when no
    ``-o`` option was given.

    Help/version flags, too few arguments and every usage error end the
    program via sys.exit().

    Any argument containing ':' is taken as a replacement pair, so an
    input filename containing a colon cannot be passed.
    """
    if any(arg in HELP_FLAGS for arg in sys.argv):
        show_help()
        sys.exit(0)

    if any(arg in VERSION_FLAGS for arg in sys.argv):
        print(f"texrep version {VERSION}")
        sys.exit(0)

    if len(sys.argv) < 3:
        show_help()
        sys.exit(0)

    args = sys.argv[1:]
    pairs = []
    case_insensitive = False
    output_file = None
    next_word_only = False
    input_file = None

    i = 0
    while i < len(args):
        arg = args[i]
        if arg == '-i':
            # Applies to every pair that follows on the command line.
            case_insensitive = True
        elif arg == '-w':
            # Applies to the next pair only; reset once that pair is read.
            next_word_only = True
        elif arg == '-o':
            if i + 1 >= len(args):
                _usage_error("-o flag must be followed by a filename.")
            output_file = args[i + 1]
            i += 1  # skip the filename consumed by -o
        elif ':' in arg:
            # Split on the raw colon BEFORE decoding so that \058 can be
            # used to put a literal ':' into either side of the pair.
            old, new = arg.split(':', 1)
            flags = {
                'word': next_word_only,
                'insensitive': case_insensitive,
            }
            pairs.append((decode_escape(old), decode_escape(new), flags))
            next_word_only = False
        else:
            if input_file is not None:
                _usage_error("Multiple input files specified: "
                             f"'{input_file}' and '{arg}'")
            input_file = arg
        i += 1

    if not input_file or not os.path.isfile(input_file):
        _usage_error(f"Input file not found: {input_file}")

    # Validate before returning; previously this code sat after the return
    # statement and never ran.
    validate_pairs(pairs)

    return pairs, input_file, output_file


def apply_replacements(text, pairs):
    """Apply every ``(old, new, flags)`` replacement to *text*, in order.

    ``old`` is matched literally (it is regex-escaped). With
    ``flags['word']`` the match must not be adjacent to a word character
    on either side; with ``flags['insensitive']`` case is ignored.
    ``new`` is inserted verbatim.
    """
    for old, new, flags in pairs:
        pattern = re.escape(old)
        if flags['word']:
            # Lookarounds instead of \b: \b requires a word character on
            # one side, which fails for searches such as "c++". For
            # ordinary words the two forms are equivalent.
            pattern = r'(?<!\w)' + pattern + r'(?!\w)'
        re_flags = re.IGNORECASE if flags['insensitive'] else 0
        # A function replacement keeps `new` literal; passing the string
        # itself would make re.sub interpret backslashes and \1 in it.
        text = re.sub(pattern, lambda _m, repl=new: repl, text,
                      flags=re_flags)
    return text


def get_incremented_filename(filename):
    """Return the first unused name of the form ``stem.NN.ext``.

    A trailing two-digit ``.NN`` counter in the stem is replaced and
    counting continues after it; otherwise counting starts at 01.

    Example:
        input.txt     -> input.01.txt
        input.01.txt  -> input.02.txt
        data.json     -> data.01.json
        data.2024.csv -> data.2024.01.csv  (not a two-digit counter)

    Raises:
        ValueError: if no free counter up to 99 is left.
    """
    directory, name = os.path.split(filename)
    stem, ext = os.path.splitext(name)

    start = 1
    numbered = re.fullmatch(r'(.+)\.([0-9]{2})', stem, flags=re.DOTALL)
    if numbered:
        stem = numbered.group(1)
        start = int(numbered.group(2)) + 1

    for counter in range(start, 100):  # 99 is the safety limit
        candidate = os.path.join(directory, f"{stem}.{counter:02d}{ext}")
        if not os.path.exists(candidate):
            return candidate

    raise ValueError(f"Too many numbered versions exist for {filename}")


def process_file(pairs, infile, outfile=None):
    """Apply *pairs* to *infile* and write the result to *outfile*.

    When *outfile* is None a numbered sibling of *infile* is chosen with
    get_incremented_filename(). The input must be UTF-8 text. Line endings
    are read and written untranslated so that \\CR and \\LF replacements
    see the file's real bytes.

    Returns True on success; every failure prints a message to stderr and
    exits with status 1.
    """
    # --- Input Validation ---
    if not os.path.exists(infile):
        print(f"Error: Input file not found: '{infile}'", file=sys.stderr)
        sys.exit(1)

    if not os.access(infile, os.R_OK):
        print(f"Error: No read permission for input file: '{infile}'",
              file=sys.stderr)
        sys.exit(1)

    # --- File Size Warning ---
    filesize = os.path.getsize(infile)
    if filesize > 10 * 1024 * 1024:  # 10MB
        print(f"Warning: Processing large file ({filesize/1024/1024:.1f} MB)",
              file=sys.stderr)

    # --- Output File Safety ---
    if outfile:
        # Prevent input=output catastrophe
        if os.path.exists(outfile) and os.path.samefile(infile, outfile):
            print("Error: Input and output files are identical",
                  file=sys.stderr)
            sys.exit(1)

        outdir = os.path.dirname(outfile) or '.'
        if not os.path.exists(outdir):
            print(f"Error: Output directory doesn't exist: '{outdir}'",
                  file=sys.stderr)
            sys.exit(1)
    else:
        try:
            outfile = get_incremented_filename(infile)
        except ValueError as e:
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)

    if not os.access(os.path.dirname(outfile) or '.', os.W_OK):
        print(f"Error: No write permission for output location: '{outfile}'",
              file=sys.stderr)
        sys.exit(1)

    # --- Actual File Processing ---
    try:
        # newline='' disables newline translation: CR/LF stay as they are.
        with open(infile, 'r', encoding='utf-8', newline='') as f:
            text = f.read()
    except UnicodeDecodeError:
        # Decoding the whole file catches invalid bytes anywhere, not only
        # in the first kilobyte.
        print(f"Error: File '{infile}' appears to be binary (not UTF-8 text)",
              file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        print(f"Error: File operation failed - {str(e)}", file=sys.stderr)
        sys.exit(1)

    result = apply_replacements(text, pairs)

    try:
        with open(outfile, 'w', encoding='utf-8', newline='') as f:
            f.write(result)
    except OSError as e:
        print(f"Error: File operation failed - {str(e)}", file=sys.stderr)
        sys.exit(1)

    print(f"Success: Output written to '{outfile}'")
    return True


def main():
    """Command-line entry point: parse arguments, then process the file."""
    pairs, infile, outfile = parse_args()
    process_file(pairs, infile, outfile)


if __name__ == "__main__":
    main()