#!/usr/bin/env python3

# This project is licensed under the Blue Oak Model License 1.0.0.
# See: https://blueoakcouncil.org/license/1.0.0

# Version 1.0 : initial release (10may2025)
# texrep : a text replacement utility inspired by version 1.2 of TEXREP,
# a 1995 DOS-based text replacement utility by Stan Peters.

# Nice, simple, effective. I always loved the DOS program and its
# easy-to-remember syntax. This is a Python variant.

# Help Flags: -h  --h  -help  --help  -?  /?

# install:   chmod +x texrep && sudo install texrep /usr/local/bin
# uninstall: sudo rm /usr/local/bin/texrep

# .----------------------------------------------------.
# | Selected ASCII Escape Codes:                       |
# |                                                    |
# | NNN | Character            | Built-in Alternative  |
# | ----|----------------------|---------------------  |
# | 008 | BACKSPACE (BS)       |                       |
# | 009 | HORIZONTAL TAB (TAB) | \TAB                  |
# | 010 | LINE FEED (LF)       | \LF                   |
# | 012 | FORM FEED (FF)       |                       |
# | 013 | CARRIAGE RETURN (CR) | \CR                   |
# | 027 | ESCAPE (ESC)         |                       |
# | 032 | SPACE (SP)           | \SPACE                |
# | 058 | :                    |                       |
# '----------------------------------------------------'

# Tip: to replace ":7" (i.e., a colon followed by 7) with "TEXT", write the
#      colon as its escape code \058 so it is not taken as the old:new
#      separator:
#      "\0587:TEXT"

# Note: The escape codes above should be wrapped in quotes because
#       bash also uses the backslash as an escape character!

#      example: texrep "\SPACE:\SPACE\SPACE" infile

import sys
import re
import os

# Program version string; shown in the help banner and by the version flags.
VERSION = "1.0"

# Named escape sequences accepted inside 'old:new' arguments, mapped to the
# literal character each one stands for. Every key is a backslash followed
# by the name, exactly as it is typed on the command line.
ESCAPE_MAP = {
    '\\LF': '\n',
    '\\CR': '\r',
    '\\TAB': '\t',
    '\\SPACE': ' ',
}

# Command-line arguments that print the help text and exit.
HELP_FLAGS = {'-h', '--h', '-help', '--help', '-?', '/?'}
# Command-line arguments that print the program version and exit.
VERSION_FLAGS = {'-v', '--v', '-ver', '--ver', '-version', '--version'}

def decode_escape(s):
    """Expand texrep escape sequences in *s* and return the decoded string.

    Named sequences from ESCAPE_MAP are substituted first, in the map's
    iteration order. Afterwards every backslash followed by two or three
    digits is replaced by the character whose decimal code point those
    digits spell.
    """
    def _from_code(match):
        # The captured group holds only the digits, without the backslash.
        return chr(int(match.group(1)))

    decoded = s
    for sequence in ESCAPE_MAP:
        decoded = decoded.replace(sequence, ESCAPE_MAP[sequence])

    numeric_escape = re.compile(r'\\(\d{2,3})')
    return numeric_escape.sub(_from_code, decoded)

def show_help():
    """Print the usage/help text, including the current VERSION, to stdout."""
    help_text = f"""
TEXREP v{VERSION} — A Text Replacement Utility - inspired by TEXREP (DOS, 1995)

Usage:  texrep [-i] [-w] old1:new1 [-w] old2:new2 .. infile [-o outfile]
        texrep -h | --help | -v | --version

Each 'old:new' pair is a replacement operation. Replacements are applied in
order. Default output filename: 'infile.NN.ext' (NN is an incremental number).

Flags:
  -i        Case-insensitive matching (global)
  -w        The string must be a fully formed word. Matches next pair only.
  -o FILE   Output filename (default "infile.NN"; infile never overwritten)

Supported Escape Sequences: Use ASCII Value 0-127 (\\NNN) and/or
  (\\LF   Line Feed)       (\\CR     Carriage Return)
  (\\TAB  Tab character)   (\\SPACE  Space character)

Note: Wrap arguments containing backslashes (above) within quotes:

Examples:
  texrep "old1:new1" input.txt      (input.txt never overwritten)
  texrep -i "cat:dog" -w "fire:cold" input.txt
  texrep -h   (show this help)
"""
    print(help_text)

def _validate_pairs(pairs):
    """Check parsed replacement pairs before they are used.

    A pair with an empty search string is rejected: the message goes to
    stderr and the program exits with status 1 (an empty regex pattern
    would match at every position of the text). A search string that is
    given more than once with the same word flag -- compared
    case-insensitively when the pair is case-insensitive -- only produces
    a warning on stderr. An empty replacement is allowed; it deletes the
    matched text.
    """
    seen_pairs = set()
    for old, new, flags in pairs:
        if not old:
            print(f"Error: Empty search string in pair '{old}:{new}'", file=sys.stderr)
            sys.exit(1)

        pair_key = (old.lower() if flags['insensitive'] else old, flags['word'])
        if pair_key in seen_pairs:
            print(f"Warning: Duplicate replacement for '{old}' (case {'in' if flags['insensitive'] else ''}sensitive)", file=sys.stderr)
        seen_pairs.add(pair_key)


def parse_args():
    """Parse ``sys.argv`` into replacement pairs and file names.

    Help and version flags are honoured first and end the program with
    status 0, as does a command line with fewer than two arguments (the
    help text is shown). Otherwise each argument is classified in order:

    * ``-i``      case-insensitive matching for every pair (global, as the
                  help text states, regardless of where it appears)
    * ``-w``      whole-word matching for the next pair only
    * ``-o FILE`` explicit output file name
    * an argument containing ``:`` is an ``old:new`` pair, split at the
      first colon; both halves are passed through decode_escape
    * anything else is the input file (only one is allowed)

    Returns:
        A tuple ``(pairs, input_file, output_file)``. ``pairs`` is a list
        of ``(old, new, flags)`` tuples where ``flags`` is a dict with the
        boolean keys ``'word'`` and ``'insensitive'``. ``output_file`` is
        ``None`` when ``-o`` was not given.

    Exits with status 1 (after printing a message) when ``-o`` has no
    file name, more than one input file is named, the input file does not
    exist, or a pair has an empty search string.
    """
    args = sys.argv[1:]

    if any(arg in HELP_FLAGS for arg in args):
        show_help()
        sys.exit(0)
    if any(arg in VERSION_FLAGS for arg in args):
        print(f"texrep version {VERSION}")
        sys.exit(0)

    # At least one pair and an input file are needed.
    if len(args) < 2:
        show_help()
        sys.exit(0)

    raw_pairs = []  # (old, new, word_only); -i is applied after the loop
    case_insensitive = False
    next_word_only = False
    input_file = None
    output_file = None

    remaining = iter(args)
    for arg in remaining:
        if arg == '-i':
            case_insensitive = True
        elif arg == '-w':
            next_word_only = True
        elif arg == '-o':
            # The argument right after -o is consumed as the file name.
            output_file = next(remaining, None)
            if output_file is None:
                print("Error: -o flag must be followed by a filename.\n")
                show_help()
                sys.exit(1)
        elif ':' in arg:
            # Split before decoding so an escaped colon (\058) stays in 'old'.
            old, new = arg.split(':', 1)
            raw_pairs.append((decode_escape(old), decode_escape(new), next_word_only))
            next_word_only = False
        else:
            if input_file is not None:
                print(f"Error: Multiple input files specified: '{input_file}' and '{arg}'\n")
                show_help()
                sys.exit(1)
            input_file = arg

    if not input_file or not os.path.isfile(input_file):
        print(f"Error: Input file not found: {input_file}\n")
        show_help()
        sys.exit(1)

    # -i is global, so the final value is applied to every pair.
    pairs = [
        (old, new, {'word': word_only, 'insensitive': case_insensitive})
        for old, new, word_only in raw_pairs
    ]

    # This validation used to sit after the return statement and never ran.
    _validate_pairs(pairs)

    return pairs, input_file, output_file

def process_file(pairs, infile, outfile=None):
    """Run pre-flight checks on the input file and the output location.

    Verifies that *infile* is readable, that its first 1024 characters
    decode as UTF-8, and that the directory of the output file is writable.
    When *outfile* is empty or None the name comes from
    get_incremented_filename(infile). On any failed check an error is
    printed to stderr and the program exits with status 1. Nothing is
    written and nothing is returned; *pairs* is not used here.

    NOTE(review): a second ``process_file`` is defined further down in this
    file. It is bound to the same name later, so it replaces this
    definition when the module is loaded.
    """
    def abort(message):
        print(message, file=sys.stderr)
        sys.exit(1)

    # Input must be readable.
    readable = os.access(infile, os.R_OK)
    if not readable:
        abort(f"Error: Cannot read input file '{infile}'")

    # Decode a sample from the start of the file to detect binary input.
    try:
        with open(infile, 'r', encoding='utf-8') as handle:
            handle.read(1024)
    except UnicodeDecodeError:
        abort(f"Error: File '{infile}' appears to be binary (not UTF-8 text)")

    # The directory that will receive the output must be writable.
    if not outfile:
        outfile = get_incremented_filename(infile)
    outdir = os.path.dirname(outfile)
    if outdir == '':
        outdir = '.'
    if not os.access(outdir, os.W_OK):
        abort(f"Error: Cannot write to output directory '{outdir}'")

def apply_replacements(text, pairs):
    """Apply every replacement pair to *text*, in order, and return the result.

    Args:
        text: the string to transform.
        pairs: iterable of ``(old, new, flags)`` tuples. ``old`` is matched
            literally (it is regex-escaped); ``flags['word']`` restricts
            matches to those delimited by ``\\b`` word boundaries and
            ``flags['insensitive']`` ignores case.

    Returns:
        The text after all replacements. Each pair operates on the output
        of the previous one.

    ``new`` is inserted verbatim. It is handed to ``re.sub`` through a
    callable because a replacement *string* is treated as a template:
    a backslash in it would be interpreted (``\\1``, ``\\n``, ``\\g<0>``)
    or raise ``re.error``.
    """
    for old, new, flags in pairs:
        pattern = re.escape(old)
        if flags['word']:
            pattern = r'\b' + pattern + r'\b'
        re_flags = re.IGNORECASE if flags['insensitive'] else 0
        # Bind 'new' as a default so the callable returns this pair's text.
        text = re.sub(pattern, lambda _match, literal=new: literal, text, flags=re_flags)
    return text

def get_incremented_filename(filename):
    """Return the first unused numbered variant of *filename*.

    The result has the form ``name.NN.ext`` where NN runs from 01 to 99 and
    is the lowest number for which no file exists yet. If the name already
    carries a two-digit number in that position, the number is dropped
    before counting, so numbered files do not pile up extra number fields.

    Example:
        input.txt    -> input.01.txt
        input.01.txt -> input.02.txt   (input.01.txt itself exists)
        data.json    -> data.01.json

    Args:
        filename: path of the existing file the new name is derived from.

    Returns:
        The candidate path, in the same directory as *filename*.

    Raises:
        ValueError: if all numbers 01..99 are already taken.
    """
    # Work on the file name only so dots in directory names are never split.
    directory, name = os.path.split(filename)
    stem, ext = os.path.splitext(name)

    # Strip an existing ".NN" (the two-digit form produced below), keeping
    # the real extension untouched.
    head, dot, number = stem.rpartition('.')
    if dot and head and len(number) == 2 and number.isdigit():
        stem = head

    for counter in range(1, 100):
        candidate = os.path.join(directory, f"{stem}.{counter:02d}{ext}")
        if not os.path.exists(candidate):
            return candidate

    raise ValueError(f"Too many numbered versions exist for {filename}")

def process_file(pairs, infile, outfile=None):
    """Apply *pairs* to the text of *infile* and write the result to a file.

    Args:
        pairs: iterable of ``(old, new, flags)`` tuples, handed unchanged to
            apply_replacements.
        infile: path of the UTF-8 text file to read. It is never modified.
        outfile: path to write. When empty or None, a numbered name is
            obtained from get_incremented_filename(infile).

    Returns:
        True when the output file was written.

    Every failure (missing or unreadable input, non-UTF-8 content, output
    identical to input, missing or unwritable output directory, I/O error)
    prints a message to stderr and exits with status 1. A file larger than
    10 MB only triggers a warning.

    Both files are opened with ``newline=''`` so line endings are neither
    translated on read nor on write: CR and CRLF reach the replacement
    step unchanged and untouched line endings are preserved in the output.
    """
    # --- Input Validation ---
    if not os.path.exists(infile):
        print(f"Error: Input file not found: '{infile}'", file=sys.stderr)
        sys.exit(1)

    if not os.access(infile, os.R_OK):
        print(f"Error: No read permission for input file: '{infile}'", file=sys.stderr)
        sys.exit(1)

    # --- Binary File Detection ---
    # Cheap early check on the first KB; the full read below catches
    # undecodable bytes further into the file.
    try:
        with open(infile, 'r', encoding='utf-8') as f:
            f.read(1024)
    except UnicodeDecodeError:
        print(f"Error: File '{infile}' appears to be binary (not UTF-8 text)", file=sys.stderr)
        sys.exit(1)

    # --- File Size Warning ---
    filesize = os.path.getsize(infile)
    if filesize > 10 * 1024 * 1024:  # 10MB
        print(f"Warning: Processing large file ({filesize/1024/1024:.1f} MB)", file=sys.stderr)

    # --- Output File Safety ---
    if outfile:
        # Prevent input=output catastrophe
        if os.path.exists(outfile) and os.path.samefile(infile, outfile):
            print("Error: Input and output files are identical", file=sys.stderr)
            sys.exit(1)

        outdir = os.path.dirname(outfile) or '.'
        if not os.path.exists(outdir):
            print(f"Error: Output directory doesn't exist: '{outdir}'", file=sys.stderr)
            sys.exit(1)
    else:
        try:
            outfile = get_incremented_filename(infile)
        except ValueError as e:
            # Raised when every numbered name is already taken.
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)

    if not os.access(os.path.dirname(outfile) or '.', os.W_OK):
        print(f"Error: No write permission for output location: '{outfile}'", file=sys.stderr)
        sys.exit(1)

    # --- Actual File Processing ---
    try:
        with open(infile, 'r', encoding='utf-8', newline='') as f:
            text = f.read()

        result = apply_replacements(text, pairs)

        with open(outfile, 'w', encoding='utf-8', newline='') as f:
            f.write(result)

        print(f"Success: Output written to '{outfile}'")
        return True

    except UnicodeDecodeError:
        # Invalid bytes beyond the sampled first KB.
        print(f"Error: File '{infile}' appears to be binary (not UTF-8 text)", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        print(f"Error: File operation failed - {str(e)}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Last-resort boundary for the command-line tool: report and exit.
        print(f"Unexpected error: {str(e)}", file=sys.stderr)
        sys.exit(1)

def main():
    """Command-line entry point: parse the arguments, then process the file."""
    parsed = parse_args()  # (pairs, input file, output file)
    process_file(*parsed)


if __name__ == "__main__":
    main()