When is file automation useful?

For repetitive tasks: bulk cleanup, backups, renames, format conversion, and log rotation.

pathlib is more modern and readable than os.path. Prefer pathlib for new projects.

shutil is a high-level module for copying, moving, and removing files and directories.

[2026] Python File Automation | Organize, Rename, and Back Up Files

2026년 3월 28일 · 18분 읽기 · 수정 2026년 3월 28일 Intermediate Tutorial

이 글의 핵심

Automate file workflows in Python: find and rename files, organize by extension, backups with shutil, duplicate detection, and log cleanup—patterns and code you can reuse.

Introduction

“Automate the repetitive work”

Automating file operations in Python saves a lot of time in real workflows.

1. Finding files

Files with a given extension

아래 코드는 Python을 사용한 구현 예제입니다. 필요한 모듈을 import하고, 함수를 통해 로직을 구현합니다. 각 부분의 역할을 이해하면서 코드를 살펴보시기 바랍니다.

from pathlib import Path
def find_files(directory, extension):
    """Recursively find regular files with a specific extension.

    Args:
        directory: Root directory to search (str or path-like).
        extension: Extension to match, with or without the leading dot
            ('pdf' and '.pdf' are treated the same).

    Returns:
        A list of Path objects, one per matching file, in the order the
        directory traversal yields them.
    """
    # Accept '.pdf' as well as 'pdf'; without this the pattern would become
    # '*..pdf' and silently match nothing.
    suffix = extension.lstrip('.')
    root = Path(directory)
    # glob also yields directories whose name matches the pattern, so keep
    # regular files only.
    return [match for match in root.glob(f'**/*.{suffix}') if match.is_file()]
# Usage: recursively collect every path matching *.pdf under the current
# directory and print each one
pdf_files = find_files('.', 'pdf')
for file in pdf_files:
    print(file)

Conditional search

다음은 Python을 활용한 상세한 구현 코드입니다. 필요한 모듈을 import하고, 함수를 통해 로직을 구현합니다. 각 부분의 역할을 이해하면서 코드를 살펴보시기 바랍니다.

import os
from datetime import datetime, timedelta
def find_old_files(directory, days=30):
    """Find files whose last modification is more than N days in the past.

    Args:
        directory: Root directory; it is walked recursively.
        days: Age threshold in days. A file is reported when its
            modification time is earlier than ``now - days``.

    Returns:
        A list of Path objects for the files older than the threshold.
        Entries whose metadata cannot be read (dangling symlinks, files
        removed while the walk is running, permission errors) are skipped
        instead of aborting the scan.
    """
    # Compare raw epoch seconds so no datetime object is built per file.
    cutoff_ts = (datetime.now() - timedelta(days=days)).timestamp()
    old_files = []

    for root, _dirs, files in os.walk(directory):
        for name in files:
            filepath = Path(root) / name
            try:
                mtime = filepath.stat().st_mtime
            except OSError:
                # The age of this entry cannot be determined; keep scanning.
                continue

            if mtime < cutoff_ts:
                old_files.append(filepath)

    return old_files
# Usage: count the files under the current directory that were last modified
# more than 90 days ago
old_files = find_old_files('.', days=90)
print(f"{len(old_files)} old file(s)")

2. Renaming files

Batch rename

from pathlib import Path
def rename_files(directory, old_pattern, new_pattern):
    """Batch rename entries in a directory by substring replacement.

    Every entry directly inside ``directory`` (the search is not recursive)
    whose name contains ``old_pattern`` is renamed with each occurrence of
    that substring replaced by ``new_pattern``. A rename that would land on
    a different, already existing entry is skipped so nothing is overwritten.

    Args:
        directory: Directory whose entries are renamed.
        old_pattern: Substring to look for in each name; must be non-empty.
        new_pattern: Replacement text.

    Raises:
        ValueError: If ``old_pattern`` is empty.
    """
    if not old_pattern:
        # '' is contained in every name, and str.replace('', x) inserts x
        # between all characters, which would mangle every entry.
        raise ValueError("old_pattern must not be empty")

    path = Path(directory)

    # Snapshot the listing before renaming so the renames performed below
    # cannot feed back into the iteration.
    for file in sorted(path.glob('*')):
        if old_pattern not in file.name:
            continue

        new_name = file.name.replace(old_pattern, new_pattern)
        if new_name == file.name:
            # Pattern and replacement are identical; nothing to do.
            continue

        target = file.parent / new_name
        # samefile() lets case-only renames through on case-insensitive
        # filesystems, where the target "exists" but is the source itself.
        if target.exists() and not target.samefile(file):
            print(f"Skipped: {file.name} ({new_name} already exists)")
            continue

        file.rename(target)
        print(f"{file.name} → {new_name}")
# Usage: replace the substring 'old_' with 'new_' in the names of the entries
# directly inside the current directory
rename_files('.', 'old_', 'new_')

Adding sequence numbers

아래 코드는 Python을 사용한 구현 예제입니다. 함수를 통해 로직을 구현합니다. 각 부분의 역할을 이해하면서 코드를 살펴보시기 바랍니다.

def add_numbers(directory, extension):
    """Prepend a three-digit, zero-padded counter to matching file names.

    Entries directly inside ``directory`` that match ``*.<extension>`` are
    taken in sorted path order and numbered starting from 1, so
    ``photo.jpg`` becomes ``001_photo.jpg``.
    """
    targets = sorted(Path(directory).glob(f'*.{extension}'))

    for index, original in enumerate(targets, start=1):
        numbered = f"{index:03d}_{original.name}"
        # with_name() keeps the entry in its current parent directory.
        original.rename(original.with_name(numbered))
        print(f"{original.name} → {numbered}")
# Usage: number the *.jpg entries directly inside ./images in sorted path order
add_numbers('./images', 'jpg')
# Example result: photo.jpg → 001_photo.jpg

3. Organizing files

Sort into folders by extension

import shutil
from pathlib import Path
def organize_files(directory):
    """Sort the files of a directory into subfolders named after their extension.

    Only regular files directly inside ``directory`` are considered. A file
    such as ``a.jpg`` is moved to ``<directory>/jpg/a.jpg``; files without
    an extension are left where they are.
    """
    base = Path(directory)

    for entry in base.iterdir():
        # Subdirectories (including extension folders that already exist)
        # are never moved.
        if not entry.is_file():
            continue

        # Drop the leading dot: '.jpg' -> 'jpg'; empty when there is no suffix.
        folder_name = entry.suffix[1:]
        if not folder_name:
            continue

        destination = base / folder_name
        destination.mkdir(exist_ok=True)

        shutil.move(str(entry), str(destination / entry.name))
        print(f"{entry.name} → {folder_name}/")
# Usage: sort the files directly inside ./downloads into per-extension subfolders
organize_files('./downloads')

4. Automated backups

Backup script

import shutil
from pathlib import Path
from datetime import datetime
def backup_directory(source, backup_root):
    """Create a timestamped zip archive of a directory tree.

    ``source`` is first copied to ``<backup_root>/backup_<YYYYmmdd_HHMMSS>``,
    that copy is zipped next to itself, and the uncompressed copy is then
    removed, leaving only ``backup_<timestamp>.zip``.
    """
    stamp = f"{datetime.now():%Y%m%d_%H%M%S}"
    staging = Path(backup_root) / f"backup_{stamp}"

    # Step 1: plain copy of the whole tree into the staging folder.
    shutil.copytree(source, staging)
    print(f"Backup done: {staging}")

    # Step 2: compress the staging folder, then discard the unzipped copy.
    shutil.make_archive(base_name=str(staging), format='zip', root_dir=staging)
    shutil.rmtree(staging)
    print(f"Archive created: {staging}.zip")
# Usage: archive ./project as ./backups/backup_<timestamp>.zip
backup_directory('./project', './backups')

Pruning old backups

아래 코드는 Python을 사용한 구현 예제입니다. 함수를 통해 로직을 구현합니다. 각 부분의 역할을 이해하면서 코드를 살펴보시기 바랍니다.

def cleanup_old_backups(backup_dir, keep_count=5):
    """Keep only the N most recent backups and delete the rest.

    Args:
        backup_dir: Directory containing ``backup_*.zip`` archives.
        keep_count: Number of most recently modified archives to keep.
            ``0`` deletes every archive.

    Raises:
        ValueError: If ``keep_count`` is negative.
    """
    if keep_count < 0:
        raise ValueError("keep_count must be non-negative")

    path = Path(backup_dir)
    # Oldest first; the file name breaks ties between identical mtimes so the
    # ordering is deterministic.
    backups = sorted(
        path.glob('backup_*.zip'),
        key=lambda x: (x.stat().st_mtime, x.name),
    )

    # Slice by explicit length: backups[:-keep_count] would be backups[:0]
    # (empty) for keep_count == 0 and delete nothing.
    excess = max(len(backups) - keep_count, 0)

    for backup in backups[:excess]:
        backup.unlink()
        print(f"Deleted: {backup.name}")
# Usage: prune backup_*.zip archives in ./backups, asking to keep five
cleanup_old_backups('./backups', keep_count=5)

5. Finding duplicates

Hash-based duplicate detection

import hashlib
from collections import defaultdict
def _md5_of_file(path, chunk_size=1024 * 1024):
    """Return the MD5 hex digest of a file, reading it in fixed-size chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        # iter() with a sentinel stops at the empty read that marks EOF.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def find_duplicates(directory):
    """Find duplicate files using MD5 hashes.

    Every regular file under ``directory`` (recursively) is hashed. Files
    are streamed in chunks, so large files are never loaded into memory
    in one piece.

    Args:
        directory: Root directory to scan.

    Returns:
        A dict mapping each MD5 hex digest shared by two or more files to
        the list of Path objects with that digest. Each group is also
        printed.
    """
    hashes = defaultdict(list)

    for file in Path(directory).rglob('*'):
        if file.is_file():
            hashes[_md5_of_file(file)].append(file)

    # Only digests seen more than once describe duplicates.
    duplicates = {h: files for h, files in hashes.items() if len(files) > 1}

    for hash_val, files in duplicates.items():
        print(f"\nDuplicate group ({hash_val[:8]}...):")
        for file in files:
            print(f"  - {file}")

    return duplicates
# Usage: print and collect groups of files under ./documents that share an MD5 hash
duplicates = find_duplicates('./documents')

6. Real-world example

Log cleanup script

from pathlib import Path
import gzip
from datetime import datetime, timedelta
def cleanup_logs(log_dir, archive_days=7, delete_days=30):
    """
    Rotate ``*.log`` files in a directory according to their age.

    The age is the whole number of days since the file's modification time.
    - age >= delete_days: the file is deleted
    - otherwise, age >= archive_days: the file is gzipped to ``<name>.log.gz``
      and the uncompressed original is removed
    - younger files are left untouched
    """
    reference = datetime.now()

    for entry in Path(log_dir).glob('*.log'):
        modified = datetime.fromtimestamp(entry.stat().st_mtime)
        age = (reference - modified).days

        # Deletion takes precedence over archiving.
        if age >= delete_days:
            entry.unlink()
            print(f"Deleted: {entry.name} ({age} days)")
            continue

        if age < archive_days:
            continue

        archive = entry.with_suffix('.log.gz')
        with open(entry, 'rb') as raw, gzip.open(archive, 'wb') as packed:
            packed.writelines(raw)

        entry.unlink()
        print(f"Compressed: {entry.name} → {archive.name}")
# Usage: in ./logs, gzip *.log files that are at least 7 days old and delete
# those that are at least 30 days old
cleanup_logs('./logs', archive_days=7, delete_days=30)

Practical tips

File automation checklist

다음은 Python을 활용한 상세한 구현 코드입니다. 필요한 모듈을 import하고, 에러 처리를 통해 안정성을 확보합니다. 각 부분의 역할을 이해하면서 코드를 살펴보시기 바랍니다.

# NOTE: this is an illustrative fragment. `src`, `dst`, `files` and `process`
# are not defined here and must be supplied by the surrounding script.
# ✅ Safer file operations
# 1. Back up first
# 2. Dry-run mode (preview before destructive steps)
# 3. Logging
# ✅ Error handling
# Catch the specific failures a move can hit and report them instead of
# letting the script crash.
try:
    shutil.move(src, dst)
except PermissionError:
    print("Permission denied")
except FileNotFoundError:
    print("File not found")
# ✅ Progress feedback
# tqdm is a third-party package (not part of the standard library); it wraps
# an iterable and shows a progress bar labelled with `desc`.
from tqdm import tqdm
for file in tqdm(files, desc="Processing"):
    process(file)

Summary

Key takeaways

Finding files: glob, rglob
Renaming: rename()
Moving files: shutil.move()
Backups: copytree(), make_archive()
Duplicates: compare hashes

Next steps

Python file handling | Read, write, CSV, JSON