mirror of
https://github.com/serty2005/rmser.git
synced 2026-02-04 19:02:33 -06:00
279 lines
8.4 KiB
Python
279 lines
8.4 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""
pack_project_dump.py

Packs a project's source code into a single text file that is convenient to analyse:
- a file tree
- then the contents of every file, in blocks delimited by markers
- junk directories are filtered out (node_modules, dist, build, etc.)
- a per-file size limit keeps the dump from bloating
- content is decoded as UTF-8, with undecodable bytes replaced

Example:
    python pack_project_dump.py --root . --out project_dump.txt
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import fnmatch
|
||
import hashlib
|
||
import os
|
||
from dataclasses import dataclass
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Iterable, List, Optional, Tuple
|
||
|
||
|
||
# Directory names skipped during the walk, wherever they occur in the tree.
DEFAULT_EXCLUDE_DIRS = {
    ".cache",
    ".git",
    ".idea",
    ".next",
    ".turbo",
    ".vercel",
    ".vscode",
    "build",
    "coverage",
    "dist",
    "node_modules",
}
|
||
|
||
# File names dropped from the dump by exact name.
# Lock files could be kept, but they are often huge.
DEFAULT_EXCLUDE_FILES = {
    "pnpm-lock.yaml",
    "yarn.lock",
    "package-lock.json",
}
|
||
|
||
# Suffixes (plus a few whole file names) that mark a file as "probably text".
DEFAULT_TEXT_EXTS = {
    # scripts
    ".js",
    ".jsx",
    ".mjs",
    ".cjs",
    ".ts",
    ".tsx",
    # markup, styles and data
    ".html",
    ".css",
    ".scss",
    ".sass",
    ".less",
    ".json",
    ".yml",
    ".yaml",
    ".md",
    ".txt",
    # configuration-style names
    ".env",
    ".env.example",
    ".gitignore",
    ".editorconfig",
    "Dockerfile",
}
|
||
|
||
|
||
@dataclass(frozen=True)
class FileEntry:
    """Immutable index record for one file whose content was written to the dump."""

    # Path relative to the project root, using "/" as the separator.
    rel_path: str
    # Size of the file on disk, in bytes.
    size: int
    # Hex SHA-256 digest of the bytes that were read (possibly truncated).
    sha256: str
|
||
|
||
|
||
def sha256_bytes(data: bytes) -> str:
    """Return the SHA-256 digest of ``data`` as a lowercase hex string."""
    return hashlib.sha256(data).hexdigest()
|
||
|
||
|
||
def is_probably_text(path: Path, extra_exts: Optional[set[str]] = None) -> bool:
    """Guess from the file name alone whether ``path`` holds text.

    A file counts as text when its lower-cased suffix, its exact name, or its
    lower-cased name appears in ``extra_exts`` or ``DEFAULT_TEXT_EXTS``, or
    when it is one of a few well-known extensionless config files.

    Matching on the whole name matters because ``Path.suffix`` is empty for
    names such as ``Dockerfile`` or ``.gitignore`` and is ``.example`` for
    ``.env.example``; a suffix-only comparison can never hit those entries.

    Args:
        path: File path to classify; only its name is inspected, the file is
            never opened.
        extra_exts: Optional additional suffixes or file names to accept.

    Returns:
        True if the file is probably text, False otherwise.
    """
    name = path.name
    candidates = (path.suffix.lower(), name, name.lower())

    # Caller-supplied entries are checked first.
    if extra_exts and any(key in extra_exts for key in candidates):
        return True
    if any(key in DEFAULT_TEXT_EXTS for key in candidates):
        return True
    # Extensionless files that are nevertheless text by name.
    return name in {".eslintrc", ".prettierrc"}
|
||
|
||
|
||
def should_exclude_path(
    rel_parts: Tuple[str, ...],
    exclude_dirs: set[str],
    exclude_file_globs: List[str],
    exclude_files: set[str],
) -> bool:
    """Decide whether a file, given as root-relative path segments, is excluded.

    Args:
        rel_parts: Path segments relative to the root; the last one is the
            file name.
        exclude_dirs: Directory names that exclude everything beneath them.
        exclude_file_globs: ``fnmatch`` patterns tested against both the
            "/"-joined relative path and the bare file name.
        exclude_files: File names excluded by exact match.

    Returns:
        True if any rule excludes the file, False otherwise.
    """
    # Any directory segment (everything except the file name) can exclude the file.
    for segment in rel_parts[:-1]:
        if segment in exclude_dirs:
            return True

    filename = rel_parts[-1] if rel_parts else ""
    if filename in exclude_files:
        return True

    joined = "/".join(rel_parts)
    return any(
        fnmatch.fnmatch(joined, pattern) or fnmatch.fnmatch(filename, pattern)
        for pattern in exclude_file_globs
    )
|
||
|
||
|
||
def iter_project_files(
    root: Path,
    exclude_dirs: set[str],
    exclude_files: set[str],
    exclude_file_globs: List[str],
) -> Iterable[Path]:
    """Yield every file under ``root`` that survives the exclusion rules.

    Args:
        root: Directory to walk.
        exclude_dirs: Directory names that are not descended into.
        exclude_files: File names skipped by exact match.
        exclude_file_globs: ``fnmatch`` patterns for files to skip.

    Yields:
        Paths built from the walked directory and the file name, in
        ``os.walk`` order.
    """
    for current_dir, subdirs, file_names in os.walk(root):
        # Prune in place so os.walk never enters an excluded directory.
        kept = [name for name in subdirs if name not in exclude_dirs]
        subdirs[:] = kept

        base = Path(current_dir)
        for file_name in file_names:
            candidate = base / file_name
            parts = tuple(candidate.relative_to(root).parts)
            if not should_exclude_path(parts, exclude_dirs, exclude_file_globs, exclude_files):
                yield candidate
|
||
|
||
|
||
def build_tree_listing(paths: List[Path], root: Path) -> str:
    """Render a sorted, flat listing of ``paths`` relative to ``root``.

    Returns:
        A heading line followed by one "- <relative/path>" line per file,
        with "/" separators and a trailing newline.
    """
    relative = [str(item.relative_to(root)).replace(os.sep, "/") for item in paths]
    relative.sort()
    rows = ["Дерево файлов:", *(f"- {entry}" for entry in relative)]
    return "\n".join(rows) + "\n"
|
||
|
||
|
||
def read_file_bytes(path: Path, max_file_bytes: int) -> Tuple[bytes, bool]:
    """Read at most ``max_file_bytes`` bytes from the start of ``path``.

    Only ``max_file_bytes + 1`` bytes are requested from the file, so a very
    large file is never loaded into memory in full; the one extra byte is
    enough to tell whether the file is longer than the limit.

    Args:
        path: File to read, opened in binary mode.
        max_file_bytes: Maximum number of bytes to return.

    Returns:
        ``(data, truncated)``, where ``truncated`` is True when the file holds
        more than ``max_file_bytes`` bytes and ``data`` was cut to the limit.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    if max_file_bytes < 0:
        # A negative limit is not meaningful; fall back to a full read so the
        # slicing below behaves exactly as it always has for such input.
        data = path.read_bytes()
    else:
        with path.open("rb") as handle:
            data = handle.read(max_file_bytes + 1)

    if len(data) > max_file_bytes:
        return data[:max_file_bytes], True
    return data, False
|
||
|
||
|
||
def decode_text(data: bytes) -> str:
    """Decode ``data`` as UTF-8 without ever raising.

    Undecodable bytes are substituted with U+FFFD.
    """
    return str(data, encoding="utf-8", errors="replace")
|
||
|
||
|
||
def pack_dump(
    root: Path,
    out_path: Path,
    include_globs: List[str],
    exclude_dirs: set[str],
    exclude_files: set[str],
    exclude_file_globs: List[str],
    max_file_kb: int,
    only_text: bool,
) -> None:
    """Write a single-file text snapshot of the project under ``root``.

    The output consists of, in order: an index of the dumped files
    (path | size | sha256), a header with the resolved root and the UTC
    timestamp, a flat listing of every selected file, and then one
    ``<<<FILE_BEGIN>>> ... <<<FILE_END>>>`` block per file.

    Args:
        root: Project directory to walk.
        out_path: Destination file; it is overwritten and is itself never
            included in the dump.
        include_globs: If non-empty, only files whose "/"-separated relative
            path matches at least one ``fnmatch`` pattern are kept.
        exclude_dirs: Directory names that are not descended into.
        exclude_files: File names skipped by exact match.
        exclude_file_globs: ``fnmatch`` patterns for files to skip.
        max_file_kb: Per-file content limit in KB; longer files are truncated.
        only_text: If True, the content of files that do not look like text
            is omitted (they still appear in the file listing).
    """
    max_file_bytes = max_file_kb * 1024

    def rel_posix(path: Path) -> str:
        return str(path.relative_to(root)).replace(os.sep, "/")

    # A dump left under ``root`` by a previous run must not be packed into the
    # new one, otherwise the output grows on every invocation.
    out_key = os.path.normcase(os.path.abspath(out_path))
    all_files = [
        p
        for p in iter_project_files(root, exclude_dirs, exclude_files, exclude_file_globs)
        if os.path.normcase(os.path.abspath(p)) != out_key
    ]

    # Apply include globs if provided.
    if include_globs:

        def is_included(path: Path) -> bool:
            rel = rel_posix(path)
            return any(fnmatch.fnmatch(rel, g) for g in include_globs)

        all_files = [p for p in all_files if is_included(p)]

    entries: List[FileEntry] = []
    blocks: List[str] = []

    # Header and project tree.
    blocks.append(f"Снимок проекта: {root.resolve()}")
    blocks.append(f"Дата (UTC): {datetime.now(timezone.utc).isoformat()}")
    blocks.append("")
    blocks.append(build_tree_listing(all_files, root))

    for p in sorted(all_files, key=lambda x: str(x)):
        rel = rel_posix(p)

        if only_text and not is_probably_text(p):
            continue

        try:
            raw, truncated = read_file_bytes(p, max_file_bytes)
            # stat() is inside the try as well: a file that disappears or
            # becomes unreadable mid-run yields an error block, not a crash.
            size_on_disk = p.stat().st_size
        except Exception as e:  # best effort: one bad file must not abort the dump
            blocks.append("<<<FILE_BEGIN>>>")
            blocks.append(f"path: {rel}")
            blocks.append("error: не удалось прочитать файл")
            blocks.append(f"exception: {type(e).__name__}: {e}")
            blocks.append("<<<FILE_END>>>")
            blocks.append("")
            continue

        # The digest covers only the bytes actually read (possibly truncated).
        sha = sha256_bytes(raw)
        entries.append(FileEntry(rel_path=rel, size=size_on_disk, sha256=sha))

        text = decode_text(raw)

        blocks.append("<<<FILE_BEGIN>>>")
        blocks.append(f"path: {rel}")
        blocks.append(f"size_bytes: {size_on_disk}")
        blocks.append(f"sha256_first_{max_file_kb}kb: {sha}")
        if truncated:
            blocks.append(f"truncated: true (первые {max_file_kb} KB)")
        else:
            blocks.append("truncated: false")
        blocks.append("<<<CONTENT>>>")
        blocks.append(text)
        blocks.append("<<<FILE_END>>>")
        blocks.append("")

    # Short index; it is only known after all files were read, but it is
    # placed at the very top of the output.
    blocks.insert(
        0,
        "Индекс файлов (путь | размер | sha256 первых N KB):\n"
        + "\n".join(f"- {e.rel_path} | {e.size} | {e.sha256}" for e in entries)
        + "\n",
    )

    out_path.write_text("\n".join(blocks), encoding="utf-8")
|
||
|
||
|
||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the dump tool from ``sys.argv``.

    Returns:
        Namespace with ``root``, ``out``, ``include``, ``exclude_file``,
        ``max_file_kb`` and ``only_text``.
    """
    # One (flag, add_argument keyword arguments) pair per option, in help order.
    option_specs = [
        ("--root", {"default": ".", "help": "Корень проекта"}),
        ("--out", {"default": "react_ts_frontend.txt", "help": "Файл-выход (один)"}),
        (
            "--include",
            {
                "action": "append",
                "default": [],
                "help": "Глоб-паттерн для включения (можно несколько), например: 'src/**' или '**/*.tsx'",
            },
        ),
        (
            "--exclude-file",
            {
                "action": "append",
                "default": [],
                "help": "Глоб-паттерн для исключения файлов, например: '**/*.min.js'",
            },
        ),
        (
            "--max-file-kb",
            {
                "type": int,
                "default": 512,
                "help": "Максимальный объём на один файл (KB). Остальное отрежется.",
            },
        ),
        (
            "--only-text",
            {
                "action": "store_true",
                "help": "Включать только вероятно текстовые файлы по расширению/имени",
            },
        ),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
|
||
|
||
|
||
def main() -> None:
    """CLI entry point: parse options, write the dump, print where it went."""
    options = parse_args()
    project_root = Path(options.root).resolve()
    destination = Path(options.out).resolve()

    # Copies of the module-level defaults are passed, not the defaults themselves.
    settings = {
        "root": project_root,
        "out_path": destination,
        "include_globs": options.include,
        "exclude_dirs": set(DEFAULT_EXCLUDE_DIRS),
        "exclude_files": set(DEFAULT_EXCLUDE_FILES),
        "exclude_file_globs": options.exclude_file,
        "max_file_kb": options.max_file_kb,
        "only_text": options.only_text,
    }
    pack_dump(**settings)

    print(f"Готово: {destination}")


if __name__ == "__main__":
    main()