"""Manual smoke test for the recognition service.

Uploads every supported file found in ``INPUT_DIR`` to ``API_URL`` and saves
each successful JSON response into ``OUTPUT_DIR``.
"""

import json
import mimetypes
import os

import requests

# Directory with the input photos / Excel files.
INPUT_DIR = "./test_receipts"
# Directory where the JSON results are written.
OUTPUT_DIR = "./json_results"
# Address of the service endpoint.
API_URL = "http://10.25.100.250:5006/recognize"

# Lower-case file extensions that are picked up from INPUT_DIR.
SUPPORTED_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.xlsx')
# MIME type sent for .xlsx uploads (set explicitly instead of being guessed).
XLSX_MIME_TYPE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
# Generous timeout (seconds): Excel + LLM processing can take a long time.
REQUEST_TIMEOUT = 60


def _guess_mime_type(file_path):
    """Return the MIME type to send for *file_path*.

    ``.xlsx`` files always get the spreadsheet MIME type; everything else is
    guessed from the file name, falling back to ``image/jpeg`` when the guess
    yields nothing.
    """
    if file_path.lower().endswith('.xlsx'):
        return XLSX_MIME_TYPE
    mime_type, _ = mimetypes.guess_type(file_path)
    return mime_type or 'image/jpeg'


def test_parsing():
    """Send each supported file in ``INPUT_DIR`` to the service and store the results.

    For every file a one-line status is printed: ``OK`` with the number of
    items found, ``FAIL`` with the HTTP status and body for non-200 responses,
    or ``ERROR`` with the exception text. A failure on one file never aborts
    the run. Successful responses are written to
    ``OUTPUT_DIR/<filename>_RESULT.json``.

    Returns:
        None. The function returns early if ``INPUT_DIR`` is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # isdir (rather than exists) also covers the case where the path is a
    # regular file, which would otherwise make os.listdir raise.
    if not os.path.isdir(INPUT_DIR):
        print(f"Папка {INPUT_DIR} не найдена.")
        return

    # Sorted for a reproducible processing order; directories whose names
    # happen to carry a supported extension are skipped.
    filenames = sorted(
        name
        for name in os.listdir(INPUT_DIR)
        if name.lower().endswith(SUPPORTED_EXTENSIONS)
        and os.path.isfile(os.path.join(INPUT_DIR, name))
    )
    print(f"Найдено {len(filenames)} файлов. Тестируем парсинг...")

    for filename in filenames:
        file_path = os.path.join(INPUT_DIR, filename)
        mime_type = _guess_mime_type(file_path)

        # Flush so the progress text is visible while the request is blocking.
        print(f"Processing {filename} ({mime_type})...", end=" ", flush=True)

        try:
            with open(file_path, 'rb') as f:
                # Named `upload` so it does not shadow the list being iterated.
                upload = {'image': (filename, f, mime_type)}
                response = requests.post(
                    API_URL, files=upload, timeout=REQUEST_TIMEOUT
                )

            if response.status_code != 200:
                print(f"FAIL: {response.status_code} - {response.text}")
                continue

            data = response.json()
            items = data.get("items", [])
            source = data.get("source", "unknown")
            doc_number = data.get("doc_number", "")

            # Save the raw JSON response next to the other results.
            out_name = f"{filename}_RESULT.json"
            with open(os.path.join(OUTPUT_DIR, out_name), "w", encoding="utf-8") as out:
                json.dump(data, out, ensure_ascii=False, indent=2)

            print(f"OK ({source}) -> Found {len(items)} items. Doc#: {doc_number}")
        except Exception as e:
            # Deliberately broad: this is the per-file boundary of a manual
            # test run, so any failure is reported and the loop continues.
            print(f"ERROR: {e}")


if __name__ == "__main__":
    test_parsing()