import os
import requests
import json
import mimetypes

# Folder where you place the receipt photos to test with
INPUT_DIR = "./test_receipts"
# Folder where the raw text output is saved
OUTPUT_DIR = "./raw_outputs"
# Address of the running OCR service
API_URL = "http://10.25.100.250:5006/recognize"

def process_images(input_dir=None, output_dir=None, api_url=None):
    """Send each supported file in the input folder to the OCR service and save its raw text.

    For every ``.jpg``/``.jpeg``/``.png``/``.xlsx`` file found directly in the
    input folder, the file is POSTed as the multipart field ``image``. On an
    HTTP 200 response the JSON fields ``raw_text`` and ``source`` are written
    to ``<filename>_RAW.txt`` in the output folder. Failures are reported on
    stdout and do not stop the remaining files from being processed.

    Args:
        input_dir: Folder to read files from. Defaults to ``INPUT_DIR``.
        output_dir: Folder to write results to. Defaults to ``OUTPUT_DIR``.
        api_url: Endpoint to POST files to. Defaults to ``API_URL``.

    Returns:
        None. If the input folder does not exist it is created and the
        function returns without processing anything.
    """
    input_dir = INPUT_DIR if input_dir is None else input_dir
    output_dir = OUTPUT_DIR if output_dir is None else output_dir
    api_url = API_URL if api_url is None else api_url

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    if not os.path.exists(input_dir):
        os.makedirs(input_dir, exist_ok=True)
        print(f"Папка {input_dir} создана. Положите туда фото чеков и перезапустите скрипт.")
        return

    supported_suffixes = ('.jpg', '.jpeg', '.png', '.xlsx')
    # Sorted for a reproducible processing order; sub-directories whose names
    # happen to end with a supported suffix are skipped since they cannot be uploaded.
    filenames = sorted(
        name for name in os.listdir(input_dir)
        if name.lower().endswith(supported_suffixes)
        and os.path.isfile(os.path.join(input_dir, name))
    )

    if not filenames:
        print(f"В папке {input_dir} нет изображений.")
        return

    print(f"Найдено {len(filenames)} файлов. Начинаю обработку...")

    for filename in filenames:
        file_path = os.path.join(input_dir, filename)

        # Excel files get an explicit MIME type; everything else is guessed
        # from the name, falling back to JPEG when the guess fails.
        if filename.lower().endswith('.xlsx'):
            mime_type = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        else:
            mime_type = mimetypes.guess_type(file_path)[0] or 'image/jpeg'

        # flush so the progress line is visible while the request is in flight.
        print(f"Processing {filename}...", end=" ", flush=True)

        try:
            with open(file_path, 'rb') as fh:
                # Distinct name: the original rebound the list being iterated.
                payload = {'image': (filename, fh, mime_type)}
                response = requests.post(api_url, files=payload, timeout=30)

            if response.status_code != 200:
                print(f"FAIL: {response.status_code} - {response.text}")
                continue

            data = response.json()
            # A JSON null for raw_text would make write() fail after the output
            # file was already created; treat it as empty text instead.
            raw_text = data.get("raw_text") or ""
            source = data.get("source", "unknown")

            # Save the raw text, preceded by a small header naming the source.
            out_name = f"{filename}_RAW.txt"
            with open(os.path.join(output_dir, out_name), "w", encoding="utf-8") as out:
                out.write(f"Source: {source}\n")
                out.write("=" * 20 + "\n")
                out.write(raw_text)

            print(f"OK ({source}) -> {out_name}")

        except Exception as e:
            # Deliberate per-file boundary: one bad file or a network error
            # must not abort the rest of the batch.
            print(f"ERROR: {e}")

# Run the batch only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    process_images()