# rmser/ocr-service/main.py

import logging
import os
from typing import List

from fastapi import FastAPI, File, UploadFile, HTTPException
from pydantic import BaseModel
import cv2
import numpy as np

# Import project modules
from imgproc import preprocess_image
from parser import parse_receipt_text, ParsedItem
from ocr import ocr_engine
from qr_manager import detect_and_decode_qr, fetch_data_from_api
# Import the new module
from yandex_ocr import yandex_engine
from llm_parser import llm_parser

# Root logging setup: every record carries timestamp, logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used by the request handlers in this file.
logger = logging.getLogger(__name__)

# Application object; the routes in this file are registered on it via decorators.
app = FastAPI(title="RMSER OCR Service (Hybrid: QR + Yandex + Tesseract)")

class RecognitionResult(BaseModel):
    """Response body returned by the ``/recognize`` endpoint.

    Carries the name of the recognition stage that produced the answer, the
    parsed receipt items and the text those items were derived from.
    """
    # Stage that produced the result; the values used in this file are
    # 'qr_api', 'yandex_vision' and 'tesseract_ocr'.
    source: str
    # Items parsed from the receipt.
    items: List[ParsedItem]
    # Text the items came from (OCR output, or the decoded QR payload);
    # defaults to an empty string.
    raw_text: str = ""

@app.get("/health")
def health_check():
    """Report that the service process is up and answering requests."""
    payload = {"status": "ok"}
    return payload

@app.post("/recognize", response_model=RecognitionResult)
async def recognize_receipt(image: UploadFile = File(...)):
    """Recognize receipt items from an uploaded image.

    Stages are tried in priority order and the first one that yields a
    result is returned:

    1. QR code + FNS API (priority 1 - ideal accuracy).
    2. Yandex Vision OCR (priority 2 - high accuracy, if configured).
    3. Tesseract OCR (priority 3 - local fallback).

    Args:
        image: Uploaded file; its declared content type must start with
            ``image/``.

    Returns:
        RecognitionResult naming the stage that produced the items, the
        items themselves and the text they were derived from.

    Raises:
        HTTPException: 400 when the upload is not declared as an image, is
            empty or cannot be decoded; 500 for any other failure while
            processing the request.
    """
    logger.info(
        "Received file: %s, content_type: %s", image.filename, image.content_type
    )

    # content_type may be None (the multipart part is not required to carry
    # one); treat a missing type like any other non-image type.
    if not (image.content_type or "").startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image")

    # NOTE(review): the stage helpers below are called synchronously from this
    # coroutine; if they block on network or CPU they hold up the event loop.
    try:
        # Read the raw bytes
        content = await image.read()

        # Convert to numpy for QR detection and local preprocessing
        nparr = np.frombuffer(content, np.uint8)

        # An empty buffer makes imdecode raise instead of returning None, so
        # short-circuit it and report it as invalid image data (400).
        original_cv_image = (
            cv2.imdecode(nparr, cv2.IMREAD_COLOR) if nparr.size else None
        )

        if original_cv_image is None:
            raise HTTPException(status_code=400, detail="Invalid image data")

        # --- STAGE 1: QR Code Strategy ---
        logger.info("--- Stage 1: QR Code Detection ---")
        qr_raw = detect_and_decode_qr(original_cv_image)

        if qr_raw:
            logger.info("QR found! Fetching data from API...")
            api_items = fetch_data_from_api(qr_raw)

            if api_items:
                logger.info("Success: Retrieved %s items via QR API.", len(api_items))
                return RecognitionResult(
                    source="qr_api",
                    items=api_items,
                    raw_text=f"QR Content: {qr_raw}"
                )
            logger.warning("QR found but API failed. Falling back to OCR.")
        else:
            logger.info("QR code not found. Proceeding to OCR.")

        # --- STAGE 2: Yandex Vision Strategy (Cloud OCR) ---
        # Only attempted when the Yandex credentials are configured
        if yandex_engine.oauth_token and yandex_engine.folder_id:
            logger.info("--- Stage 2: Yandex Vision OCR ---")

            # Yandex receives the original upload bytes, not the locally
            # preprocessed image
            yandex_text = yandex_engine.recognize(content)

            # Very short results (10 characters or fewer) are treated as a failure
            if yandex_text and len(yandex_text) > 10:
                logger.info("Yandex OCR success. Text length: %s", len(yandex_text))
                logger.info("Yandex RAW OUTPUT:\n%s", yandex_text)
                yandex_items = parse_receipt_text(yandex_text)
                logger.info("Parsed items preview: %s...", yandex_items[:3])
                # The regex parser found no items: ask the LLM parser instead
                if not yandex_items:
                    logger.info("Regex found nothing. Calling YandexGPT for semantic parsing...")
                    iam_token = yandex_engine._get_iam_token()
                    yandex_items = llm_parser.parse_with_llm(yandex_text, iam_token)
                    logger.info("Semantic parsed items preview: %s...", yandex_items[:3])

                return RecognitionResult(
                    source="yandex_vision",
                    items=yandex_items,
                    raw_text=yandex_text
                )
            logger.warning("Yandex Vision returned empty text or failed. Falling back to Tesseract.")
        else:
            logger.info("Yandex Vision credentials not set. Skipping Stage 2.")

        # --- STAGE 3: Tesseract Strategy (Local Fallback) ---
        logger.info("--- Stage 3: Tesseract OCR (Local) ---")

        # 1. Image processing (binarization, deskewing)
        processed_img = preprocess_image(content)

        # 2. OCR
        tesseract_text = ocr_engine.recognize(processed_img)

        # 3. Parsing
        ocr_items = parse_receipt_text(tesseract_text)

        return RecognitionResult(
            source="tesseract_ocr",
            items=ocr_items,
            raw_text=tesseract_text
        )

    except HTTPException:
        # Client errors raised above must keep their own status code instead
        # of being rewrapped as a 500 by the generic handler below.
        raise
    except Exception as e:
        logger.exception("Error processing request: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e

if __name__ == "__main__":
    # When executed as a script, serve the app with uvicorn on all
    # interfaces, port 5000.
    import uvicorn

    bind = {"host": "0.0.0.0", "port": 5000}
    uvicorn.run(app, **bind)