Files
rmser/ocr-service/main.py

134 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import logging
import os
from typing import List
from fastapi import FastAPI, File, UploadFile, HTTPException
from pydantic import BaseModel
import cv2
import numpy as np
# Project-local modules: image preprocessing, receipt parsing, local OCR, QR handling
from imgproc import preprocess_image
from parser import parse_receipt_text, ParsedItem
from ocr import ocr_engine
from qr_manager import detect_and_decode_qr, fetch_data_from_api
# Yandex Vision OCR engine and the LLM-based receipt parser
from yandex_ocr import yandex_engine
from llm_parser import llm_parser
# Configure root logging once at import time: INFO level, with timestamp,
# logger name and severity in front of every message.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used by every handler in this file.
logger = logging.getLogger(__name__)
# ASGI application object; the route decorators below register on it.
app = FastAPI(
    title="RMSER OCR Service (Hybrid: QR + Yandex + Tesseract)",
)
class RecognitionResult(BaseModel):
    # Response body of the /recognize endpoint.
    # Documented with comments rather than a docstring so the generated
    # schema description is left unchanged.

    # Which stage produced the result:
    # 'qr_api', 'yandex_vision' or 'tesseract_ocr'.
    source: str

    # Line items extracted from the receipt.
    items: List[ParsedItem]

    # Text the items were derived from: the decoded QR content for 'qr_api',
    # otherwise the OCR output. Defaults to an empty string.
    raw_text: str = ""
# Liveness probe: always answers {"status": "ok"} without touching any
# OCR backend. Documented with comments rather than a docstring so the
# generated OpenAPI description is left unchanged.
@app.get("/health")
def health_check():
    payload = {"status": "ok"}
    return payload
@app.post("/recognize", response_model=RecognitionResult)
async def recognize_receipt(image: UploadFile = File(...)):
    """Recognize receipt line items from an uploaded image.

    Strategy (the first stage that produces a result wins):
    1. QR code + FNS API (priority 1 - ideal accuracy)
    2. Yandex Vision OCR (priority 2 - high accuracy, if configured)
    3. Tesseract OCR (priority 3 - local fallback)

    Args:
        image: Uploaded file; its content type must start with ``image/``.

    Returns:
        RecognitionResult whose ``source`` names the stage that produced
        the items ('qr_api', 'yandex_vision' or 'tesseract_ocr').

    Raises:
        HTTPException: 400 if the upload is not an image, is empty, or
            cannot be decoded; 500 for any unexpected processing error.
    """
    logger.info(f"Received file: {image.filename}, content_type: {image.content_type}")

    # content_type is None when the client sends no Content-Type for the
    # part; treat that as "not an image" instead of failing on None.
    if not (image.content_type or "").startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image")

    # NOTE(review): the stage helpers below are called synchronously from an
    # async handler. If they perform blocking network or CPU work they will
    # stall the event loop - confirm, and consider a threadpool if so.
    try:
        # Read the raw bytes.
        content = await image.read()

        # Reject an empty upload up front rather than handing an empty
        # buffer to cv2.imdecode.
        if not content:
            raise HTTPException(status_code=400, detail="Invalid image data")

        # Convert to a numpy image for QR detection and local preprocessing.
        nparr = np.frombuffer(content, np.uint8)
        original_cv_image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if original_cv_image is None:
            raise HTTPException(status_code=400, detail="Invalid image data")

        # --- STAGE 1: QR Code Strategy ---
        logger.info("--- Stage 1: QR Code Detection ---")
        qr_raw = detect_and_decode_qr(original_cv_image)
        if qr_raw:
            logger.info("QR found! Fetching data from API...")
            api_items = fetch_data_from_api(qr_raw)
            if api_items:
                logger.info(f"Success: Retrieved {len(api_items)} items via QR API.")
                return RecognitionResult(
                    source="qr_api",
                    items=api_items,
                    raw_text=f"QR Content: {qr_raw}"
                )
            else:
                logger.warning("QR found but API failed. Falling back to OCR.")
        else:
            logger.info("QR code not found. Proceeding to OCR.")

        # --- STAGE 2: Yandex Vision Strategy (Cloud OCR) ---
        # Only attempted when Yandex credentials are configured.
        if yandex_engine.oauth_token and yandex_engine.folder_id:
            logger.info("--- Stage 2: Yandex Vision OCR ---")
            # Yandex receives the raw image bytes, not our preprocessed image.
            yandex_text = yandex_engine.recognize(content)
            # Anything of 10 characters or fewer is treated as a failed
            # recognition.
            if yandex_text and len(yandex_text) > 10:
                logger.info(f"Yandex OCR success. Text length: {len(yandex_text)}")
                logger.info(f"Yandex RAW OUTPUT:\n{yandex_text}")
                yandex_items = parse_receipt_text(yandex_text)
                logger.info(f"Parsed items preview: {yandex_items[:3]}...")
                # If the regex parser found no positions, fall back to
                # semantic parsing with the LLM.
                if not yandex_items:
                    logger.info("Regex found nothing. Calling YandexGPT for semantic parsing...")
                    iam_token = yandex_engine._get_iam_token()
                    yandex_items = llm_parser.parse_with_llm(yandex_text, iam_token)
                    logger.info(f"Semantic parsed items preview: {yandex_items[:3]}...")
                return RecognitionResult(
                    source="yandex_vision",
                    items=yandex_items,
                    raw_text=yandex_text
                )
            else:
                logger.warning("Yandex Vision returned empty text or failed. Falling back to Tesseract.")
        else:
            logger.info("Yandex Vision credentials not set. Skipping Stage 2.")

        # --- STAGE 3: Tesseract Strategy (Local Fallback) ---
        logger.info("--- Stage 3: Tesseract OCR (Local) ---")
        # 1. Image processing (binarization, deskewing)
        processed_img = preprocess_image(content)
        # 2. OCR
        tesseract_text = ocr_engine.recognize(processed_img)
        # 3. Parsing
        ocr_items = parse_receipt_text(tesseract_text)
        return RecognitionResult(
            source="tesseract_ocr",
            items=ocr_items,
            raw_text=tesseract_text
        )
    except HTTPException:
        # Deliberate HTTP errors raised above (e.g. 400 for bad image data)
        # must reach the client unchanged, not be rewrapped as a 500.
        raise
    except Exception as e:
        logger.error(f"Error processing request: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
# Script entry point: when this file is executed directly, serve the app
# with uvicorn on all interfaces, port 5000.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=5000,
    )