Добавил черновики накладных и OCR через Яндекс. LLM-парсер для расшифровки сделан универсальным.

2026-02-04 19:02:33 -06:00 · 2025-12-17 03:38:24 +03:00
parent fda30276a5
commit e2df2350f7
32 changed files with 1785 additions and 214 deletions
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from typing import List

 from fastapi import FastAPI, File, UploadFile, HTTPException
@@ -10,8 +11,10 @@ import numpy as np
 from imgproc import preprocess_image
 from parser import parse_receipt_text, ParsedItem
 from ocr import ocr_engine
-# Импортируем новый модуль
 from qr_manager import detect_and_decode_qr, fetch_data_from_api
+# Импортируем новые модули: облачный OCR (Yandex) и LLM-парсер
+from yandex_ocr import yandex_engine
+from llm_parser import llm_parser

 logging.basicConfig(
    level=logging.INFO,
@@ -19,10 +22,10 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)

-app = FastAPI(title="RMSER OCR Service (Hybrid: QR + OCR)")
+app = FastAPI(title="RMSER OCR Service (Hybrid: QR + Yandex + Tesseract)")

 class RecognitionResult(BaseModel):
-    source: str # 'qr_api' или 'ocr'
+    source: str # 'qr_api', 'yandex_vision', 'tesseract_ocr'
    items: List[ParsedItem]
    raw_text: str = ""

@@ -33,9 +36,10 @@ def health_check():
@app.post("/recognize", response_model=RecognitionResult)
 async def recognize_receipt(image: UploadFile = File(...)):
    """
-    1. Попытка найти QR-код.
-    2. Если QR найден -> запрос к API -> возврат идеальных данных.
-    3. Если QR не найден -> Preprocessing -> OCR -> Regex Parsing.
+    Стратегия:
+    1. QR Code + FNS API (Приоритет 1 - Идеальная точность)
+    2. Yandex Vision OCR (Приоритет 2 - Высокая точность, если настроен)
+    3. Tesseract OCR (Приоритет 3 - Локальный фолбэк)
    """
    logger.info(f"Received file: {image.filename}, content_type: {image.content_type}")

@@ -43,19 +47,18 @@ async def recognize_receipt(image: UploadFile = File(...)):
        raise HTTPException(status_code=400, detail="File must be an image")

    try:
-        # Читаем байты
+        # Читаем сырые байты
        content = await image.read()
        
-        # Конвертируем в numpy для работы (нужен и для QR, и для OCR)
+        # Конвертируем в numpy для QR и локального препроцессинга
        nparr = np.frombuffer(content, np.uint8)
-        # Оригинальное изображение (цветное/серое)
        original_cv_image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

        if original_cv_image is None:
             raise HTTPException(status_code=400, detail="Invalid image data")

        # --- ЭТАП 1: QR Code Strategy ---
-        logger.info("Attempting QR code detection...")
+        logger.info("--- Stage 1: QR Code Detection ---")
        qr_raw = detect_and_decode_qr(original_cv_image)
        
        if qr_raw:
@@ -63,34 +66,63 @@ async def recognize_receipt(image: UploadFile = File(...)):
            api_items = fetch_data_from_api(qr_raw)
            
            if api_items:
-                logger.info(f"Successfully retrieved {len(api_items)} items via API.")
+                logger.info(f"Success: Retrieved {len(api_items)} items via QR API.")
                return RecognitionResult(
                    source="qr_api",
                    items=api_items,
                    raw_text=f"QR Content: {qr_raw}"
                )
            else:
-                logger.warning("QR found but API failed to return items. Falling back to OCR.")
+                logger.warning("QR found but API failed. Falling back to OCR.")
        else:
-            logger.info("QR code not found. Falling back to OCR.")
+            logger.info("QR code not found. Proceeding to OCR.")

-        # --- ЭТАП 2: OCR Strategy (Fallback) ---
+        # --- ЭТАП 2: Yandex Vision Strategy (Cloud OCR) ---
+        # Проверяем, настроен ли Яндекс
+        if yandex_engine.oauth_token and yandex_engine.folder_id:
+            logger.info("--- Stage 2: Yandex Vision OCR ---")
+            
+            # Передаём в Яндекс исходные байты картинки без нашего препроцессинга (кодирование в Base64, по-видимому, выполняется внутри yandex_engine)
+            yandex_text = yandex_engine.recognize(content)
+            
+            if yandex_text and len(yandex_text) > 10:
+                logger.info(f"Yandex OCR success. Text length: {len(yandex_text)}")
+                logger.info(f"Yandex RAW OUTPUT:\n{yandex_text}") 
+                yandex_items = parse_receipt_text(yandex_text)
+                logger.info(f"Parsed items preview: {yandex_items[:3]}...") 
+                # Если Regex не нашел позиций (как в нашем случае со счетом)
+                if not yandex_items:
+                    logger.info("Regex found nothing. Calling YandexGPT for semantic parsing...")
+                    iam_token = yandex_engine._get_iam_token()
+                    yandex_items = llm_parser.parse_with_llm(yandex_text, iam_token)
+                    logger.info(f"Semantic parsed items preview: {yandex_items[:3]}...")
+                
+                return RecognitionResult(
+                    source="yandex_vision",
+                    items=yandex_items,
+                    raw_text=yandex_text
+                )
+            else:
+                logger.warning("Yandex Vision returned empty text or failed. Falling back to Tesseract.")
+        else:
+            logger.info("Yandex Vision credentials not set. Skipping Stage 2.")
+
+        # --- ЭТАП 3: Tesseract Strategy (Local Fallback) ---
+        logger.info("--- Stage 3: Tesseract OCR (Local) ---")
        
-        # 1. Image Processing (получаем бинарное изображение)
-        # Передаем исходные байты, так как функция внутри декодирует их заново
-        # (можно оптимизировать, но оставим совместимость с текущим кодом)
+        # 1. Image Processing (бинаризация, выравнивание)
        processed_img = preprocess_image(content)
        
        # 2. OCR
-        full_text = ocr_engine.recognize(processed_img)
+        tesseract_text = ocr_engine.recognize(processed_img)
        
        # 3. Parsing
-        ocr_items = parse_receipt_text(full_text)
+        ocr_items = parse_receipt_text(tesseract_text)
        
        return RecognitionResult(
-            source="ocr",
+            source="tesseract_ocr",
            items=ocr_items,
-            raw_text=full_text
+            raw_text=tesseract_text
        )

    except Exception as e: