Files
rmser/ocr-service/imgproc.py
SERTY 91923b8616 .venv deleted
ocr ready to test
2025-11-29 12:29:08 +03:00

97 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import cv2
import numpy as np
import logging
logger = logging.getLogger(__name__)
def order_points(pts):
    """Arrange four (x, y) points into a fixed corner order.

    The result is a ``(4, 2)`` ``float32`` array filled as follows:

    * row 0 - point with the smallest ``x + y``
    * row 1 - point with the smallest ``y - x``
    * row 2 - point with the largest ``x + y``
    * row 3 - point with the largest ``y - x``

    In image coordinates (y grows downwards) this corresponds to
    top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Four points as anything convertible to an array of (x, y)
            pairs: an ``(N, 2)`` array, a nested list, or an OpenCV-style
            ``(N, 1, 2)`` contour.

    Returns:
        ``np.ndarray`` of shape ``(4, 2)`` and dtype ``float32``.

    NOTE(review): the sum/difference heuristic can pick the same input
    point for two corners when the quadrilateral is strongly skewed;
    callers should not assume the four rows are always distinct.
    """
    # Work in float64: differences of unsigned integer coordinates would
    # otherwise wrap around (e.g. 0 - 10 -> 246 for uint8) and select the
    # wrong corners. reshape(-1, 2) also accepts (N, 1, 2) contours.
    pts = np.asarray(pts, dtype="float64").reshape(-1, 2)

    rect = np.zeros((4, 2), dtype="float32")

    coord_sum = pts.sum(axis=1)
    rect[0] = pts[np.argmin(coord_sum)]
    rect[2] = pts[np.argmax(coord_sum)]

    # Same quantity np.diff(pts, axis=1) yields: y - x for every point.
    coord_diff = pts[:, 1] - pts[:, 0]
    rect[1] = pts[np.argmin(coord_diff)]
    rect[3] = pts[np.argmax(coord_diff)]
    return rect
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` onto an upright rectangle.

    The four points are first ordered with ``order_points``. The output
    width is the longer of the two horizontal edges and the output height
    the longer of the two vertical edges (each truncated to an integer).

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: Four (x, y) points outlining the region to extract.

    Returns:
        The perspective-corrected image of size ``(width, height)``.
    """

    def _edge_length(p, q):
        # Euclidean distance between two (x, y) corners.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Longest bottom/top edge decides the width, longest right/left edge
    # decides the height.
    out_width = max(
        int(_edge_length(bottom_right, bottom_left)),
        int(_edge_length(top_right, top_left)),
    )
    out_height = max(
        int(_edge_length(top_right, bottom_right)),
        int(_edge_length(top_left, bottom_left)),
    )

    # Destination corners in the same order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_width, out_height))
def preprocess_image(image_bytes: bytes) -> np.ndarray:
    """Return a BINARY (black/white) image prepared for Tesseract.

    Steps:
      1. Decode ``image_bytes`` as a colour image.
      2. On a copy resized to 500 px height, detect edges and look for the
         first 4-point polygon among the five largest contours.
      3. If one is found, perspective-warp the full-resolution image to
         that quadrilateral; otherwise use the full image.
      4. Convert to grayscale and apply Gaussian adaptive thresholding.

    Args:
        image_bytes: Encoded image data (any format ``cv2.imdecode`` reads).

    Returns:
        Single-channel thresholded image (values 0 or 255).

    Raises:
        ValueError: If ``image_bytes`` is empty or cannot be decoded.
    """
    # OpenCV 4.x raises cv2.error on an empty buffer instead of returning
    # None, so reject it up front to keep the ValueError contract.
    if not image_bytes:
        raise ValueError("Could not decode image")

    buffer = np.frombuffer(image_bytes, np.uint8)
    image = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("Could not decode image")

    # Downscale for contour search; ``ratio`` maps small-image coordinates
    # back to the full-resolution image.
    ratio = image.shape[0] / 500.0
    # Guard against a zero width for extremely tall, narrow images.
    small_width = max(1, int(image.shape[1] / ratio))
    image_small = cv2.resize(image, (small_width, 500))

    gray = cv2.cvtColor(image_small, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy); pick the contour list in either case.
    contour_result = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    cnts = contour_result[0] if len(contour_result) == 2 else contour_result[1]
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

    # First of the largest contours that simplifies to four vertices.
    screen_cnt = None
    for c in cnts:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        if len(approx) == 4:
            screen_cnt = approx
            break

    # Image that the rest of the pipeline works on.
    if screen_cnt is not None:
        logger.info("Receipt contour found (Tesseract mode).")
        target_img = four_point_transform(image, screen_cnt.reshape(4, 2) * ratio)
    else:
        logger.warning("Receipt contour NOT found. Using full image.")
        target_img = image

    # --- Preparation for Tesseract (binarization) ---
    gray_final = cv2.cvtColor(target_img, cv2.COLOR_BGR2GRAY)

    # Adaptive threshold turns the image into pure black and white.
    # block_size=11, C=2 are the classic parameters for text.
    thresh = cv2.adaptiveThreshold(
        gray_final, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 11, 2
    )

    # NOTE: a light denoising pass (cv2.medianBlur(thresh, 3)) is
    # intentionally not applied at the moment.
    return thresh