mirror of
https://github.com/serty2005/rmser.git
synced 2026-02-04 19:02:33 -06:00
добавил гигачада и заворачивание в проверку чека, если данные для QR распознались
This commit is contained in:
@@ -2,32 +2,29 @@ import os
|
||||
import requests
|
||||
import logging
|
||||
import json
|
||||
from typing import List
|
||||
import uuid
|
||||
import time
|
||||
from typing import List, Optional
|
||||
from parser import ParsedItem
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
YANDEX_GPT_URL = "https://llm.api.cloud.yandex.net/foundationModels/v1/completion"
|
||||
GIGACHAT_OAUTH_URL = "https://ngw.devices.sberbank.ru:9443/api/v2/oauth"
|
||||
GIGACHAT_COMPLETION_URL = "https://gigachat.devices.sberbank.ru/api/v1/chat/completions"
|
||||
|
||||
class YandexGPTParser:
|
||||
def __init__(self):
|
||||
self.folder_id = os.getenv("YANDEX_FOLDER_ID")
|
||||
self.api_key = os.getenv("YANDEX_OAUTH_TOKEN") # Используем тот же доступ
|
||||
self.api_key = os.getenv("YANDEX_OAUTH_TOKEN")
|
||||
|
||||
def parse_with_llm(self, raw_text: str, iam_token: str) -> List[ParsedItem]:
|
||||
"""
|
||||
Отправляет текст в YandexGPT для структурирования.
|
||||
"""
|
||||
def parse(self, raw_text: str, iam_token: str) -> List[ParsedItem]:
|
||||
if not iam_token:
|
||||
return []
|
||||
|
||||
|
||||
prompt = {
|
||||
"modelUri": f"gpt://{self.folder_id}/yandexgpt/latest",
|
||||
"completionOptions": {
|
||||
"stream": False,
|
||||
"temperature": 0.1, # Низкая температура для точности
|
||||
"maxTokens": "2000"
|
||||
},
|
||||
"completionOptions": {"stream": False, "temperature": 0.1, "maxTokens": "2000"},
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -38,10 +35,7 @@ class YandexGPTParser:
|
||||
"Если количество не указано, считай 1.0. Не пиши ничего, кроме JSON."
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"text": raw_text
|
||||
}
|
||||
{"role": "user", "text": raw_text}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -54,21 +48,103 @@ class YandexGPTParser:
|
||||
try:
|
||||
response = requests.post(YANDEX_GPT_URL, headers=headers, json=prompt, timeout=30)
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
|
||||
# Извлекаем текст ответа
|
||||
content = result['result']['alternatives'][0]['message']['text']
|
||||
|
||||
# Очищаем от возможных markdown-оберток ```json ... ```
|
||||
content = response.json()['result']['alternatives'][0]['message']['text']
|
||||
clean_json = content.replace("```json", "").replace("```", "").strip()
|
||||
|
||||
items_raw = json.loads(clean_json)
|
||||
|
||||
parsed_items = [ParsedItem(**item) for item in items_raw]
|
||||
return parsed_items
|
||||
|
||||
return [ParsedItem(**item) for item in json.loads(clean_json)]
|
||||
except Exception as e:
|
||||
logger.error(f"LLM Parsing error: {e}")
|
||||
logger.error(f"YandexGPT Parsing error: {e}")
|
||||
return []
|
||||
|
||||
llm_parser = YandexGPTParser()
|
||||
class GigaChatParser:
    """Client for the GigaChat chat-completions API that extracts receipt items.

    The authorization key is read from the ``GIGACHAT_AUTH_KEY`` environment
    variable. It is exchanged for a short-lived access token, which is cached
    on the instance and reused until shortly before it expires.
    """

    # Seconds before the reported expiry at which the cached token is already
    # treated as stale, so a request never leaves with a token about to lapse.
    _TOKEN_REFRESH_MARGIN = 60

    def __init__(self):
        self.auth_key = os.getenv("GIGACHAT_AUTH_KEY")
        self._access_token = None
        # Unix time in seconds; 0 forces a token request on first use.
        self._expires_at = 0

    def _get_token(self) -> Optional[str]:
        """Return a usable access token, requesting a new one when needed.

        Returns:
            The bearer token, or ``None`` when the authorization key is not
            configured or the OAuth request fails (the error is logged).
        """
        still_valid = time.time() < self._expires_at - self._TOKEN_REFRESH_MARGIN
        if self._access_token and still_valid:
            return self._access_token

        if not self.auth_key:
            # Without a key the request could only send "Basic None" and fail;
            # skip the network round trip.
            logger.error("GigaChat Auth error: GIGACHAT_AUTH_KEY is not set")
            return None

        logger.info("Obtaining GigaChat access token...")
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json',
            'RqUID': str(uuid.uuid4()),
            'Authorization': f'Basic {self.auth_key}',
        }
        payload = {'scope': 'GIGACHAT_API_PERS'}

        try:
            # verify=False may be needed if the Russian Ministry of Digital
            # Development certificates are not in the system trust store, but
            # they are expected to be installed in the container.
            response = requests.post(GIGACHAT_OAUTH_URL, headers=headers, data=payload, timeout=10)
            response.raise_for_status()
            data = response.json()
            token = data['access_token']
            expires_at = data['expires_at'] / 1000  # milliseconds -> seconds
        except Exception as e:
            logger.error("GigaChat Auth error: %s", e)
            return None

        # Assign both fields together so a malformed reply cannot leave a new
        # token paired with a stale expiry.
        self._access_token = token
        self._expires_at = expires_at
        return token

    @staticmethod
    def _extract_json_array(content: str) -> list:
        """Return the JSON array contained in a model reply.

        Markdown code fences are stripped first. If the remaining text is not
        valid JSON on its own, the span from the first ``[`` to the last ``]``
        is parsed instead, which tolerates prose around the array.

        Raises:
            json.JSONDecodeError: no parseable JSON could be found.
            ValueError: the parsed JSON is not an array.
        """
        cleaned = content.replace("```json", "").replace("```", "").strip()
        try:
            data = json.loads(cleaned)
        except json.JSONDecodeError:
            start = cleaned.find("[")
            end = cleaned.rfind("]")
            if start == -1 or end <= start:
                raise
            data = json.loads(cleaned[start:end + 1])
        if not isinstance(data, list):
            raise ValueError("expected a JSON array of items")
        return data

    def parse(self, raw_text: str) -> "List[ParsedItem]":
        """Ask GigaChat to extract receipt items from ``raw_text``.

        Args:
            raw_text: Recognized receipt text sent as the user message.

        Returns:
            One ``ParsedItem`` per object in the model's JSON array. An empty
            list is returned when no token is available, the request fails,
            or the reply cannot be converted; the error is logged.
        """
        token = self._get_token()
        if not token:
            return []

        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Authorization': f'Bearer {token}',
        }

        payload = {
            "model": "GigaChat",
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "Ты — эксперт по распознаванию чеков. Извлеки товары из текста. "
                        "Верни ТОЛЬКО JSON массив объектов с полями: raw_name (строка), "
                        "amount (число), price (число), sum (число). "
                        "Если данных нет, верни []. Никаких пояснений."
                    ),
                },
                {"role": "user", "content": raw_text},
            ],
            # Low temperature keeps the extraction close to deterministic.
            "temperature": 0.1,
        }

        try:
            response = requests.post(GIGACHAT_COMPLETION_URL, headers=headers, json=payload, timeout=30)
            response.raise_for_status()
            content = response.json()['choices'][0]['message']['content']
            return [ParsedItem(**item) for item in self._extract_json_array(content)]
        except Exception as e:
            # Best-effort boundary: any failure yields an empty result rather
            # than an exception for the caller.
            logger.error("GigaChat Parsing error: %s", e)
            return []
|
||||
|
||||
class LLMManager:
    """Selects the primary LLM parser and falls back to the other one.

    The primary engine comes from the ``LLM_ENGINE`` environment variable
    (lower-cased, default ``"yandex"``). The value ``"gigachat"`` makes
    GigaChat primary; any other value makes YandexGPT primary.
    """

    def __init__(self):
        self.yandex = YandexGPTParser()
        self.giga = GigaChatParser()
        self.engine = os.getenv("LLM_ENGINE", "yandex").lower()

    def parse_with_priority(self, raw_text: str, yandex_iam_token: Optional[str] = None) -> List[ParsedItem]:
        """Parse ``raw_text`` with the primary engine, then the fallback.

        Args:
            raw_text: Text passed unchanged to the parsers.
            yandex_iam_token: Token for YandexGPT. Without it YandexGPT is
                never called, either as primary or as fallback.

        Returns:
            The primary engine's items when it returned any, otherwise
            whatever the fallback engine returned (possibly an empty list).
        """
        if self.engine != "gigachat":
            logger.info("Using YandexGPT as primary LLM")
            result = []
            if yandex_iam_token:
                result = self.yandex.parse(raw_text, yandex_iam_token)
            if result:
                return result
            logger.info("YandexGPT failed, falling back to GigaChat")
            return self.giga.parse(raw_text)

        logger.info("Using GigaChat as primary LLM")
        result = self.giga.parse(raw_text)
        # Fall back only when GigaChat produced nothing and a Yandex token exists.
        if result or not yandex_iam_token:
            return result
        logger.info("GigaChat failed, falling back to YandexGPT")
        return self.yandex.parse(raw_text, yandex_iam_token)
|
||||
|
||||
# Module-level LLMManager instance, created when the module is imported.
llm_parser = LLMManager()
|
||||
Reference in New Issue
Block a user