"""LLM-backed extraction of line items from raw document/receipt text.

Two providers are wrapped behind the same ``parse`` contract (YandexGPT and
GigaChat); ``LLMManager`` picks the primary one from the ``LLM_ENGINE``
environment variable and falls back to the other when the primary yields
no items.
"""

import json
import logging
import os
import time
import uuid
from typing import List, Optional

import requests

from parser import ParsedItem

logger = logging.getLogger(__name__)

YANDEX_GPT_URL = "https://llm.api.cloud.yandex.net/foundationModels/v1/completion"
GIGACHAT_OAUTH_URL = "https://ngw.devices.sberbank.ru:9443/api/v2/oauth"
GIGACHAT_COMPLETION_URL = "https://gigachat.devices.sberbank.ru/api/v1/chat/completions"

# Refresh the cached GigaChat token slightly before its stated expiry so a
# request that starts just before the deadline is not sent with a token that
# has expired by the time the server checks it.
_TOKEN_REFRESH_MARGIN_SEC = 60


def _load_json_array(content: str) -> list:
    """Decode the JSON array contained in an LLM reply.

    Markdown code fences are removed first. If the remaining text is not
    valid JSON on its own (the model added prose around the array), the
    outermost ``[...]`` span is decoded instead.

    Raises:
        ValueError: if no JSON can be decoded (``json.JSONDecodeError`` is a
            ``ValueError`` subclass) or the decoded value is not a list.
    """
    cleaned = content.replace("```json", "").replace("```", "").strip()
    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        start, end = cleaned.find("["), cleaned.rfind("]")
        if start == -1 or end <= start:
            raise
        data = json.loads(cleaned[start:end + 1])
    if not isinstance(data, list):
        raise ValueError(
            f"expected a JSON array of items, got {type(data).__name__}"
        )
    return data


def _items_from_completion(content: str) -> List[ParsedItem]:
    """Build ``ParsedItem`` objects from the text of an LLM completion.

    Each array element is passed to ``ParsedItem`` as keyword arguments;
    any error raised while decoding or constructing propagates to the caller.
    """
    return [ParsedItem(**item) for item in _load_json_array(content)]


class YandexGPTParser:
    """Extracts items from text through the YandexGPT completion endpoint."""

    def __init__(self):
        self.folder_id = os.getenv("YANDEX_FOLDER_ID")
        # Read from the environment but not used by this class itself; the
        # IAM token is supplied per call to ``parse``.
        self.api_key = os.getenv("YANDEX_OAUTH_TOKEN")

    def parse(self, raw_text: str, iam_token: Optional[str]) -> List[ParsedItem]:
        """Ask YandexGPT to extract items from ``raw_text``.

        Args:
            raw_text: Document text sent as the user message.
            iam_token: Bearer token for the request; a falsy value skips the
                call entirely.

        Returns:
            The parsed items, or an empty list when the token or folder id
            is missing or when the request / response handling fails (the
            failure is logged, not raised).
        """
        if not iam_token:
            return []
        if not self.folder_id:
            # Without a folder id the model URI would be "gpt://None/...",
            # which cannot resolve; skip the doomed network round-trip.
            logger.error("YandexGPT Parsing error: %s", "YANDEX_FOLDER_ID is not set")
            return []

        prompt = {
            "modelUri": f"gpt://{self.folder_id}/yandexgpt/latest",
            "completionOptions": {"stream": False, "temperature": 0.1, "maxTokens": "2000"},
            "messages": [
                {
                    "role": "system",
                    "text": (
                        "Ты — помощник по бухгалтерии. Извлеки список товаров из текста документа. "
                        "Верни ответ строго в формате JSON: "
                        '[{"raw_name": string, "amount": float, "price": float, "sum": float}]. '
                        "Если количество не указано, считай 1.0. Не пиши ничего, кроме JSON."
                    ),
                },
                {"role": "user", "text": raw_text},
            ],
        }
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {iam_token}",
            "x-folder-id": self.folder_id,
        }

        # Deliberately broad: this is a best-effort boundary, and callers
        # treat an empty list as "fall back to the other provider".
        try:
            response = requests.post(YANDEX_GPT_URL, headers=headers, json=prompt, timeout=30)
            response.raise_for_status()
            content = response.json()['result']['alternatives'][0]['message']['text']
            return _items_from_completion(content)
        except Exception as e:
            logger.error("YandexGPT Parsing error: %s", e)
            return []


class GigaChatParser:
    """Extracts items from text through the GigaChat chat-completions endpoint.

    The OAuth access token is cached on the instance and re-requested once
    it is close to expiry.
    """

    def __init__(self):
        self.auth_key = os.getenv("GIGACHAT_AUTH_KEY")
        self._access_token = None
        self._expires_at = 0  # expiry as a Unix timestamp in seconds

    def _get_token(self) -> Optional[str]:
        """Return a usable access token, requesting a new one when needed.

        Returns:
            The token string, or ``None`` when the auth key is missing or
            the OAuth request fails (the failure is logged, not raised).
        """
        if self._access_token and time.time() < self._expires_at - _TOKEN_REFRESH_MARGIN_SEC:
            return self._access_token
        if not self.auth_key:
            # "Basic None" can never authenticate; skip the network call.
            logger.error("GigaChat Auth error: %s", "GIGACHAT_AUTH_KEY is not set")
            return None

        logger.info("Obtaining GigaChat access token...")
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json',
            'RqUID': str(uuid.uuid4()),
            'Authorization': f'Basic {self.auth_key}',
        }
        payload = {'scope': 'GIGACHAT_API_PERS'}

        try:
            # TLS verification stays enabled. verify=False might be needed if
            # the Russian Ministry of Digital Development certificates are not
            # in the system trust store, but they are expected to be
            # installed in the container.
            response = requests.post(GIGACHAT_OAUTH_URL, headers=headers, data=payload, timeout=10)
            response.raise_for_status()
            data = response.json()
            # Read both fields before touching the cache so a malformed
            # response cannot leave it half-updated.
            token = data['access_token']
            expires_at = data['expires_at'] / 1000  # convert ms to seconds
        except Exception as e:
            logger.error("GigaChat Auth error: %s", e)
            return None

        self._access_token = token
        self._expires_at = expires_at
        return token

    def parse(self, raw_text: str) -> List[ParsedItem]:
        """Ask GigaChat to extract items from ``raw_text``.

        Returns:
            The parsed items, or an empty list when no token could be
            obtained or when the request / response handling fails (the
            failure is logged, not raised).
        """
        token = self._get_token()
        if not token:
            return []

        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Authorization': f'Bearer {token}',
        }
        payload = {
            "model": "GigaChat",
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "Ты — эксперт по распознаванию чеков. Извлеки товары из текста. "
                        "Верни ТОЛЬКО JSON массив объектов с полями: raw_name (строка), "
                        "amount (число), price (число), sum (число). "
                        "Если данных нет, верни []. Никаких пояснений."
                    ),
                },
                {"role": "user", "content": raw_text},
            ],
            "temperature": 0.1,
        }

        # Deliberately broad: best-effort boundary, see YandexGPTParser.parse.
        try:
            response = requests.post(GIGACHAT_COMPLETION_URL, headers=headers, json=payload, timeout=30)
            response.raise_for_status()
            content = response.json()['choices'][0]['message']['content']
            return _items_from_completion(content)
        except Exception as e:
            logger.error("GigaChat Parsing error: %s", e)
            return []


class LLMManager:
    """Chooses between the two providers and handles the fallback.

    The primary engine comes from the ``LLM_ENGINE`` environment variable
    (lower-cased, default ``"yandex"``); any value other than ``"gigachat"``
    selects YandexGPT as primary.
    """

    def __init__(self):
        self.yandex = YandexGPTParser()
        self.giga = GigaChatParser()
        self.engine = os.getenv("LLM_ENGINE", "yandex").lower()

    def parse_with_priority(self, raw_text: str, yandex_iam_token: Optional[str] = None) -> List[ParsedItem]:
        """Parse ``raw_text`` with the primary engine, then the fallback.

        The fallback is tried whenever the primary returns no items.
        YandexGPT is only ever called when ``yandex_iam_token`` is truthy.

        Args:
            raw_text: Document text to extract items from.
            yandex_iam_token: Token forwarded to ``YandexGPTParser.parse``.

        Returns:
            Items from whichever engine produced a non-empty result first,
            or an empty list when neither did.
        """
        if self.engine == "gigachat":
            logger.info("Using GigaChat as primary LLM")
            items = self.giga.parse(raw_text)
            if not items and yandex_iam_token:
                logger.info("GigaChat failed, falling back to YandexGPT")
                items = self.yandex.parse(raw_text, yandex_iam_token)
            return items

        logger.info("Using YandexGPT as primary LLM")
        items = self.yandex.parse(raw_text, yandex_iam_token) if yandex_iam_token else []
        if not items:
            logger.info("YandexGPT failed, falling back to GigaChat")
            items = self.giga.parse(raw_text)
        return items


llm_parser = LLMManager()