Advertisement
xosski

Nvidia LLM

Mar 26th, 2025
12
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.49 KB | None | 0 0
  1. from fastapi import FastAPI, File, UploadFile
  2. import shutil, os
  3. from ocr_processor import ocr_from_pdf
  4. from llama_cpp import Llama
  5.  
  # ASGI application object; the route decorator further down registers on it.
  app = FastAPI()

  # GGUF model file, resolved relative to the process working directory.
  MODEL_PATH = "./models/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf"

  # Constructed once at import time so request handlers share one instance.
  llm = Llama(
      model_path=MODEL_PATH,
      n_ctx=8192,
      n_gpu_layers=41,
      verbose=True,
  )
  9.  
  @app.post("/upload/")
  async def upload_pdf(file: UploadFile = File(...)):
      """Run OCR on an uploaded PDF and pass the extracted text to the LLM.

      Args:
          file: The uploaded file; its contents are spooled to a temporary
              file on disk so ``ocr_from_pdf`` can read it by path.

      Returns:
          A dict with a single ``"output"`` key holding the stripped text of
          the first completion choice returned by ``llm``.
      """
      import tempfile

      # Never build the path from ``file.filename``: it is client-controlled,
      # so values such as "../../etc/cron.d/x" would escape the temp
      # directory, and two uploads with the same name would clobber each
      # other. A randomly named temp file avoids both problems.
      # NOTE(review): the ".pdf" suffix assumes ocr_from_pdf does not need the
      # client's original extension - confirm against ocr_processor.
      tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
      try:
          with tmp:
              shutil.copyfileobj(file.file, tmp)
          ocr_text = ocr_from_pdf(tmp.name)
      finally:
          # Remove the upload even when copying or OCR raises.
          os.remove(tmp.name)

      # NOTE(review): ocr_from_pdf and llm are called synchronously inside an
      # async handler; presumably they block the event loop - confirm.
      result = llm(ocr_text)
      return {"output": result["choices"][0]["text"].strip()}
  21. from fastapi import FastAPI, File, UploadFile
  22. import shutil, os
  23. from ocr_processor import ocr_from_pdf
  24. from llama_cpp import Llama
  25.  
  # ASGI application object; the route decorator further down registers on it.
  app = FastAPI()

  # GGUF model file, resolved relative to the process working directory.
  MODEL_PATH = "./models/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf"

  # Constructed once at import time so request handlers share one instance.
  llm = Llama(
      model_path=MODEL_PATH,
      n_ctx=8192,
      n_gpu_layers=41,
      verbose=True,
  )
  29.  
  @app.post("/upload/")
  async def upload_pdf(file: UploadFile = File(...)):
      """Run OCR on an uploaded PDF and pass the extracted text to the LLM.

      Args:
          file: The uploaded file; its contents are spooled to a temporary
              file on disk so ``ocr_from_pdf`` can read it by path.

      Returns:
          A dict with a single ``"output"`` key holding the stripped text of
          the first completion choice returned by ``llm``.
      """
      import tempfile

      # Never build the path from ``file.filename``: it is client-controlled,
      # so values such as "../../etc/cron.d/x" would escape the temp
      # directory, and two uploads with the same name would clobber each
      # other. A randomly named temp file avoids both problems.
      # NOTE(review): the ".pdf" suffix assumes ocr_from_pdf does not need the
      # client's original extension - confirm against ocr_processor.
      tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
      try:
          with tmp:
              shutil.copyfileobj(file.file, tmp)
          ocr_text = ocr_from_pdf(tmp.name)
      finally:
          # Remove the upload even when copying or OCR raises.
          os.remove(tmp.name)

      # NOTE(review): ocr_from_pdf and llm are called synchronously inside an
      # async handler; presumably they block the event loop - confirm.
      result = llm(ocr_text)
      return {"output": result["choices"][0]["text"].strip()}
  # Image for the OCR + LLM API, based on the CUDA 12.3.1 "devel" Ubuntu 22.04 image.
  FROM nvidia/cuda:12.3.1-devel-ubuntu22.04
  SHELL ["/bin/bash", "-c"]

  # All OS packages are installed in ONE layer: a second `apt-get update`
  # layer would re-download the package index and leave /var/lib/apt/lists in
  # the image. DEBIAN_FRONTEND is set only for this command so package
  # configuration can never block the build on an interactive prompt.
  RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
      python3-dev python3-pip \
      curl \
      build-essential \
      software-properties-common \
      tesseract-ocr \
      poppler-utils \
      libglib2.0-0 \
      libsm6 \
      libxext6 \
      libxrender-dev \
      ninja-build git \
      && rm -rf /var/lib/apt/lists/*

  # Compiler path and CMake flags exported for the pip install below.
  # NOTE(review): presumably consumed when llama-cpp-python is built from
  # source with CUDA enabled; requirements.txt is not visible here - confirm.
  ENV CUDACXX=/usr/local/cuda-12/bin/nvcc
  ENV CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all-major"

  WORKDIR /app
  COPY . /app

  # Python dependencies; --no-cache-dir keeps pip's download cache out of the image.
  RUN pip install --no-cache-dir --upgrade pip && \
      pip install --no-cache-dir -r requirements.txt
  # Compose definition for the GPU-backed OCR + LLM API service.
  services:
    llm-ocr:
      build: .
      container_name: llm_ocr_gpu
      volumes:
        # Model files stay on the host and are mounted where the app looks for them.
        - ./models:/app/models
      deploy:
        resources:
          reservations:
            devices:
              # Reserve NVIDIA GPU with device id 0 for this container.
              - driver: nvidia
                device_ids: ['0']
                capabilities: [gpu]
      ports:
        - "8000:8000"
      stdin_open: true
      tty: true
      # Serve the FastAPI object `app` from module `api_server` on all interfaces.
      command: ["uvicorn", "api_server:app", "--host", "0.0.0.0", "--port", "8000"]
  85. from llama_cpp import Llama
  86.  
  # Stand-alone check that the model file can be loaded; progress is printed
  # before and after constructing the Llama instance.
  MODEL_PATH = "./models/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf"

  print('Loading model...')
  llm = Llama(
      model_path=MODEL_PATH,
      n_ctx=8192,
      n_gpu_layers=41,
      verbose=True,
  )
  print('Model loaded.')
  91. from fastapi import FastAPI, File, UploadFile, Header, HTTPException
  92. import shutil, os
  93. from ocr_processor import ocr_from_pdf
  94. from llama_cpp import Llama
  95.  
  # ASGI application object; the route decorator further down registers on it.
  app = FastAPI()

  # GGUF model file, resolved relative to the process working directory.
  MODEL_PATH = "./models/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf"

  # Constructed once at import time so request handlers share one instance.
  llm = Llama(model_path=MODEL_PATH, n_ctx=8192, n_gpu_layers=41, verbose=True)

  # Bearer token clients must present. It can be supplied through the
  # API_TOKEN environment variable so the real secret does not have to live
  # in source control; the literal default preserves the previous behaviour.
  API_TOKEN = os.environ.get("API_TOKEN", "supersecret")  # Change this to a secure token
  101.  
  def authorize(authorization: str):
      """Validate a ``Bearer`` authorization header value against ``API_TOKEN``.

      Args:
          authorization: Raw ``Authorization`` header value; may be ``None``
              or empty when the client sent no header.

      Raises:
          HTTPException: 401 when the value is missing or does not start with
              ``"Bearer "``; 403 when the presented token does not match
              ``API_TOKEN``.
      """
      import secrets

      if not authorization or not authorization.startswith("Bearer "):
          raise HTTPException(status_code=401, detail="Missing or invalid authorization header")

      token = authorization.split(" ")[1]

      # Constant-time comparison so response timing does not reveal how many
      # leading characters of a guess were correct. Both sides are encoded to
      # bytes because compare_digest rejects non-ASCII str arguments.
      if not secrets.compare_digest(token.encode("utf-8"), API_TOKEN.encode("utf-8")):
          raise HTTPException(status_code=403, detail="Invalid token")
  108.  
  @app.post("/upload/")
  async def upload_pdf(file: UploadFile = File(...), authorization: str = Header(None)):
      """Authorize the caller, OCR the uploaded PDF and pass the text to the LLM.

      Args:
          file: The uploaded file; its contents are spooled to a temporary
              file on disk so ``ocr_from_pdf`` can read it by path.
          authorization: Value of the ``Authorization`` request header,
              checked by ``authorize`` before any work is done.

      Returns:
          A dict with a single ``"output"`` key holding the stripped text of
          the first completion choice returned by ``llm``.

      Raises:
          HTTPException: Propagated from ``authorize`` when the header is
              rejected.
      """
      import tempfile

      authorize(authorization)

      # Never build the path from ``file.filename``: it is client-controlled,
      # so values such as "../../etc/cron.d/x" would escape the temp
      # directory, and two uploads with the same name would clobber each
      # other. A randomly named temp file avoids both problems.
      # NOTE(review): the ".pdf" suffix assumes ocr_from_pdf does not need the
      # client's original extension - confirm against ocr_processor.
      tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
      try:
          with tmp:
              shutil.copyfileobj(file.file, tmp)
          ocr_text = ocr_from_pdf(tmp.name)
      finally:
          # Remove the upload even when copying or OCR raises.
          os.remove(tmp.name)

      result = llm(ocr_text)
      return {"output": result["choices"][0]["text"].strip()}
  122. from fastapi import Request, HTTPException
  123. from starlette.middleware.base import BaseHTTPMiddleware
  124.  
  # Set your safe IPs. When this set is non-empty, any client address not
  # listed here is rejected by the middleware.
  WHITELIST = {
      "127.0.0.1",
      "192.168.1.100",
  }

  # Block these IPs. The middleware checks this set before the whitelist.
  BLACKLIST = {
      "10.0.0.5",
      "172.16.0.8",
  }
  127.  
  class IPFilterMiddleware(BaseHTTPMiddleware):
      """Reject requests by client IP using ``BLACKLIST`` and ``WHITELIST``.

      A client listed in ``BLACKLIST`` is always refused. If ``WHITELIST`` is
      non-empty, any client not listed in it is refused as well. Refusals are
      answered with a 403 JSON body of the form ``{"detail": "..."}``.
      """

      async def dispatch(self, request: Request, call_next):
          """Filter ``request`` by client address, else forward it downstream.

          Args:
              request: The incoming request.
              call_next: Callable that passes the request to the next layer
                  and returns its response.

          Returns:
              A 403 ``JSONResponse`` when the client is refused, otherwise the
              downstream response.
          """
          from starlette.responses import JSONResponse

          # request.client may be None when the server provides no peer
          # address; such a client can never match the whitelist.
          client_ip = request.client.host if request.client else None

          # Responses are returned rather than raised: an HTTPException raised
          # inside BaseHTTPMiddleware is not seen by the application's
          # exception handlers and would reach the client as a 500.
          if client_ip in BLACKLIST:
              return JSONResponse(status_code=403, content={"detail": "IP blocked"})
          if WHITELIST and client_ip not in WHITELIST:
              return JSONResponse(status_code=403, content={"detail": "IP not in whitelist"})
          return await call_next(request)
  136. from fastapi import FastAPI, Depends, File, UploadFile, HTTPException, Security
  137. from fastapi.security import OAuth2PasswordBearer, SecurityScopes
  138. from starlette.status import HTTP_403_FORBIDDEN
  139. import shutil, os
  140. from ocr_processor import ocr_from_pdf
  141. from llama_cpp import Llama
  142. from ip_filter import IPFilterMiddleware
  143.  
  # ASGI application with the IP filter installed as middleware.
  app = FastAPI()
  app.add_middleware(IPFilterMiddleware)

  # Bearer-token scheme; tokens are obtained from the "token" URL and may
  # carry the "read" and/or "write" scope.
  oauth2_scheme = OAuth2PasswordBearer(
      tokenUrl="token",
      scopes={"read": "Read OCR", "write": "Run OCR + LLM"},
  )

  # Static token table: token string -> scopes granted to that token.
  FAKE_TOKENS = {
      "admin-token": {"scopes": ["read", "write"]},
      "read-only-token": {"scopes": ["read"]},
  }

  # GGUF model file, resolved relative to the process working directory.
  MODEL_PATH = "./models/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf"

  # Constructed once at import time so request handlers share one instance.
  llm = Llama(
      model_path=MODEL_PATH,
      n_ctx=8192,
      n_gpu_layers=41,
      verbose=True,
  )
  156.  
  def get_current_user(security_scopes: SecurityScopes, token: str = Depends(oauth2_scheme)):
      """Check that ``token`` is known and carries every required scope.

      Args:
          security_scopes: Scopes required by the endpoint that declared this
              dependency.
          token: Bearer token extracted from the request by ``oauth2_scheme``.

      Returns:
          The token string, unchanged, when it is accepted.

      Raises:
          HTTPException: 401 when the token is not a key of ``FAKE_TOKENS``;
              403 when the token lacks one of the required scopes. Both
              responses carry a ``WWW-Authenticate`` header.
      """
      # Bearer challenge for error responses (RFC 6750), naming the required
      # scopes when the endpoint declared any.
      if security_scopes.scopes:
          challenge = f'Bearer scope="{security_scopes.scope_str}"'
      else:
          challenge = "Bearer"
      auth_headers = {"WWW-Authenticate": challenge}

      if token not in FAKE_TOKENS:
          raise HTTPException(status_code=401, detail="Invalid token", headers=auth_headers)

      token_scopes = FAKE_TOKENS[token]["scopes"]
      if any(scope not in token_scopes for scope in security_scopes.scopes):
          raise HTTPException(status_code=403, detail="Not enough permissions", headers=auth_headers)
      return token
  165.  
  @app.post("/upload/")
  async def upload_pdf(file: UploadFile = File(...), token: str = Security(get_current_user, scopes=["write"])):
      """Run OCR on an uploaded PDF and pass the text to the LLM ("write" scope).

      Args:
          file: The uploaded file; its contents are spooled to a temporary
              file on disk so ``ocr_from_pdf`` can read it by path.
          token: Token accepted by ``get_current_user`` with the ``"write"``
              scope required; it is not used in the body.

      Returns:
          A dict with a single ``"output"`` key holding the stripped text of
          the first completion choice returned by ``llm``.
      """
      import tempfile

      # Never build the path from ``file.filename``: it is client-controlled,
      # so values such as "../../etc/cron.d/x" would escape the temp
      # directory, and two uploads with the same name would clobber each
      # other. A randomly named temp file avoids both problems.
      # NOTE(review): the ".pdf" suffix assumes ocr_from_pdf does not need the
      # client's original extension - confirm against ocr_processor.
      tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
      try:
          with tmp:
              shutil.copyfileobj(file.file, tmp)
          ocr_text = ocr_from_pdf(tmp.name)
      finally:
          # Remove the upload even when copying or OCR raises.
          os.remove(tmp.name)

      result = llm(ocr_text)
      return {"output": result["choices"][0]["text"].strip()}
  177.  
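  # ---------------------------------------------------------------------
  # For testing api_server.py (the module docker-compose serves as
  # "api_server:app"): a token endpoint that issues the hard-coded tokens.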
  180. from fastapi.security import OAuth2PasswordRequestForm
  181.  
  @app.post("/token")
  async def login(form_data: OAuth2PasswordRequestForm = Depends()):
      """Exchange one of the hard-coded username/password pairs for a token.

      Args:
          form_data: Submitted login form; only ``username`` and ``password``
              are read.

      Returns:
          A dict with ``"access_token"`` and ``"token_type"`` (``"bearer"``).

      Raises:
          HTTPException: 400 when the pair matches no known account.
      """
      # (username, password) -> token handed out for that account.
      issued_tokens = {
          ("admin", "secret"): "admin-token",
          ("reader", "read"): "read-only-token",
      }

      access_token = issued_tokens.get((form_data.username, form_data.password))
      if access_token is None:
          raise HTTPException(status_code=400, detail="Invalid credentials")
      return {"access_token": access_token, "token_type": "bearer"}
  190.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement