jarekmor

Llama OCR

Nov 28th, 2024
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.36 KB | None | 0 0
  1. # export TOGETHER_API_KEY=your_api_key
  2. # pip install -U pip together
  3.  
  4. import base64
  5. import os
  6.  
  7. from together import Together
  8.  
  9. TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
  10.  
  11. client = Together()
  12.  
  13. def encode_image(image_path):
  14.         """Encode image to base64"""
  15.         with open(image_path, "rb") as image_file:
  16.             return base64.b64encode(image_file.read()).decode('utf-8')
  17.  
  18. def convert_image_to_text(image_path):
  19.     """Convert image to text
  20.  
  21.    Args:
  22.        image_path (str): Path to the image to be converted
  23.    """
  24.    
  25.     encoded_image = encode_image(image_path)
  26.  
  27.     response = client.chat.completions.create(
  28.         model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
  29.         messages=[
  30.             {
  31.                 "role": "system",
  32.                 "content": (
  33.                     "Convert the provided image into Markdown format. Ensure that all content from the page is included, "
  34.                     "such as headers, footers, subtexts, images (with alt text if possible), tables, and any other elements.\n\n"
  35.                     "Requirements:\n\n"
  36.                     "- Output Only Markdown: Return solely the Markdown content without any additional explanations or comments.\n"
  37.                     "- No Delimiters: Do not use code fences or delimiters like \\`\\`\\`markdown.\n"
  38.                     "- Complete Content: Do not omit any part of the page, including headers, footers, and subtext."
  39.                 )
  40.             },
  41.             {
  42.                 "role": "user",
  43.                 "content": [
  44.                     {
  45.                         "type": "text",
  46.                         "text": "convert the uploaded image to text"
  47.                     },
  48.                     {
  49.                         "type": "image_url",
  50.                         "image_url": {
  51.                             "url": f"data:image/jpeg;base64,{encoded_image}"
  52.                         }
  53.                     }
  54.                 ]
  55.             }
  56.         ],
  57.  
  58.         temperature=0.0,
  59.         stream=True
  60.     )
  61.     for token in response:
  62.         if hasattr(token, 'choices') and token.choices:
  63.             print(token.choices[0].delta.content, end='', flush=True)
  64.  
  65.     return ""
  66.  
  67. IMAGAES_DIR = "images"
  68.  
  69. for image in os.listdir(IMAGAES_DIR):
  70.     images_path = os.path.join(IMAGAES_DIR, image)
  71.     convert_image_to_text(images_path)
Add Comment
Please, Sign In to add comment