Advertisement
CHU2

ocr chuchu

Feb 11th, 2025
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.13 KB | Source Code | 0 0
  1. from google.oauth2.service_account import Credentials
  2. from googleapiclient.discovery import build
  3. import gspread
  4. import mimetypes
  5. import datetime
  6. from googleapiclient.http import MediaFileUpload
  7. from flask import Flask, request, jsonify, send_from_directory
  8. import os
  9. import re
  10. import cv2
  11. import numpy as np
  12. import pytesseract
  13. import requests
  14.  
  15. # Flask App
  16. app = Flask(_name_)
  17. UPLOAD_FOLDER = "captured_images"
  18. os.makedirs(UPLOAD_FOLDER, exist_ok=True)
  19.  
  20. # Google API Setup
  21. scopes = [
  22.     "https://www.googleapis.com/auth/spreadsheets",
  23.     "https://www.googleapis.com/auth/drive"
  24. ]
  25. creds = Credentials.from_service_account_file("credentials.json", scopes=scopes)
  26. client = gspread.authorize(creds)
  27. drive_service = build("drive", "v3", credentials=creds)
  28.  
  29. # Google Sheets & Drive Info
  30. sheet_id = "1KcrG1me5UIWw203CqH1q5YWhv3C3LYUIvHsXx8fhbUQ"
  31. sheet = client.open_by_key(sheet_id)
  32. parent_folder_id = "1HMwzXbjakVOM-oVusTzyr1V8RVNEcD29"
  33.  
  34. # ESP32 & Blynk Config
  35. ESP32_IP = "http://192.168.254.111"
  36. BLYNK_AUTH = "b189l6OU64UNP8s1R9JgfIJLwqZocuMr"
  37.  
  38. # Tesseract OCR Config
  39. pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
  40.  
  41. # Public URL
  42. PUBLIC_URL = "https://205b-216-247-24-239.ngrok-free.app"
  43. ANNOTATED_IMAGE_FILENAME = "latest_vehicle_annotated.jpg"
  44.  
  45. def upload_image(file_path, folder_id):
  46.     """Uploads an image to Google Drive and returns the file link."""
  47.     file_name = file_path.split("/")[-1]
  48.     mime_type = mimetypes.guess_type(file_path)[0] or "application/octet-stream"
  49.    
  50.     file_metadata = {
  51.         "name": file_name,
  52.         "parents": [folder_id]
  53.     }
  54.     media = MediaFileUpload(file_path, mimetype=mime_type)
  55.     uploaded_file = drive_service.files().create(body=file_metadata, media_body=media, fields="id").execute()
  56.    
  57.     file_id = uploaded_file.get("id")
  58.     drive_service.permissions().create(
  59.         fileId=file_id,
  60.         body={"role": "reader", "type": "anyone"}
  61.     ).execute()
  62.    
  63.     return f"https://drive.google.com/uc?id={file_id}"
  64.  
  65. def preprocess_image(image_path):
  66.     """Preprocesses the image for better OCR results."""
  67.     image = cv2.imread(image_path)
  68.     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  69.     blurred = cv2.GaussianBlur(gray, (5, 5), 0)
  70.     thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
  71.    
  72.     contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  73.     if contours:
  74.         max_contour = max(contours, key=cv2.contourArea)
  75.         x, y, w, h = cv2.boundingRect(max_contour)
  76.         cropped = gray[y:y+h, x:x+w]
  77.     else:
  78.         cropped = gray
  79.    
  80.     return cv2.threshold(cropped, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
  81.  
  82. def extract_text(image_path):
  83.     """Extracts text from the image using OCR."""
  84.     processed_image = preprocess_image(image_path)
  85.     raw_text = pytesseract.image_to_string(processed_image, lang="eng", config="--psm 6").strip()
  86.     return filter_extracted_text(raw_text)
  87.  
  88. def filter_extracted_text(text):
  89.     """Filters extracted text to remove unwanted characters."""
  90.     words = re.findall(r'\b[a-zA-Z0-9]{3,} [a-zA-Z0-9]{3,}\b', text)
  91.     return " ".join(words)
  92.  
  93. def overlay_text(image_path, text):
  94.     """Overlays extracted text on the image."""
  95.     img = cv2.imread(image_path)
  96.     if img is None:
  97.         return None
  98.    
  99.     font = cv2.FONT_HERSHEY_DUPLEX
  100.     font_scale = 4
  101.     font_thickness = 2
  102.     text_color = (0, 255, 0)
  103.     bg_color = (0, 0, 0)
  104.    
  105.     (text_width, text_height), baseline = cv2.getTextSize(text, font, font_scale, font_thickness)
  106.     x = img.shape[1] - text_width - 20
  107.     y = img.shape[0] - 20
  108.    
  109.     cv2.rectangle(img, (x - 5, y - text_height - 5), (x + text_width + 5, y + baseline + 5), bg_color, -1)
  110.     cv2.putText(img, text, (x, y), font, font_scale, text_color, font_thickness)
  111.    
  112.     annotated_image_path = os.path.join(UPLOAD_FOLDER, ANNOTATED_IMAGE_FILENAME)
  113.     cv2.imwrite(annotated_image_path, img)
  114.     return annotated_image_path
  115.  
  116. @app.route("/capture", methods=["POST"])
  117. def capture():
  118.     """Handles image capture, OCR, and data logging."""
  119.     try:
  120.         image_data = request.data
  121.         if not image_data:
  122.             return jsonify({"status": "error", "message": "No image received"}), 400
  123.        
  124.         timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  125.         image_filename = f"vehicle_{timestamp.replace(':', '-')}.jpg"
  126.         image_path = os.path.join(UPLOAD_FOLDER, image_filename)
  127.        
  128.         with open(image_path, "wb") as image_file:
  129.             image_file.write(image_data)
  130.        
  131.         detected_text = extract_text(image_path)
  132.        
  133.         print(f"🔎 Extracted Text: {detected_text}")
  134.        
  135.         # Send extracted text to ESP32
  136.         response = requests.get(f"{ESP32_IP}/receive_text", params={"text": detected_text})
  137.         print(f"📡 ESP32 Response: {response.text}")
  138.        
  139.         annotated_image_path = overlay_text(image_path, detected_text)
  140.         if not annotated_image_path:
  141.             return jsonify({"status": "error", "message": "Failed to process image"}), 500
  142.        
  143.         image_drive_link = upload_image(annotated_image_path, parent_folder_id)
  144.        
  145.         annotated_image_url = f"{PUBLIC_URL}/uploads/{ANNOTATED_IMAGE_FILENAME}"
  146.        
  147.         # Save Data to Google Sheets
  148.         worksheet = sheet.worksheet("Sheet1")
  149.         worksheet.append_rows([[timestamp, detected_text, image_drive_link]])
  150.  
  151.         return jsonify({
  152.             "status": "success",
  153.             "timestamp": timestamp,
  154.             "extracted_text": detected_text,
  155.             "image_drive_link": image_drive_link,
  156.             "annotated_image_url": annotated_image_url
  157.         }), 200
  158.     except Exception as e:
  159.         return jsonify({"status": "error", "message": str(e)}), 500
  160.  
  161. @app.route("/uploads/latest_vehicle_annotated.jpg")
  162. def get_annotated_image():
  163.     """Serves the latest annotated image."""
  164.     return send_from_directory(UPLOAD_FOLDER, ANNOTATED_IMAGE_FILENAME)
  165.  
  166. if _name_ == "_main_":
  167.     app.run(host="0.0.0.0", port=5000, debug=True)
  168. www.googleapis.com
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement