Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import cv2
import numpy as np
import pytesseract

# Tell pytesseract where the Tesseract executable lives (Windows install path).
# NOTE(review): this is a raw string, so each "\\" stays as two backslashes in
# the resulting path; presumably Windows tolerates that -- confirm, or switch
# to single backslashes.
pytesseract.pytesseract.tesseract_cmd = r"C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

# Load the input image. cv2.imread does not raise when the file is missing or
# unreadable; it returns None, so fail here with a clear message instead of
# letting a later OpenCV call crash on the None.
img = cv2.imread('test.png')
if img is None:
    raise FileNotFoundError("could not read image 'test.png'")
def get_grayscale(image):
    """Return a single-channel grayscale version of *image*.

    A colour image is converted with ``cv2.COLOR_BGR2GRAY``. An image that is
    already two-dimensional (one channel, no channel axis) is returned
    unchanged, because ``cv2.cvtColor`` rejects single-channel input for that
    conversion.
    """
    if image.ndim == 2:
        # Already grayscale; nothing to convert.
        return image
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
def remove_noise(image):
    """Return *image* smoothed by a median blur with aperture size 5."""
    aperture = 5
    blurred = cv2.medianBlur(image, aperture)
    return blurred
def thresholding(image):
    """Return a binarized (0 / 255) copy of *image* using Otsu's method.

    With ``cv2.THRESH_OTSU`` the threshold is computed from the image
    histogram and the ``0`` passed as the threshold argument is only a
    placeholder. Without that flag a threshold of 0 would turn every
    non-zero pixel white.

    Otsu thresholding expects an 8-bit single-channel image, so convert to
    grayscale before calling this.
    """
    # cv2.threshold returns (threshold_used, result_image); keep the image.
    _, binary = cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binary
def dilate(image):
    """Return *image* dilated once with a 5x5 all-ones uint8 kernel."""
    structuring_element = np.full((5, 5), 1, dtype=np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated
def erode(image):
    """Return *image* eroded once with a 5x5 all-ones uint8 kernel."""
    structuring_element = np.full((5, 5), 1, dtype=np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded
def opening(image):
    """Return the morphological opening of *image*.

    Opening is erosion followed by dilation; it is applied here through
    ``cv2.morphologyEx`` with ``cv2.MORPH_OPEN`` and a 5x5 all-ones uint8
    kernel.
    """
    structuring_element = np.full((5, 5), 1, dtype=np.uint8)
    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring_element)
    return opened
def canny(image):
    """Return the Canny edge map of *image* using thresholds 100 and 200."""
    lower_threshold, upper_threshold = 100, 200
    edges = cv2.Canny(image, lower_threshold, upper_threshold)
    return edges
# --- Preprocessing -----------------------------------------------------------
# OCR is run on the grayscale image. The remaining steps are optional; enable
# whichever ones help for a particular input.
img = get_grayscale(img)
# img = remove_noise(img)
# img = thresholding(img)
# img = dilate(img)
# img = erode(img)
# img = opening(img)
# img = canny(img)

# --- OCR configuration -------------------------------------------------------
# Restrict recognition to ASCII letters, '-' and (via the "\ " sequence,
# presumably an escaped space -- verify how pytesseract tokenizes the config
# string on your platform) the space character; page segmentation mode 12.
custom_config = r'-c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz-\ QWERTYUIOPASDFGHJKLZXCVBNM --psm 12'

# --- Write the recognized words to a text file -------------------------------
text = pytesseract.image_to_string(img, config=custom_config)
words = text.split()
with open('WordsFromWindowsScreen.txt', 'w', encoding='utf-8') as f:
    # Keep only words longer than two characters, each followed by a space.
    f.write("".join(word + " " for word in words if len(word) > 2))

# --- Draw the recognized words on the picture --------------------------------
# The preprocessed image has a single channel, on which BGR colour tuples
# collapse to their first component. Annotate a BGR copy so the box and label
# colours are actually visible.
annotated = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

# image_to_data returns tab-separated text: one header row, then one row per
# detected element. Fields 6-9 are left, top, width, height and field 11 is
# the recognized text, which is empty for page/block/paragraph/line rows.
data = pytesseract.image_to_data(img, config=custom_config)
for row in data.splitlines()[1:]:
    fields = row.split('\t')
    if len(fields) < 12:
        # Row is too short to carry a box and a text field.
        print("missed")
        continue
    label = fields[11]
    if not label.strip():
        # Structural row without text: there is no word to draw.
        continue
    x, y, w, h = (int(value) for value in fields[6:10])
    cv2.rectangle(annotated, (x, y), (x + w, y + h), (255, 0, 0), 1)
    cv2.putText(annotated, label, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 0), 1)

# --- Show the result ---------------------------------------------------------
cv2.imshow("Output", annotated)
cv2.waitKey(0)  # block until a key is pressed
cv2.destroyAllWindows()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement