# OCR 1.0

from PIL import Image, ImageEnhance
import pytesseract
import cv2
import os

# Point pytesseract at the default Windows install of Tesseract when it is
# present; otherwise keep pytesseract's own default so that a `tesseract`
# binary found on PATH is still used.
# (A raw string must not also escape its backslashes, or the path ends up
# with doubled separators.)
tesseract_exe = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(tesseract_exe):
    pytesseract.pytesseract.tesseract_cmd = tesseract_exe

# Path of the picture to run OCR on.
image = 'test.png'

# Preprocessing applied to the grayscale picture before OCR:
# "thresh" -> Otsu binarisation, "blur" -> median blur,
# anything else -> the grayscale picture is used unchanged.
preprocess = "thresh"

#increase the contrast
# Read the same path the rest of the script uses instead of repeating the
# literal, and close the underlying file as soon as the enhanced copy exists.
# NOTE(review): img1 is not consumed by any line visible in this script; the
# OCR steps work on the picture loaded separately through cv2 -- confirm
# whether the enhanced image was meant to feed the pipeline.
with Image.open(image) as source_image:
    enhancer = ImageEnhance.Contrast(source_image)
    # A factor of 2 doubles the contrast relative to the original picture.
    img1 = enhancer.enhance(2)

#cv2 read picture
# cv2.imread reports failure by returning None instead of raising, so check
# explicitly rather than letting cvtColor fail later with an opaque error.
image_path = image
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(
        "Could not read image file: {!r}".format(image_path))

#convert to a single-channel grayscale picture
bw_file = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

if preprocess == "blur":
    # Median blur with a 3x3 kernel to suppress noise.
    bw_file = cv2.medianBlur(bw_file, 3)
elif preprocess == "thresh":
    # Binarise to pure black/white; with THRESH_OTSU the threshold value is
    # picked automatically, so the 0 passed here is ignored.
    # threshold() returns (used_threshold, image); keep only the image.
    bw_file = cv2.threshold(bw_file, 0, 255,
                            cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
#recognition
# The preprocessed array is handed to pytesseract directly, so no
# intermediate PNG has to be written to (and cleaned up from) the working
# directory.
# --oem 3: default OCR engine mode; --psm 12: sparse text with orientation
# and script detection.
config = r'--oem 3 --psm 12'
text = pytesseract.image_to_string(bw_file, config=config)

#write text-answer
# OCR output can contain characters outside the platform's default encoding
# (e.g. cp1252 on Windows); write UTF-8 explicitly so saving never fails
# with UnicodeEncodeError.
with open('WordsFromWindowsScreen.txt', 'w', encoding='utf-8') as f:
    f.write(text)

#selection and output of the found words on the picture
# Ask pytesseract for parsed columns instead of splitting the raw TSV by
# whitespace: rows describing blocks/paragraphs/lines have an empty text
# column, which a plain split() silently drops.
data = pytesseract.image_to_data(bw_file, config=config,
                                 output_type=pytesseract.Output.DICT)

# Draw on a 3-channel copy: on the single-channel OCR input only the first
# colour component would be used, so the coloured boxes and labels would
# come out white.
annotated = cv2.cvtColor(bw_file, cv2.COLOR_GRAY2BGR)

for word, x, y, w, h in zip(data["text"], data["left"], data["top"],
                            data["width"], data["height"]):
    word = word.strip()
    if not word:
        # Structural row (page/block/paragraph/line) or blank detection:
        # there is no recognised word to mark.
        continue
    cv2.rectangle(annotated, (x, y), (x + w, y + h), (255, 0, 0), 1)
    cv2.putText(annotated, word, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1,
                (255, 255, 0), 1)

#output of the result
cv2.imshow("Output", annotated)
# Block until a key is pressed, then release the window.
cv2.waitKey(0)
cv2.destroyAllWindows()