# ocr.py (from a fork of linus-jansson/btd6farmer)
# width, height, top, left
import pytesseract
import numpy as np
import cv2
import sys
import time
if sys.platform == "win32":
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def formatImageOCR(originalScreenshot):
    """ Preprocesses a screenshot so Tesseract can read the text reliably. """
    screenshot = np.array(originalScreenshot, dtype=np.uint8)
    # Get the local maximum with a morphological close:
    kernelSize = 5
    maxKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernelSize, kernelSize))
    localMax = cv2.morphologyEx(screenshot, cv2.MORPH_CLOSE, maxKernel, None, None, 1, cv2.BORDER_REFLECT101)
    # Perform gain division to flatten the background
    # print(screenshot, localMax)
    gainDivision = np.where(localMax == 0, 0, (screenshot / localMax))
    # Clip the values to [0, 255]
    gainDivision = np.clip((255 * gainDivision), 0, 255)
    # Convert the mat type from float to uint8:
    gainDivision = gainDivision.astype("uint8")
    # Convert to grayscale:
    grayscaleImage = cv2.cvtColor(gainDivision, cv2.COLOR_BGR2GRAY)
    # Resize the image to improve OCR quality
    grayscaleImage = cv2.resize(grayscaleImage, (0, 0), fx=3.0, fy=3.0)
    # Get a binary image via Otsu thresholding:
    _, final_image = cv2.threshold(grayscaleImage, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # cv2.imwrite(f"./DEBUG/OCR_FORMAT_BINARY_IMAGE_{str(time.time())}.png", final_image, [cv2.IMWRITE_PNG_COMPRESSION, 0])
    # Set kernel (structuring element) size:
    kernelSize = 3
    # Set morph operation iterations:
    opIterations = 1
    # Get the structuring element:
    morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernelSize, kernelSize))
    # Perform closing to join broken strokes:
    final_image = cv2.morphologyEx(final_image, cv2.MORPH_CLOSE, morphKernel, None, None, opIterations, cv2.BORDER_REFLECT101)
    # cv2.imwrite(f"./DEBUG/OCR_FORMAT_BEFORE_FLOOD_{str(time.time())}.png", final_image, [cv2.IMWRITE_PNG_COMPRESSION, 0])
    # Flood fill from the corner (white + black):
    cv2.floodFill(final_image, mask=None, seedPoint=(0, 0), newVal=255)
    # Invert the image so the target blobs are colored in white:
    final_image = 255 - final_image
    # Find the blobs on the binary image:
    contours, hierarchy = cv2.findContours(final_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # Process the contours:
    for i, c in enumerate(contours):
        # Get the contour's parent in the hierarchy:
        currentHierarchy = hierarchy[0][i][3]
        # Look only at child contours (the holes inside characters):
        if currentHierarchy != -1:
            # Get the contour bounding rectangle:
            boundRect = cv2.boundingRect(c)
            # Get the dimensions of the bounding rect:
            rectX = boundRect[0]
            rectY = boundRect[1]
            rectWidth = boundRect[2]
            rectHeight = boundRect[3]
            # The center of the contour acts as the seed point for the flood fill:
            fx = rectX + 0.5 * rectWidth
            fy = rectY + 0.5 * rectHeight
            # Fill the hole:
            cv2.floodFill(final_image, mask=None, seedPoint=(int(fx), int(fy)), newVal=0)
            # cv2.imwrite(f"./DEBUG/OCR_FLOOD_{i}_{str(time.time())}.png", final_image, [cv2.IMWRITE_PNG_COMPRESSION, 0])
    return final_image
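
# Hedged debug helper (not part of the original module): runs the same
# preprocessing on an image file and writes the binarized result to disk,
# mirroring the commented-out cv2.imwrite calls above. The "./DEBUG" directory
# and the function name are assumptions made for illustration.
def debugFormatImage(imagePath, outDir="./DEBUG"):
    """ Runs formatImageOCR on an image file and saves the result for inspection. """
    import os
    image = cv2.imread(imagePath)  # loaded as BGR, which formatImageOCR expects
    if image is None:
        raise FileNotFoundError(f"Could not read image: {imagePath}")
    processed = formatImageOCR(image)
    os.makedirs(outDir, exist_ok=True)
    outPath = os.path.join(outDir, f"OCR_DEBUG_{time.time()}.png")
    cv2.imwrite(outPath, processed, [cv2.IMWRITE_PNG_COMPRESSION, 0])
    return outPath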

# TODO: consider a faster OCR setup, e.g.
# https://stackoverflow.com/questions/66334737/pytesseract-is-very-slow-for-real-time-ocr-any-way-to-optimise-my-code
# or https://www.reddit.com/r/learnpython/comments/kt5zzw/how_to_speed_up_pytesseract_ocr_processing/
def getTextFromImage(image):
    """ Returns the text recognized in the image along with the preprocessed image. """
    imageCandidate = formatImageOCR(image)
    # DEBUG: write the preprocessed image to disk
    # cv2.imwrite(f"./DEBUG/{str(time.time())}.png", imageCandidate, [cv2.IMWRITE_PNG_COMPRESSION, 0])
    # NOTE: This part seems to be buggy
    # Read the current round from the screenshot with Tesseract (--psm 7 treats it as a single text line)
    return pytesseract.image_to_string(imageCandidate, config='--psm 7').replace("\n", ""), imageCandidate
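
# Hedged usage sketch (not part of the original module): read a screenshot from
# a path given on the command line and print the recognized text. The fallback
# filename "screenshot.png" is an assumption for illustration only.
if __name__ == "__main__":
    inputPath = sys.argv[1] if len(sys.argv) > 1 else "screenshot.png"
    sampleImage = cv2.imread(inputPath)
    if sampleImage is None:
        print(f"Could not read image: {inputPath}")
    else:
        text, processedImage = getTextFromImage(sampleImage)
        print(f"OCR result: {text!r}")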