Text Detection and OCR

# Imports used in this section.
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Define a list to store the vocabulary: the characters the recognizer can output.
vocabulary = []

# Open file to import the vocabulary.
with open("./resources/alphabet_94.txt") as f:
    # Read the file line by line and append each character to the vocabulary list.
    # (The with-statement closes the file automatically.)
    for l in f:
        vocabulary.append(l.strip())
print("Vocabulary:", vocabulary)
print("Vocabulary size: ", len(vocabulary))

# Load the DB text-detection model (ResNet-50 backbone).
textDetector = cv2.dnn_TextDetectionModel_DB("./resources/DB_TD500_resnet50.onnx")

# Input size for the detector network.
inputSize = (640, 640)

# Set the thresholds used to binarize the probability map and to filter detected polygons.
binThresh = 0.3
polyThresh = 0.5

# Per-channel mean values subtracted from the input during preprocessing.
mean = (122.67891434, 116.66876762, 104.00698793)

# Apply the thresholds and the preprocessing parameters (scale, input size, mean, swapRB).
textDetector.setBinaryThreshold(binThresh).setPolygonThreshold(polyThresh)
textDetector.setInputParams(1.0/255, inputSize, mean, True)

# CRNN model for text recognition.
textRecognizer = cv2.dnn_TextRecognitionModel("./resources/crnn_cs.onnx")
# Greedy CTC decoding: pick the most likely character at each time step.
textRecognizer.setDecodeType("CTC-greedy")
textRecognizer.setVocabulary(vocabulary)
# The CRNN expects 100x32 inputs, normalized to roughly [-1, 1].
textRecognizer.setInputParams(1/127.5, (100, 32), (127.5, 127.5, 127.5), True)

# NOTE: `image` is assumed to be a BGR image loaded earlier (e.g. with cv2.imread).
# Use the DB text detector initialized above to detect text regions in the image.
boxes, confs = textDetector.detect(image)
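# Optional sanity check: each detection is a quadrilateral given as a 4x2 array of
# (x, y) corner points, paired with a confidence score.
print("Number of detections:", len(boxes))
print("First box vertices:\n", boxes[0])
print("First box confidence:", confs[0])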

# Draw the detected text bounding boxes on the image.
cv2.polylines(image, boxes, True, (255, 0, 255), 4)
# Display the result (reverse the channel order from BGR to RGB for Matplotlib).
plt.figure(figsize=(10, 8))
plt.imshow(image[:, :, ::-1]); plt.title('Bounding boxes');

def fourPointsTransform(frame, vertices):
    """Warp the quadrilateral ROI of `frame` defined by `vertices` into a 100x32 rectangle."""
    # Source vertices of the detected box, as float32 for getPerspectiveTransform.
    vertices = np.asarray(vertices).astype(np.float32)
    outputSize = (100, 32)
    # Target corners, matching the order in which the detector returns vertices:
    # bottom-left, top-left, top-right, bottom-right.
    targetVertices = np.array([
        [0, outputSize[1] - 1],
        [0, 0],
        [outputSize[0] - 1, 0],
        [outputSize[0] - 1, outputSize[1] - 1]], dtype="float32")
    # Compute the perspective (homography) matrix and warp the ROI to the target rectangle.
    perspectiveMatrix = cv2.getPerspectiveTransform(vertices, targetVertices)
    result = cv2.warpPerspective(frame, perspectiveMatrix, outputSize)
    return result

# Display the transformed output of the first detected text box.
warped_detection = fourPointsTransform(image, boxes[0])
plt.figure(figsize=(10, 8))
plt.imshow(warped_detection[:, :, ::-1]); plt.title('Transformed detected text');
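
# With detection and rectification in place, a minimal sketch of the full OCR loop:
# warp each detected box to the recognizer's 100x32 input and run the CRNN on it.
# The names below (recognized_texts, warped) are illustrative, not from the original code.
# Note that `image` already has the boxes drawn on it; for cleaner crops, warp from an
# unannotated copy of the image.
recognized_texts = []
for box in boxes:
    # Rectify the detected quadrilateral into the recognizer's expected input shape.
    warped = fourPointsTransform(image, box)
    # recognize() runs the CRNN and decodes the character sequence with CTC-greedy.
    text = textRecognizer.recognize(warped)
    recognized_texts.append(text)
print("Recognized text:", recognized_texts)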
