-
Notifications
You must be signed in to change notification settings - Fork 0
/
digitsaccuracy.py
52 lines (46 loc) · 1.62 KB
/
digitsaccuracy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
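# -*- coding: utf-8 -*-
"""Measure Tesseract's per-digit recognition accuracy on a pickled set of labelled digit images."""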
from PIL import Image
from tesserocr import PyTessBaseAPI
import pickle
# Location of the pickled evaluation set.
test_file = "path to a pickle downloaded from google drive"

# The script scores the ten decimal digits 0-9.
NUM_DIGITS = 10


def load_inputs(path):
    """Return the labelled samples stored in the pickle at *path*.

    Each sample is later read as ``sample[0]`` (the image array passed to
    ``Image.fromarray``) and ``sample[1]`` (the digit label).
    """
    # NOTE(security): unpickling can execute arbitrary code. Only point
    # ``test_file`` at a pickle that comes from a source you trust.
    # Pickles are binary data, so open in "rb"; the ``with`` block makes sure
    # the handle is closed again.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def recognize_digits(inputs):
    """Feed the images to Tesseract one by one.

    Yields a ``(recognized_text, label)`` pair for every sample in *inputs*.
    The recognized text has surrounding whitespace removed and may be empty
    when Tesseract does not report any character.
    """
    # psm=10 is Tesseract's "treat the image as a single character" mode.
    with PyTessBaseAPI(psm=10) as api:
        # Only allow these characters.
        api.SetVariable("tessedit_char_whitelist", "0987654321")
        for sample in inputs:
            api.SetImage(Image.fromarray(sample[0]))
            # The text is kept as returned (no re-encoding to bytes) so the
            # same code runs on Python 2 and Python 3.
            yield api.GetUTF8Text().strip(), sample[1]


def tally_predictions(results):
    """Score ``(recognized_text, label)`` pairs.

    Returns the tuple ``(correct, index, accs, samples, classifications)``:

    * ``correct`` -- number of exact matches,
    * ``index`` -- number of samples seen,
    * ``accs`` -- per-digit count of exact matches,
    * ``samples`` -- per-digit count of samples (the classes may be uneven),
    * ``classifications`` -- per-digit string holding every wrong reading,
      concatenated in the order encountered.
    """
    correct = 0
    index = 0
    accs = [0] * NUM_DIGITS
    samples = [0] * NUM_DIGITS
    misreads = [[] for _ in range(NUM_DIGITS)]

    for text, label in results:
        digit = int(label)
        samples[digit] += 1
        index += 1
        # Exact comparison against the canonical digit string. A substring
        # test would accept an empty reading ("" is contained in every
        # string) and, for a label such as 5.0, would also accept "0".
        if text == str(digit):
            correct += 1
            accs[digit] += 1
        else:
            misreads[digit].append(text)

    classifications = ["".join(wrong) for wrong in misreads]
    return correct, index, accs, samples, classifications


def report_lines(correct, index, accs, samples, classifications):
    """Return the report as a list of lines, in the order they are printed."""
    # An empty evaluation set is reported as 0.0 % instead of dividing by zero.
    accuracy = 100.0 * correct / index if index else 0.0
    lines = [
        str(correct),
        str(index),
        "Accuracy in %: " + str(accuracy),
        "Individual number correct: ",
        str(accs),
        "Individual number of samples: ",
        str(samples),
    ]
    # Wrong readings per digit, characters sorted so repeats group together.
    for digit, wrong in enumerate(classifications):
        lines.append("The incorrect classifications for " + str(digit) + " were: ")
        lines.append("".join(sorted(wrong)))
    return lines


def main():
    """Load the samples, run the OCR over them and print the report."""
    inputs = load_inputs(test_file)
    results = tally_predictions(recognize_digits(inputs))
    for line in report_lines(*results):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(line)


if __name__ == "__main__":
    main()