forked from FIGLAB/ubicoustics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example_liveprediction_simple.py
127 lines (114 loc) · 3.65 KB
/
example_liveprediction_simple.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from keras.models import load_model
import tensorflow as tf
import numpy as np
from vggish_input import waveform_to_examples
import ubicoustics
import pyaudio
from pathlib import Path
import time
import argparse
import wget
# Variables
FORMAT = pyaudio.paInt16  # 16-bit signed integer samples from the mic
CHANNELS = 1  # mono capture
RATE = 16000  # sample rate expected by the VGGish front end (Hz)
CHUNK = RATE  # one-second buffers: callback fires once per second of audio
MICROPHONES_DESCRIPTION = []
FPS = 60.0
###########################
# Check Microphone
###########################
print("=====")
print("1 / 2: Checking Microphones... ")
print("=====")
# Project-local helper module; imported here (not at top of file) so the
# banner above prints before any device enumeration happens.
import microphones
desc, mics, indices = microphones.list_microphones()
if (len(mics) == 0):
    print("Error: No microphone found.")
    exit()
#############
# Read Command Line Args
#############
# Default to the first available input device; the user can override it with
# -m/--mic on the command line, or interactively at the prompt below.
MICROPHONE_INDEX = indices[0]
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--mic", help="Select which microphone / input device to use")
args = parser.parse_args()
try:
    if args.mic:
        MICROPHONE_INDEX = int(args.mic)
        print("User selected mic: %d" % MICROPHONE_INDEX)
    else:
        mic_in = input("Select microphone [%d]: " % MICROPHONE_INDEX).strip()
        if (mic_in != ''):
            MICROPHONE_INDEX = int(mic_in)
# Only a non-numeric value can fail here; the original bare `except:` also
# swallowed SystemExit/KeyboardInterrupt, hiding Ctrl-C during the prompt.
except ValueError:
    print("Invalid microphone")
    exit()

# Find the human-readable description that matches the chosen device index.
# (The index may not correspond to any entry, in which case desc stays "".)
mic_desc = ""
for dev_index, dev_name in zip(indices, mics):
    if (dev_index == MICROPHONE_INDEX):
        mic_desc = dev_name
print("Using mic: %s" % mic_desc)
###########################
# Download model, if it doesn't exist
###########################
MODEL_URL = "https://www.dropbox.com/s/cq1d7uqg0l28211/example_model.hdf5?dl=1"
MODEL_PATH = "models/example_model.hdf5"
print("=====")
print("2 / 2: Checking model... ")
print("=====")
# Single source of truth for the model location (the original repeated the
# same path literal in MODEL_PATH and model_filename).
model_filename = MODEL_PATH
ubicoustics_model = Path(model_filename)
if (not ubicoustics_model.is_file()):
    # wget.download cannot create directories; make sure models/ exists
    # so a fresh checkout doesn't crash on the first run.
    ubicoustics_model.parent.mkdir(parents=True, exist_ok=True)
    print("Downloading example_model.hdf5 [867MB]: ")
    wget.download(MODEL_URL, MODEL_PATH)

##############################
# Load Deep Learning Model
##############################
print("Using deep learning model: %s" % (model_filename))
model = load_model(model_filename)
# TF1-style: capture the default graph so the audio callback (which runs on
# a PyAudio thread) can enter it before calling model.predict.
graph = tf.get_default_graph()

# Map class index -> label string for the "everything" context.
context = ubicoustics.everything
label = dict(enumerate(context))
##############################
# Setup Audio Callback
##############################
def audio_samples(in_data, frame_count, time_info, status_flags):
    """PyAudio non-blocking stream callback: classify one second of audio.

    Converts the raw int16 buffer to float in [-1.0, +1.0], extracts VGGish
    log-mel examples, runs the model, and prints the top predicted label.

    Returns (in_data, pyaudio.paContinue) so the stream keeps running.
    NOTE(review): runs on PyAudio's callback thread — it relies on the
    module-level `graph`, `model`, `label` being fully initialized first.
    """
    global graph
    # np.fromstring on binary input is deprecated and removed in modern
    # NumPy; np.frombuffer is the supported equivalent.
    np_wav = np.frombuffer(in_data, dtype=np.int16) / 32768.0  # Convert to [-1.0, +1.0]
    x = waveform_to_examples(np_wav, RATE)
    predictions = []
    with graph.as_default():
        if x.shape[0] != 0:
            # Model expects (batch, 96 mel frames, 64 mel bands, 1 channel).
            x = x.reshape(len(x), 96, 64, 1)
            pred = model.predict(x)
            predictions.append(pred)
        for prediction in predictions:
            m = np.argmax(prediction[0])
            if (m < len(label)):
                print("Prediction: %s (%0.2f)" % (ubicoustics.to_human_labels[label[m]], prediction[0,m]))
            else:
                print("KeyError: %s" % m)
    return (in_data, pyaudio.paContinue)
##############################
# Launch Application
##############################
while True:
    ##############################
    # Setup Audio
    ##############################
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
                    frames_per_buffer=CHUNK, stream_callback=audio_samples,
                    input_device_index=MICROPHONE_INDEX)

    ##############################
    # Start Non-Blocking Stream
    ##############################
    print("# Live Prediction Using Microphone: %s" % (mic_desc))
    stream.start_stream()
    try:
        # The callback does the work; just keep the main thread alive until
        # the stream goes inactive (device error/unplug), then restart.
        while stream.is_active():
            time.sleep(0.1)
    finally:
        # Release the stream and PortAudio instance before reopening on the
        # next iteration — the original leaked both on every restart.
        stream.stop_stream()
        stream.close()
        p.terminate()