-
Notifications
You must be signed in to change notification settings - Fork 1
/
deepSpeechTranscriptNative.js
160 lines (121 loc) · 3.49 KB
/
deepSpeechTranscriptNative.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
/**
*
*
* @See DeepSpeech API documentation
* https://deepspeech.readthedocs.io/en/v0.9.3/NodeJS-API.html
*
* @See DeepSpeech NodeJs binding code
* https://github.com/mozilla/DeepSpeech/blob/master/native_client/javascript/index.ts
*
* @See DeepSpeech NodeJs examples
* https://github.com/mozilla/DeepSpeech-examples#javascript
* https://github.com/mozilla/DeepSpeech-examples/blob/r0.9/nodejs_wav/index.js
*/
const fs = require('fs').promises
const path = require('path')
const DeepSpeech = require('deepspeech')
/**
 * deepSpeechInitialize
 *
 * Builds a DeepSpeech model from the given pbmm file and
 * attaches the external scorer used for decoding.
 *
 * @param {String} modelPath - path of the .pbmm model file
 * @param {String} scorerPath - path of the .scorer file
 *
 * @return {Object} DeepSpeech Model
 *
 * TODO
 * - add metadata object as parameter
 *
 */
function deepSpeechInitialize(modelPath, scorerPath) {
  const sttModel = new DeepSpeech.Model(modelPath)
  sttModel.enableExternalScorer(scorerPath)
  return sttModel
}
/**
 * deepSpeechFreeModel
 *
 * Releases the native resources held by the model
 * and destroys the model object.
 *
 * @param {DeepSpeechMemoryModelObject} model - model returned by deepSpeechInitialize
 *
 */
function deepSpeechFreeModel(model) {
  DeepSpeech.FreeModel(model)
}
/**
 * deepSpeechTranscript
 *
 * return the speech to text (transcript)
 * of the audio contained in the specified filename
 *
 * The function is async to avoid the caller thread is blocked
 * - during audio file reading
 * - but especially during the DeepSpeech engine processing.
 *
 * @param {String} audioFile - path of the WAV file to transcribe
 * @param {DeepSpeechMemoryModelObject} model - initialized DeepSpeech model
 *
 * @return {Promise<String>} text transcript
 * @throws {Error} when the file cannot be read or the engine fails;
 *                 the original error is attached as `cause`.
 */
async function deepSpeechTranscript(audioFile, model) {
  let audioBuffer

  // read the Wav file in memory
  try {
    audioBuffer = await fs.readFile(audioFile)
  }
  catch (error) {
    // throw a real Error (not a bare string) so callers get a stack trace;
    // keep the original message prefix for log compatibility.
    throw new Error(`readFile error: ${error}`, { cause: error })
  }

  // WARNING:
  // no audioBuffer validation is done.
  // The audio file must be a WAV audio in raw format.
  try {
    const transcript = model.stt(audioBuffer)
    return transcript
  }
  catch (error) {
    throw new Error(`model.stt error: ${error}`, { cause: error })
  }
}
/**
 * Command-line demo: load a model, transcribe one WAV file,
 * free the model, printing the elapsed time of each step.
 *
 * Arguments (all optional, positional):
 *   argv[2] model pbmm file, argv[3] scorer file, argv[4] WAV file.
 */
async function main() {
  // CLI arguments, with demo defaults when omitted
  const modelPath = process.argv[2] || './models/deepspeech-0.9.3-models.pbmm'
  const scorerPath = process.argv[3] || './models/deepspeech-0.9.3-models.scorer'
  const audioFile = process.argv[4] || './audio/4507-16021-0012.wav'
  const scriptName = path.basename(__filename, '.js')

  console.log(`\nusage: node ${scriptName} [<model pbmm file>] [<model scorer file>] [<audio file>]`)
  console.log(`using: node ${scriptName} ${modelPath} ${scorerPath} ${audioFile}\n`)

  //
  // Initialize DeepSpeech model, timing the load
  //
  let t0 = new Date()
  const model = deepSpeechInitialize(modelPath, scorerPath)
  const initMs = new Date() - t0

  console.log()
  console.log(`pbmm : ${modelPath}`)
  console.log(`scorer : ${scorerPath}`)
  console.log(`elapsed : ${initMs}ms\n`)

  //
  // transcript an audio file, timing the whole read + decode
  //
  t0 = new Date()
  const result = await deepSpeechTranscript(audioFile, model)
  const sttMs = new Date() - t0

  console.log(`audio file : ${audioFile}`)
  console.log(`transcript : ${result}`)
  console.log(`elapsed : ${sttMs}ms\n`)

  //
  // release native model resources
  //
  t0 = new Date()
  deepSpeechFreeModel(model)
  const freeMs = new Date() - t0

  console.log(`free model elapsed : ${freeMs}ms\n`)
}
if (require.main === module)
main()
module.exports = {
deepSpeechInitialize,
deepSpeechTranscript,
deepSpeechFreeModel
}