commit b49a6bc32f432d7e7c9bf1db7fc0a87a7a887c1b
parent c4dc4d886efbf7ed0bf735178edb7151a672c6ae
Author: umhau <umhau@users.noreply.github.com>
Date: Tue, 1 Nov 2016 19:08:35 -0400
file import now works without errors
Diffstat:
| A | acousticfiles.sh | | | 37 | +++++++++++++++++++++++++++++++++++++ |
| A | buildLM.sh | | | 43 | +++++++++++++++++++++++++++++++++++++++++++ |
| A | format_text.py | | | 174 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | getaudio.py | | | 129 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | vmc.sh | | | 23 | +++++++++++++++++++---- |
| A | voicemodel.sh | | | 99 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
6 files changed, 501 insertions(+), 4 deletions(-)
diff --git a/acousticfiles.sh b/acousticfiles.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# DESCRIPTION
+#
+# Produce acoustic feature files from user-supplied voice recordings. These are stored with
+# the associated audio files, and named similarly with an .mfc extension.
+#
+# USAGE
+#
+# bash acousticfiles.sh /audio/folder/path /path/to/model-name.fileids
+#
+# EXAMPLE
+#
+# bash /opt/vmc/functions/acousticfiles.sh ~/audio ~/audio/newmodel.fileids
+#
+# DEPENDENCIES
+#
+# CMU Sphinx
+#
+
+# VARIABLES DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$#" -lt 2 ]; then
+    echo "usage: bash acousticfiles.sh /audio/folder/path /path/to/model-name.fileids" >&2
+    exit 1
+fi
+
+folderpath=${1%/} # audio folder, without a trailing slash
+
+fid_filepath=$2 # filename format: model-name.fileids
+
+# The working directory changes below, so a relative .fileids path would stop resolving.
+# Anchor it to the directory the script was invoked from.
+case $fid_filepath in
+    /*) ;;
+    *) fid_filepath=$PWD/$fid_filepath ;;
+esac
+
+
+# FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+# generate some acoustic feature files
+echo "Generating acoustic feature files..."
+
+# sphinx_fe likes to have a consistent working directory; stop here if it cannot be entered,
+# otherwise the feature files would be looked up relative to the wrong place.
+# (${folderpath:-/} restores "/" when the argument was the root directory itself.)
+if ! cd "${folderpath:-/}"; then
+    echo "acousticfiles.sh: cannot enter audio folder: $folderpath" >&2
+    exit 1
+fi
+
+# Quoting keeps paths containing spaces intact. Tool output is deliberately discarded.
+sphinx_fe -argfile /opt/vmc/tools/en-us/feat.params -samprate 16000 -c "$fid_filepath" -di . -do . -ei wav -eo mfc -mswav yes &> /dev/null
+
+
+
+
diff --git a/buildLM.sh b/buildLM.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# DESCRIPTION
+#
+# Produce binary language model from plain sentence list. Invokes CMU-created perl script
+# located in /opt/vmc/tools. Saves the resulting files alongside the sentence list; the
+# save-directory argument is accepted but is not currently used.
+#
+# USAGE
+#
+# bash buildLM.sh sentence-list model-name save-directory
+#
+# DEPENDENCIES
+#
+# CMU Sphinx
+#
+# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sentence_list_path=$1
+
+model_name=$2
+
+# NOTE(review): kept for interface compatibility, but nothing below reads it. The models
+# are written next to the sentence list. Confirm whether they should go here instead.
+save_directory=$3
+
+tools_dir=/opt/vmc/tools
+
+# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+# run perl script to create language model (quoted so paths with spaces survive)
+perl "$tools_dir/quick_lm.pl" -s "$sentence_list_path" #&> /dev/null
+
+sentence_list_dir=$(dirname "$sentence_list_path")
+
+# rename output: the script expects the perl step to have left <sentence-list>.arpabo
+src=$sentence_list_path.arpabo
+dst=$sentence_list_dir/$model_name.lm
+if ! mv "$src" "$dst"; then
+    # without the text model there is nothing to convert, so stop instead of failing silently
+    echo "buildLM.sh: language model was not produced: $src" >&2
+    exit 1
+fi
+
+# convert lm to binary (bin) format (command was too complex for python to handle)
+sphinx_lm_convert -i "$dst" -o "$dst.bin" &> /dev/null
+
diff --git a/format_text.py b/format_text.py
@@ -0,0 +1,174 @@
+#!/usr/bin/python3
+#
+# DESCRIPTION
+#
+# Creates a number of text files dependent on a sentence file which are required for building
+# a CMU Sphinx voice model. Also uses the extended PocketSphinx pronunciation dictionary.
+#
+# Note that the target directory is the directory where the files should be saved into. This
+# should be similar to the directory the initial command was given from within.
+#
+# USAGE
+#
+# python3 format_text.py /path/to/sentence-file.txt model-name target-directory
+#
+# EXAMPLE
+#
+# python3 /opt/vmc/functions/format_text.py ~/sentence-file.txt model-name target-directory
+#
+# DEPENDENCIES
+#
+# python3
+#
+# IMPORTS =========================================================================================
+
+import pathlib, re, sys, os, string
+
+# VARIABLE DEFINITIONS ============================================================================
+
+# path of the plain-text sentence file; it is read line by line further down
+sentence_file = sys.argv[1] # os.path.basename() to get just the filename
+
+# used as the base name of every output file and as the prefix of each utterance id
+model_name = sys.argv[2]
+
+# directory the output files are written into; trailing path separators are removed
+target_directory = sys.argv[3].rstrip(os.sep)
+
+# dictionary file searched for the pronunciation of each vocabulary word
+pronunciation_dictionary = '/opt/vmc/tools/cmudict-en-us.dict'
+
+# FUNCTION DEFINITION =============================================================================
+
+# Print iterations progress
+def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, barLength = 100):
+    """
+    Call in a loop to create terminal progress bar
+
+    The bar is redrawn in place (carriage return, no newline) on every call; a newline is
+    written once iteration equals total. A total of 0 is shown as a finished bar instead
+    of raising ZeroDivisionError.
+
+    @params:
+        iteration - Required : current iteration (Int)
+        total - Required : total iterations (Int)
+        prefix - Optional : prefix string (Str)
+        suffix - Optional : suffix string (Str)
+        decimals - Optional : positive number of decimals in percent complete (Int)
+        barLength - Optional : character length of bar (Int)
+    """
+    if total:
+        percent_done = 100 * (iteration / float(total))
+        filledLength = int(round(barLength * iteration / float(total)))
+    else:
+        # nothing to do counts as everything done
+        percent_done = 100.0
+        filledLength = barLength
+
+    formatStr = "{0:." + str(decimals) + "f}"
+    percents = formatStr.format(percent_done)
+    bar = '█' * filledLength + '-' * (barLength - filledLength)
+    sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))
+    if iteration == total:
+        sys.stdout.write('\n')
+    sys.stdout.flush()
+
+# LOGIC ===========================================================================================
+
+# One dictionary line looks like "word PH PH ..." or, for an alternate pronunciation,
+# "word(2) PH PH ...". 'base' is the word without the "(n)" marker, 'text' includes it.
+dictionary_entry = re.compile(r'^(?P<text>(?P<base>\S+?)(\(\d+\))?)[ \t](?P<phones>.+)$')
+
+transcription_filename = target_directory + "/" + model_name + '.transcription'
+fileids_filename = target_directory + "/" + model_name + '.fileids'
+
+# every distinct word seen in the sentence file, upper-cased
+vocabulary = set()
+
+# Both files are opened once and truncated ('w'). Utterance ids restart at 0001 on every
+# run, so appending to files left over from an earlier run would only duplicate entries.
+with open(sentence_file) as f, \
+        open(transcription_filename, "w") as transcription_file, \
+        open(fileids_filename, "w") as fileids_file:
+
+    for j, line in enumerate(f, start=1):
+
+        afno = '%04d' % j
+
+        # Strip punctuation from the ends of each word only, so that "don't" stays intact.
+        # The same tokens feed the transcription and the vocabulary, so every transcribed
+        # word is also looked up in the pronunciation dictionary.
+        tokens = [token.strip(string.punctuation) for token in line.split()]
+        tokens = [token for token in tokens if token] # drop tokens that were only punctuation
+        vocabulary.update(token.upper() for token in tokens)
+
+        # transcription line: <s> text </s> (utterance-id)
+        nice_text = ' '.join(tokens).lower()
+        transcription_file.write("<s> " + nice_text + " </s> (" + model_name + "_" + afno + ")\n")
+
+        #fileid
+        fileids_file.write(model_name + "_" + afno + "\n")
+
+# create unique, sorted word list from sentence list
+print("Creating unique, sorted word list...")
+uwords = sorted(vocabulary)
+
+# save word list to file
+print("Saving word list to file...")
+uwordsfilename = target_directory+'/'+model_name+'.vocab' # correct extension
+with open(uwordsfilename, 'w') as uwordsfile:
+    for word in uwords:
+        uwordsfile.write("%s\n" % word)
+
+# Index the pronunciation dictionary by base word in a single pass. Each lookup below is
+# then an exact match, with no rescan of the file per word and no regex built from the word.
+print("Opening pronunciation dictionary...")
+cmudict = {}
+with open(pronunciation_dictionary) as f:
+    for line in f:
+        ms = dictionary_entry.match(line)
+        if ms:
+            entry = ms.group('text')+' '+ms.group('phones')
+            cmudict.setdefault(ms.group('base'), []).append(entry)
+
+pdict = []
+missing_words = []
+l = len(uwords)
+print("Extracting entries corresponding to word list...")
+if l:
+    # with no words there is no progress to show
+    printProgress (0, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20)
+
+for i, word in enumerate(uwords, start=1):
+
+    # all pronunciations of the word, alternates included; dictionary keys are lower case
+    entries = cmudict.get(word.lower())
+
+    if entries:
+        pdict.extend(entries)
+    else:
+        # check for words the pronunciation dictionary doesn't have & save
+        missing_words.append(word)
+
+    printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20)
+
+# save missing words list to file
+if missing_words:
+    missing_words_filename = target_directory+'/'+model_name+'.missing'
+    print("\nWord(s) missing from pronunciation dictionary. See ")
+    print(missing_words_filename+" for list.")
+    with open(missing_words_filename, 'w') as mwordsfile:
+        for word in missing_words:
+            mwordsfile.write("%s\n" % word)
+
+# save pronunciation dictionary to file
+print("Saving pronunciation dictionary to file...")
+pdictfilename = target_directory+'/'+model_name+'.dic'
+with open(pdictfilename, 'w') as pdictfile:
+    for word_entry in pdict:
+        pdictfile.write("%s\n" % word_entry)
+
+# final instructions
+print("Data files created.")
+
diff --git a/getaudio.py b/getaudio.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python3
+#
+# DESCRIPTION
+#
+# getaudio is used to sequentially prompt the user for dictations of displayed sentences.
+#
+# DEPENDENCIES
+#
+# python3-pyaudio, python3
+#
+# USAGE
+#
+# python3 getaudio.py sentence-file /output/folder recording-repetitions model-name
+#
+# LIBRARY IMPORTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+import sys, os, _thread, pyaudio, wave, contextlib
+
+# VARIABLE DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+# NOTE(review): not referenced by any other line of this script as shown; confirm before removing
+pronunciation_dictionary="cmudict-en-us.dict"
+
+# recording parameters handed to pyaudio and written into the WAV header
+chunk = 1024 # frames requested per stream.read() call
+FORMAT = pyaudio.paInt16
+CHANNELS = 1
+RATE = 16000
+
+# text file whose lines are shown to the user as prompts
+sentence_file = sys.argv[1]
+
+# folder the recordings are written to; trailing path separators are removed
+output_folder = sys.argv[2].rstrip(os.sep)
+
+# number of times the whole sentence list is repeated
+reps = int(sys.argv[3])
+
+# prefix of each recording's file name
+model_name = sys.argv[4]
+
+
+# FUNCTION DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+# ignore stderr messages: as from pyaudio
+@contextlib.contextmanager
+def ignore_stderr():
+ """Context manager that points file descriptor 2 at os.devnull for the with-body.
+
+ The previous descriptor is put back when the body finishes, including when it raises.
+ """
+ devnull = os.open(os.devnull, os.O_WRONLY)
+ # keep a duplicate of the current fd 2 so it can be restored afterwards
+ old_stderr = os.dup(2)
+ # push out anything Python has buffered before fd 2 is swapped
+ sys.stderr.flush()
+ os.dup2(devnull, 2)
+ # fd 2 now refers to the null device; the extra descriptor is no longer needed
+ os.close(devnull)
+ try:
+ yield
+ finally:
+ # restore the saved descriptor and release the duplicate
+ os.dup2(old_stderr, 2)
+ os.close(old_stderr)
+
+def record_until_keypress(audio_filepath):
+    """Record audio until [enter] is pressed, then save it to audio_filepath as a WAV file.
+
+    The stream is opened with the module-level FORMAT, CHANNELS, RATE and chunk settings.
+    A helper thread waits on input() while this thread keeps reading chunks; at least one
+    chunk is always recorded. The stream and the PyAudio instance are released even if
+    recording is interrupted, in which case no file is written and the exception propagates.
+    """
+
+    # detect keypress [enter]
+    def input_thread(stop_flag):
+        try:
+            input()
+        except EOFError:
+            # stdin was closed: treat that as a stop request rather than recording forever
+            pass
+        finally:
+            stop_flag.append(None)
+
+    # initialize audio stream - and keep it quiet
+    with ignore_stderr():
+        p = pyaudio.PyAudio()
+        stream = p.open(format = FORMAT,
+                        channels = CHANNELS,
+                        rate = RATE,
+                        input = True,
+                        frames_per_buffer = chunk)
+
+    frames = []
+    try:
+        # ask for the sample width while the PyAudio instance is still alive
+        sample_width = p.get_sample_size(FORMAT)
+
+        # create interrupt thread; it appends to the list once [enter] arrives
+        stop_flag = []
+        _thread.start_new_thread(input_thread, (stop_flag,))
+
+        # record data during loop
+        while True:
+            frames.append(stream.read(chunk))
+            if stop_flag:
+                stream.stop_stream()
+                break
+    finally:
+        # exit cleanly, whether the loop ended normally or was interrupted
+        stream.close()
+        p.terminate()
+
+    # write data to WAVE file; the with-block closes it even if a write fails
+    with wave.open(audio_filepath, 'wb') as wf:
+        wf.setnchannels(CHANNELS)
+        wf.setsampwidth(sample_width)
+        wf.setframerate(RATE)
+        wf.writeframes(b''.join(frames))
+
+
+# LOGIC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+# make sure there is somewhere to put the recordings
+if not os.path.exists(output_folder):
+    os.makedirs(output_folder)
+
+# every line of the sentence file becomes one prompt
+with open(sentence_file) as f:
+    sentence_list = f.readlines()
+
+num_recs = len(sentence_list)*reps
+
+# walk through the prompts, recording one file per prompt; Ctrl-C ends the session quietly
+try:
+
+    input("Press [enter], read text, & press [enter].")
+
+    # j is the 1-based recording number, shared by the prompt and the file name
+    for j, sentence in enumerate(sentence_list*reps, start=1):
+
+        # show the prompt
+        print("Recording no. %04d of %04d: \n\n\t%s" % (j, num_recs, sentence), end='\r')
+
+        # recording file should look like this (e.g.): ./bespoke_training_data/audio/arctic_0001.wav
+        recording_path = output_folder + os.sep + model_name + "_%04d.wav" % j
+        record_until_keypress(recording_path)
+
+except KeyboardInterrupt:
+    pass
+
+
diff --git a/vmc.sh b/vmc.sh
@@ -68,18 +68,21 @@ fdir=/opt/vmc/functions
# OBTAIN REQUISITE FILES --------------------------------------------------------------------------
-# get audio files
-if [ $1 = '-record' ]; then
+echo
+echo "Collecting required files..."
+
+# get audio files and put them where they go
+if [[ $2 = '-record' ]]; then
mkdir -p $audio_folder
python3 $fdir/getaudio.py $sentence_file $audio_folder $iterations $model_name
-elif [ $1 = '-import' ]; then
+elif [[ $2 = '-import' ]]; then
mkdir -p $audio_folder
- cp -r $audio_file_directory $audio_folder
+ cp -a $audio_file_directory/*.wav $audio_folder/
fi
@@ -88,14 +91,23 @@ cp -r $tdir/en-us $output_folder
# PRODUCE DERIVATIVE FILES ------------------------------------------------------------------------
+echo
+echo "Producing sentence file derivatives..."
+
# get derivatives of sentence file
python3 $fdir/format_text.py $sentence_file $model_name $output_folder
+echo
+echo "Producing audio file derivatives..."
+
# get derivatives of audio files
bash $fdir/acousticfiles.sh $audio_folder $output_folder/$model_name.fileids
# CREATE MODELS -----------------------------------------------------------------------------------
+echo
+echo "Creating models..."
+
# build language model
bash $fdir/buildLM.sh $sentence_file $model_name $output_folder
@@ -103,3 +115,5 @@ bash $fdir/buildLM.sh $sentence_file $model_name $output_folder
bash $fdir/voicemodel.sh $model_name $output_folder $audio_folder $output_folder
+echo
+echo "Process complete."
+\ No newline at end of file
diff --git a/voicemodel.sh b/voicemodel.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+#
+# DESCRIPTION
+#
+# Given acoustic feature files and sentence file derivatives, produce voice model.
+#
+# USAGE
+#
+# bash voicemodel.sh model-name model-dir acoustic-files-dir sentence-file-derivatives-dir
+#
+# EXAMPLE
+#
+# bash voicemodel.sh new_model ~/tools/new_model ~/tools/new_model/audio ~/tools/new_model
+#
+# DEPENDENCIES
+#
+# CMU Sphinx
+#
+# NOTES
+#
+# This script is primarily using a copy of en-us that is being actively edited as it is
+# adapted to become a custom voice model.
+#
+# Binaries are located in /opt/vmc/tools.
+#
+# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$#" -lt 4 ]; then
+    echo "usage: bash voicemodel.sh model-name model-dir acoustic-files-dir sentence-file-derivatives-dir" >&2
+    exit 1
+fi
+
+# Print the absolute path of an existing directory; fails if it cannot be entered.
+# The tools below run from inside the audio directory, so relative arguments would
+# otherwise point at the wrong place once the working directory changes.
+abs_dir() {
+    (cd -- "$1" 2> /dev/null && pwd)
+}
+
+model_name=$1
+
+# location of adapted voice model files: copy of en-us, audio files, etc.
+model_dir=$(abs_dir "$2") || { echo "voicemodel.sh: cannot access model dir: $2" >&2; exit 1; }
+
+# directory containing audio files and audio feature files
+af_dir=$(abs_dir "$3") || { echo "voicemodel.sh: cannot access acoustic files dir: $3" >&2; exit 1; }
+
+# directory containing sentence file derivatives
+sf_dir=$(abs_dir "$4") || { echo "voicemodel.sh: cannot access sentence derivatives dir: $4" >&2; exit 1; }
+
+tools_dir=/opt/vmc/tools
+
+pronunciation_dictionary=$tools_dir/cmudict-en-us.dict
+
+# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+# convert binary mdef file to .txt
+pocketsphinx_mdef_convert -text "$model_dir/en-us/mdef" "$model_dir/en-us/mdef.txt" &> /dev/null
+
+# run tools to create voice model; feature files and accumulators live in the audio directory
+cd "$af_dir" || exit 1
+
+# sphinx_fe
+sphinx_fe \
+    -argfile "$model_dir/en-us/feat.params" \
+    -samprate 16000 \
+    -c "$sf_dir/$model_name.fileids" \
+    -di . \
+    -do . \
+    -ei wav \
+    -eo mfc \
+    -mswav yes \
+    &> /dev/null
+
+"$tools_dir/bw" \
+    -hmmdir "$model_dir/en-us" \
+    -moddeffn "$model_dir/en-us/mdef.txt" \
+    -ts2cbfn .ptm. \
+    -feat 1s_c_d_dd \
+    -svspec 0-12/13-25/26-38 \
+    -cmn current \
+    -agc none \
+    -dictfn "$pronunciation_dictionary" \
+    -ctlfn "$sf_dir/$model_name.fileids" \
+    -lsnfn "$sf_dir/$model_name.transcription" \
+    -accumdir . \
+    &> /dev/null
+
+"$tools_dir/mllr_solve" \
+    -meanfn "$model_dir/en-us/means" \
+    -varfn "$model_dir/en-us/variances" \
+    -outmllrfn mllr_matrix \
+    -accumdir . \
+    &> /dev/null
+
+# map_adapt writes its results over the model files it read
+"$tools_dir/map_adapt" \
+    -moddeffn "$model_dir/en-us/mdef.txt" \
+    -ts2cbfn .ptm. \
+    -meanfn "$model_dir/en-us/means" \
+    -varfn "$model_dir/en-us/variances" \
+    -mixwfn "$model_dir/en-us/mixture_weights" \
+    -tmatfn "$model_dir/en-us/transition_matrices" \
+    -accumdir . \
+    -mapmeanfn "$model_dir/en-us/means" \
+    -mapvarfn "$model_dir/en-us/variances" \
+    -mapmixwfn "$model_dir/en-us/mixture_weights" \
+    -maptmatfn "$model_dir/en-us/transition_matrices" \
+    &> /dev/null
+
+"$tools_dir/mk_s2sendump" \
+    -pocketsphinx yes \
+    -moddeffn "$model_dir/en-us/mdef.txt" \
+    -mixwfn "$model_dir/en-us/mixture_weights" \
+    -sendumpfn "$model_dir/en-us/sendump" \
+    &> /dev/null
+\ No newline at end of file