‹ projects

vmc

a voice model creator for CMU Sphinx
Log | Files | Refs | README | LICENSE

commit b49a6bc32f432d7e7c9bf1db7fc0a87a7a887c1b
parent c4dc4d886efbf7ed0bf735178edb7151a672c6ae
Author: umhau <umhau@users.noreply.github.com>
Date:   Tue,  1 Nov 2016 19:08:35 -0400

file import now works without errors
Diffstat:
A acousticfiles.sh | 37 +++++++++++++++++++++++++++++++++++++
A buildLM.sh | 43 +++++++++++++++++++++++++++++++++++++++++++
A format_text.py | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A getaudio.py | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M vmc.sh | 23 +++++++++++++++++++----
A voicemodel.sh | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 501 insertions(+), 4 deletions(-)

diff --git a/acousticfiles.sh b/acousticfiles.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# +# DESCRIPTION +# +# Produce acoustic feature files from user-supplied voice recordings. These are stored with +# the associated audio files, and named similarly with an .mfc extension. +# +# USAGE +# +# bash acousticfiles.sh /audio/folder/path /path/to/model-name.fileids +# +# EXAMPLE +# +# bash /opt/vmc/functions/acousticfiles.sh ~/audio ~/audio/newmodel.fileids +# +# DEPENDENCIES +# +# CMU Sphinx +# + +# VARIABLES DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +folderpath=${1%/} + +fid_filepath=$2 # filename format: model-name.fileids + + +# FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +# generate some acoustic feature files +echo "Generating acoustic feature files..." +cd $folderpath # sphinx_fe likes to have a consistent working directory +sphinx_fe -argfile /opt/vmc/tools/en-us/feat.params -samprate 16000 -c $fid_filepath -di . -do . -ei wav -eo mfc -mswav yes &> /dev/null + + + + diff --git a/buildLM.sh b/buildLM.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# +# DESCRIPTION +# +# Produce binary language model from plain sentence list. Invokes CMU-created perl script +# located in /opt/vmc/tools. Saves file in given directory. 
+# +# USAGE +# +# bash buildLM.sh sentence-list model-name save-directory +# +# DEPENDENCIES +# +# CMU Sphinx +# +# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +sentence_list_path=$1 + +model_name=$2 + +save_directory=$3 + +tools_dir=/opt/vmc/tools + +# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +# run perl script to create language model +perl $tools_dir/quick_lm.pl -s $sentence_list_path #&> /dev/null + +sentence_list=`basename $sentence_list_path` + +sentence_list_dir=`dirname $sentence_list_path` + +# rename output +src=$sentence_list_path.arpabo +dst=$sentence_list_dir/$model_name.lm +mv $src $dst + +# convert lm to binary (bin) format (command was too complex for python to handle) +filename=$sentence_list_dir/$model_name.lm +sphinx_lm_convert -i $dst -o $dst.bin &> /dev/null + diff --git a/format_text.py b/format_text.py @@ -0,0 +1,174 @@ +#!/usr/bin/python3 +# +# DESCRIPTION +# +# Creates a number of text files dependent on a sentence file which are required for building +# a CMU Sphinx voice model. Also uses the extended PocketSphinx pronunciation dictionary. +# +# Note that the target directory is the directory where the files should be saved into. This +# should be similar to the directory the initial command was given from within. 
+# +# USAGE +# +# python3 format_text.py /path/to/sentence-file.txt model-name target-directory +# +# EXAMPLE +# +# python3 /opt/vmc/functions/format_text.py ~/sentence-file.txt model-name target-directory +# +# DEPENDENCIES +# +# python3 +# +# IMPORTS ========================================================================================= + +import pathlib, re, sys, os, string + +# VARIABLE DEFINITIONS ============================================================================ + +sentence_file = sys.argv[1] # os.path.basename() to get just the filename + +model_name = sys.argv[2] + +target_directory = sys.argv[3].rstrip(os.sep) + +pronunciation_dictionary = '/opt/vmc/tools/cmudict-en-us.dict' + +# FUNCTION DEFINITION ============================================================================= + +# Print iterations progress +def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, barLength = 100): + """ + Call in a loop to create terminal progress bar + @params: + iteration - Required : current iteration (Int) + total - Required : total iterations (Int) + prefix - Optional : prefix string (Str) + suffix - Optional : suffix string (Str) + decimals - Optional : positive number of decimals in percent complete (Int) + barLength - Optional : character length of bar (Int) + """ + formatStr = "{0:." 
+ str(decimals) + "f}" + percents = formatStr.format(100 * (iteration / float(total))) + filledLength = int(round(barLength * iteration / float(total))) + bar = '█' * filledLength + '-' * (barLength - filledLength) + sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)), + if iteration == total: + sys.stdout.write('\n') + sys.stdout.flush() + +# LOGIC =========================================================================================== + +sentences_text = "" + +j=0 + +with open(sentence_file) as f: + + for line in f: + + sentences_text = sentences_text+' '+line + + j+=1 + afno = str('%04d'%j) + + # get rid of punctuation + exclude = set(string.punctuation) + sentence = ''.join(ch for ch in line if ch not in exclude) + + nice_text = sentence.lower().rstrip() + formatted_text = "</s> " + nice_text + " </s> (" + model_name + "_" + afno + ")\n" + # formatted_text = nice_text+"\n" + formatted_filename = target_directory + "/" +model_name + '.transcription' + hs = open(formatted_filename,"a") + hs.write(formatted_text) + hs.close() + + #fileid + formatted_text = model_name + "_" + afno + "\n" + formatted_filename = target_directory + "/" +model_name + '.fileids' + hs = open(formatted_filename,"a") + hs.write(formatted_text) + hs.close() + + sentences_text = sentences_text+' '+line + +# def sentence_parsing(sentences_text, model_name, sentence_file, pronunciation_dictionary): + +# create unique, sorted word list from sentence list +words = [] +print("Creating unique, sorted word list...") +[words.append(word.strip(string.punctuation).upper()) for word in sentences_text.split()] +# set() uniques the list, sorted() puts them a-z. 
+uwords = sorted(list(set(words))) + +# save word list to file +print("Saving word list to file...") +uwordsfilename = str(target_directory+'/'+model_name+'.vocab') # correct extension +uwordsfile = open(uwordsfilename, 'w') +for word in uwords: + uwordsfile.write("%s\n" % word) + +# create pronunciation dictionary from word list +cmudict = [] +print("Opening pronunciation dictionary...") +with open(pronunciation_dictionary) as f: + for line in f: + cmudict.append(line) + +pdict = [] +missing_words = [] +l = len(uwords) +i = 0 +print("Extracting entries corresponding to word list...") +printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20) + +curr_line = 0 + +for word in uwords: + + wordmatch=False # a counter to help with efficiency + for line in cmudict[curr_line:]: + + regex_string = str('^(?P<text>'+str(word.lower()) + '(\(\d\))?)( |\t)(?P<phones>.+)$') + + if re.match(regex_string, line): + # print("match!") + ms = re.search(regex_string, line) + pdict.append(str(ms.group('text')+' '+ms.group('phones'))) + wordmatch=True + + # if I already made a match and I'm not now, time to break. this allows for finding + # alternate pronunciations + elif wordmatch: + # curr_line +=1 + break + + # curr_line +=1 + + # check for words the pronunciation dictionary doesn't have & save + if not wordmatch: + missing_words.append(word) + + i +=1 + printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20) + +# save missing words list to file +if missing_words: + missing_words_filename = str(target_directory+'/'+model_name+'.missing') + print("\nWord(s) missing from pronunciation dictionary. 
See ") + print(missing_words_filename+" for list.") + mwordsfile = open(missing_words_filename, 'w') + for word in missing_words: + mwordsfile.write("%s\n" % word) + +# save pronunciation dictionary to file +print("Saving pronunciation dictionary to file...") +pdictfilename = str(target_directory+'/'+model_name+'.dic') +pdictfile = open(pdictfilename, 'w') +for word_entry in pdict: + pdictfile.write("%s\n" % word_entry) + +# final instructions +print("Data files created.") + diff --git a/getaudio.py b/getaudio.py @@ -0,0 +1,129 @@ +#!/usr/bin/python3 +# +# DESCRIPTION +# +# getaudio is used to sequentially prompt the user for dictations of displayed sentences. +# +# DEPENDENCIES +# +# python3-pyaudio, python3 +# +# USAGE +# +# python3 getaudio.py sentence-file /output/folder recording-repetitions model-name +# +# LIBRARY IMPORTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +import sys, os, _thread, pyaudio, wave, contextlib + +# VARIABLE DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +pronunciation_dictionary="cmudict-en-us.dict" + +chunk = 1024 +FORMAT = pyaudio.paInt16 +CHANNELS = 1 +RATE = 16000 + +sentence_file = sys.argv[1] + +output_folder = sys.argv[2].rstrip(os.sep) + +reps = int(sys.argv[3]) + +model_name = sys.argv[4] + + +# FUNCTION DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +# ignore sdterr messages: as from pyaudio +@contextlib.contextmanager +def ignore_stderr(): + devnull = os.open(os.devnull, os.O_WRONLY) + old_stderr = os.dup(2) + sys.stderr.flush() + os.dup2(devnull, 2) + os.close(devnull) + try: + yield + finally: + os.dup2(old_stderr, 2) + os.close(old_stderr) + +def record_until_keypress(audio_filepath): + + # detect keypress [enter] + def input_thread(L): + input() + L.append(None) + + # initialize audio stream - and keep it quiet + with ignore_stderr(): + p = pyaudio.PyAudio() + stream = p.open(format = FORMAT, + 
channels = CHANNELS, + rate = RATE, + input = True, + frames_per_buffer = chunk) + + # create interrupt thread + L = [] + _thread.start_new_thread(input_thread, (L,)) + + # record data during loop + frames = [] + while True: + data = stream.read(chunk) + frames.append(data) + if L: + stream.stop_stream() + break + + # exit cleanly after break + stream.close() + p.terminate() + + # write data to WAVE file + data = b''.join(frames) + wf = wave.open(audio_filepath, 'wb') + wf.setnchannels(CHANNELS) + wf.setsampwidth(p.get_sample_size(FORMAT)) + wf.setframerate(RATE) + wf.writeframes(data) + wf.close() + + +# LOGIC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +if not os.path.exists(output_folder): + os.makedirs(output_folder) + +# create list of sentences for prompt +sentence_list = [] +with open(sentence_file) as f: + for line in f: + sentence_list.append(line) + +num_recs = len(sentence_list)*reps + +# collect audio files +try: + + input("Press [enter], read text, & press [enter].") + + j=0 + + for sentence in sentence_list*reps: + #recording number + j+=1 + + # record audio with visual + print("Recording no. %04d of %04d: \n\n\t%s" % (j, num_recs, sentence), end='\r') + + # recording file should look like this (e.g.): ./bespoke_training_data/audio/arctic_0001.wav + record_until_keypress(str(output_folder + os.sep + model_name + "_%04d.wav" % j)) + +except KeyboardInterrupt: + pass + + diff --git a/vmc.sh b/vmc.sh @@ -68,18 +68,21 @@ fdir=/opt/vmc/functions # OBTAIN REQUISITE FILES -------------------------------------------------------------------------- -# get audio files -if [ $1 = '-record' ]; then +echo +echo "Collecting required files..." 
+ +# get audio files and put them where they go +if [[ $2 = '-record' ]]; then mkdir -p $audio_folder python3 $fdir/getaudio.py $sentence_file $audio_folder $iterations $model_name -elif [ $1 = '-import' ]; then +elif [[ $2 = '-import' ]]; then mkdir -p $audio_folder - cp -r $audio_file_directory $audio_folder + cp -a $audio_file_directory/*.wav $audio_folder/ fi @@ -88,14 +91,23 @@ cp -r $tdir/en-us $output_folder # PRODUCE DERIVATIVE FILES ------------------------------------------------------------------------ +echo +echo "Producing sentence file derivatives..." + # get derivatives of sentence file python3 $fdir/format_text.py $sentence_file $model_name $output_folder +echo +echo "Producing audio file derivatives..." + # get derivatives of audio files bash $fdir/acousticfiles.sh $audio_folder $output_folder/$model_name.fileids # CREATE MODELS ----------------------------------------------------------------------------------- +echo +echo "Creating models..." + # build language model bash $fdir/buildLM.sh $sentence_file $model_name $output_folder @@ -103,3 +115,5 @@ bash $fdir/buildLM.sh $sentence_file $model_name $output_folder bash $fdir/voicemodel.sh $model_name $output_folder $audio_folder $output_folder +echo +echo "Process complete." +\ No newline at end of file diff --git a/voicemodel.sh b/voicemodel.sh @@ -0,0 +1,98 @@ +#!/bin/bash +# +# DESCRIPTION +# +# Given acoustic feature files and sentence file derivatives, produce voice model. +# +# USAGE +# +# bash voicemodel.sh model-name model-dir acoustic-files-dir sentence-file-derivatives-dir +# +# EXAMPLE +# +# bash voicemodel.sh new_model ~/tools/new_model ~/tools/new_model/audio ~/tools/new_model +# +# DEPENDENCIES +# +# CMU Sphinx +# +# NOTES +# +# This script is primarily using a copy of en-us that is being actively edited as it is +# adapted to become a custom voice model. +# +# Binaries are located in /opt/vmc/tools. 
+# +# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +model_name=$1 +model_dir=$2 # location of adapted voice model files: copy of en-us, audio files, etc. +af_dir=$3 # directory containing audio files and audio feature files +sf_dir=$4 # directory containing sentence file derivatives + +tools_dir=/opt/vmc/tools + +pronunciation_dictionary=$tools_dir/cmudict-en-us.dict + +# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +# convert binary mdef file to .txt +cd $model_dir +pocketsphinx_mdef_convert -text $model_dir/en-us/mdef $model_dir/en-us/mdef.txt &> /dev/null + +# run tools to create voice model +cd $af_dir + +# sphinx_fe +sphinx_fe \ + -argfile $model_dir/en-us/feat.params \ + -samprate 16000 \ + -c $sf_dir/$model_name.fileids \ + -di . \ + -do . \ + -ei wav \ + -eo mfc \ + -mswav yes \ + &> /dev/null + +$tools_dir/bw \ + -hmmdir $model_dir/en-us \ + -moddeffn $model_dir/en-us/mdef.txt \ + -ts2cbfn .ptm. \ + -feat 1s_c_d_dd \ + -svspec 0-12/13-25/26-38 \ + -cmn current \ + -agc none \ + -dictfn $pronunciation_dictionary \ + -ctlfn $sf_dir/$model_name.fileids \ + -lsnfn $sf_dir/$model_name.transcription \ + -accumdir . \ + &> /dev/null + +$tools_dir/mllr_solve \ + -meanfn $model_dir/en-us/means \ + -varfn $model_dir/en-us/variances \ + -outmllrfn mllr_matrix \ + -accumdir . \ + &> /dev/null + +$tools_dir/map_adapt \ + -moddeffn $model_dir/en-us/mdef.txt \ + -ts2cbfn .ptm. \ + -meanfn $model_dir/en-us/means \ + -varfn $model_dir/en-us/variances \ + -mixwfn $model_dir/en-us/mixture_weights \ + -tmatfn $model_dir/en-us/transition_matrices \ + -accumdir . 
\ + -mapmeanfn $model_dir/en-us/means \ + -mapvarfn $model_dir/en-us/variances \ + -mapmixwfn $model_dir/en-us/mixture_weights \ + -maptmatfn $model_dir/en-us/transition_matrices\ + &> /dev/null + +$tools_dir/mk_s2sendump \ + -pocketsphinx yes \ + -moddeffn $model_dir/en-us/mdef.txt \ + -mixwfn $model_dir/en-us/mixture_weights \ + -sendumpfn $model_dir/en-us/sendump \ + &> /dev/null +\ No newline at end of file