‹ projects

vmc

a voice model creator for CMU Sphinx
Log | Files | Refs | README | LICENSE

commit 9a92f854db955576e9ffcec46053c4541cfb1799
parent 6cbb226e04617078b44e7e901db5464086570fcc
Author: umhau <umhau@users.noreply.github.com>
Date:   Tue,  1 Nov 2016 19:10:12 -0400

file import now works without errors
Diffstat:
M functions/buildLM.sh | 19 ++++++++++---------
M functions/format_text.py | 37 +++++++++++++++++++++++++++++--------
2 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/functions/buildLM.sh b/functions/buildLM.sh
@@ -15,7 +15,7 @@
 #
 # VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-sentence_list=$1
+sentence_list_path=$1
 model_name=$2
@@ -26,17 +26,18 @@ tools_dir=/opt/vmc/tools
 # COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # run perl script to create language model
-perl $tools_dir/quick_lm.pl -s $sentence_list &> /dev/null
+perl $tools_dir/quick_lm.pl -s $sentence_list_path #&> /dev/null
-# quick_lm creates output in its own directory. I can fix that later, after I learn perl.
-src=$tools_dir/$sentence_list.arpabo
-dst=$tools_dir/$model_name.lm
+sentence_list=`basename $sentence_list_path`
+
+sentence_list_dir=`dirname $sentence_list_path`
+
+# rename output
+src=$sentence_list_path.arpabo
+dst=$sentence_list_dir/$model_name.lm
 mv $src $dst
 # convert lm to binary (bin) format (command was too complex for python to handle)
-filename=$tools_dir/$model_name.lm
+filename=$sentence_list_dir/$model_name.lm
 sphinx_lm_convert -i $dst -o $dst.bin &> /dev/null
-# move into working directory
-mv $tools_dir/$model_name.lm $save_directory/$model_name.lm
-
diff --git a/functions/format_text.py b/functions/format_text.py
@@ -20,11 +20,11 @@
 #
 # python3
 #
-# IMPORTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# IMPORTS =========================================================================================
-import pathlib, re, sys, os
+import pathlib, re, sys, os, string
-# VARIABLE DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# VARIABLE DEFINITIONS ============================================================================
 sentence_file = sys.argv[1] # os.path.basename() to get just the filename
@@ -32,9 +32,32 @@ model_name = sys.argv[2]
 target_directory = sys.argv[3].rstrip(os.sep)
-pronunciation_dictionary = '/opt/vmc/cmudict-en-us.dict'
-
-# LOGIC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+pronunciation_dictionary = '/opt/vmc/tools/cmudict-en-us.dict'
+
+# FUNCTION DEFINITION =============================================================================
+
+# Print iterations progress
+def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, barLength = 100):
+    """
+    Call in a loop to create terminal progress bar
+    @params:
+        iteration - Required : current iteration (Int)
+        total - Required : total iterations (Int)
+        prefix - Optional : prefix string (Str)
+        suffix - Optional : suffix string (Str)
+        decimals - Optional : positive number of decimals in percent complete (Int)
+        barLength - Optional : character length of bar (Int)
+    """
+    formatStr = "{0:." + str(decimals) + "f}"
+    percents = formatStr.format(100 * (iteration / float(total)))
+    filledLength = int(round(barLength * iteration / float(total)))
+    bar = '█' * filledLength + '-' * (barLength - filledLength)
+    sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)),
+    if iteration == total:
+        sys.stdout.write('\n')
+    sys.stdout.flush()
+
+# LOGIC ===========================================================================================
 sentences_text = ""
@@ -69,8 +92,6 @@ with open(sentence_file) as f:
 hs.close()
 sentences_text = sentences_text+' '+line
- sentences_list.append(line)
-
 # def sentence_parsing(sentences_text, model_name, sentence_file, pronunciation_dictionary):