commit 9a92f854db955576e9ffcec46053c4541cfb1799
parent 6cbb226e04617078b44e7e901db5464086570fcc
Author: umhau <umhau@users.noreply.github.com>
Date: Tue, 1 Nov 2016 19:10:12 -0400
file import now works without errors
Diffstat:
2 files changed, 39 insertions(+), 17 deletions(-)
diff --git a/functions/buildLM.sh b/functions/buildLM.sh
@@ -15,7 +15,7 @@
#
# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-sentence_list=$1
+sentence_list_path=$1
model_name=$2
@@ -26,17 +26,18 @@ tools_dir=/opt/vmc/tools
# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# run perl script to create language model
-perl $tools_dir/quick_lm.pl -s $sentence_list &> /dev/null
+perl $tools_dir/quick_lm.pl -s $sentence_list_path #&> /dev/null
-# quick_lm creates output in its own directory. I can fix that later, after I learn perl.
-src=$tools_dir/$sentence_list.arpabo
-dst=$tools_dir/$model_name.lm
+sentence_list=`basename $sentence_list_path`
+
+sentence_list_dir=`dirname $sentence_list_path`
+
+# rename output
+src=$sentence_list_path.arpabo
+dst=$sentence_list_dir/$model_name.lm
mv $src $dst
# convert lm to binary (bin) format (command was too complex for python to handle)
-filename=$tools_dir/$model_name.lm
+filename=$sentence_list_dir/$model_name.lm
sphinx_lm_convert -i $dst -o $dst.bin &> /dev/null
-# move into working directory
-mv $tools_dir/$model_name.lm $save_directory/$model_name.lm
-
diff --git a/functions/format_text.py b/functions/format_text.py
@@ -20,11 +20,11 @@
#
# python3
#
-# IMPORTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# IMPORTS =========================================================================================
-import pathlib, re, sys, os
+import pathlib, re, sys, os, string
-# VARIABLE DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# VARIABLE DEFINITIONS ============================================================================
sentence_file = sys.argv[1] # os.path.basename() to get just the filename
@@ -32,9 +32,32 @@ model_name = sys.argv[2]
target_directory = sys.argv[3].rstrip(os.sep)
-pronunciation_dictionary = '/opt/vmc/cmudict-en-us.dict'
-
-# LOGIC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+pronunciation_dictionary = '/opt/vmc/tools/cmudict-en-us.dict'
+
+# FUNCTION DEFINITION =============================================================================
+
+# Print iterations progress
+def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, barLength = 100):
+ """
+ Call in a loop to create terminal progress bar
+ @params:
+ iteration - Required : current iteration (Int)
+ total - Required : total iterations (Int)
+ prefix - Optional : prefix string (Str)
+ suffix - Optional : suffix string (Str)
+ decimals - Optional : positive number of decimals in percent complete (Int)
+ barLength - Optional : character length of bar (Int)
+ """
+ formatStr = "{0:." + str(decimals) + "f}"
+ percents = formatStr.format(100 * (iteration / float(total)))
+ filledLength = int(round(barLength * iteration / float(total)))
+ bar = '█' * filledLength + '-' * (barLength - filledLength)
+ sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)),
+ if iteration == total:
+ sys.stdout.write('\n')
+ sys.stdout.flush()
+
+# LOGIC ===========================================================================================
sentences_text = ""
@@ -69,8 +92,6 @@ with open(sentence_file) as f:
hs.close()
sentences_text = sentences_text+' '+line
- sentences_list.append(line)
-
# def sentence_parsing(sentences_text, model_name, sentence_file, pronunciation_dictionary):