file import now works without errors - vmc - a voice model creator for CMU Sphinx

commit 9a92f854db955576e9ffcec46053c4541cfb1799
parent 6cbb226e04617078b44e7e901db5464086570fcc
Author: umhau <umhau@users.noreply.github.com>
Date:   Tue,  1 Nov 2016 19:10:12 -0400

file import now works without errors
Diffstat:
M functions/buildLM.sh  | 19 ++++++++++---------
M functions/format_text.py  | 37 +++++++++++++++++++++++++++++--------

2 files changed, 39 insertions(+), 17 deletions(-)
diff --git a/functions/buildLM.sh b/functions/buildLM.sh
@@ -15,7 +15,7 @@
 # 
 # VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-sentence_list=$1
+sentence_list_path=$1
 
 model_name=$2
 
@@ -26,17 +26,18 @@ tools_dir=/opt/vmc/tools
 # COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 # run perl script to create language model
-perl $tools_dir/quick_lm.pl -s $sentence_list &> /dev/null
+perl $tools_dir/quick_lm.pl -s $sentence_list_path #&> /dev/null
 
-# quick_lm creates output in its own directory.  I can fix that later, after I learn perl.
-src=$tools_dir/$sentence_list.arpabo
-dst=$tools_dir/$model_name.lm
+sentence_list=`basename $sentence_list_path`
+
+sentence_list_dir=`dirname $sentence_list_path`
+
+# rename output
+src=$sentence_list_path.arpabo
+dst=$sentence_list_dir/$model_name.lm
 mv $src $dst
 
 # convert lm to binary (bin) format (command was too complex for python to handle)
-filename=$tools_dir/$model_name.lm
+filename=$sentence_list_dir/$model_name.lm
 sphinx_lm_convert -i $dst -o $dst.bin &> /dev/null
 
-# move into working directory
-mv $tools_dir/$model_name.lm $save_directory/$model_name.lm
-
diff --git a/functions/format_text.py b/functions/format_text.py
@@ -20,11 +20,11 @@
 # 
 #       python3
 # 
-# IMPORTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# IMPORTS =========================================================================================
 
-import pathlib, re, sys, os
+import pathlib, re, sys, os, string
 
-# VARIABLE DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# VARIABLE DEFINITIONS ============================================================================
 
 sentence_file = sys.argv[1] # os.path.basename() to get just the filename
 
@@ -32,9 +32,32 @@ model_name = sys.argv[2]
 
 target_directory = sys.argv[3].rstrip(os.sep)
 
-pronunciation_dictionary = '/opt/vmc/cmudict-en-us.dict'
-
-# LOGIC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+pronunciation_dictionary = '/opt/vmc/tools/cmudict-en-us.dict'
+
+# FUNCTION DEFINITION =============================================================================
+
+# Print iterations progress
+def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, barLength = 100):
+    """
+    Call in a loop to create terminal progress bar
+    @params:
+        iteration   - Required  : current iteration (Int)
+        total       - Required  : total iterations (Int)
+        prefix      - Optional  : prefix string (Str)
+        suffix      - Optional  : suffix string (Str)
+        decimals    - Optional  : positive number of decimals in percent complete (Int)
+        barLength   - Optional  : character length of bar (Int)
+    """
+    formatStr       = "{0:." + str(decimals) + "f}"
+    percents        = formatStr.format(100 * (iteration / float(total)))
+    filledLength    = int(round(barLength * iteration / float(total)))
+    bar             = '█' * filledLength + '-' * (barLength - filledLength)
+    sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)),
+    if iteration == total:
+        sys.stdout.write('\n')
+    sys.stdout.flush()
+
+# LOGIC ===========================================================================================
 
 sentences_text = ""
 
@@ -69,8 +92,6 @@ with open(sentence_file) as f:
         hs.close() 
 
         sentences_text = sentences_text+' '+line
-        sentences_list.append(line)
-
 
 # def sentence_parsing(sentences_text, model_name, sentence_file, pronunciation_dictionary):

‹ projects	vmc a voice model creator for CMU Sphinx
	Log | Files | Refs | README | LICENSE

M	functions/buildLM.sh	|	19	++++++++++---------
M	functions/format_text.py	|	37	+++++++++++++++++++++++++++++--------