split lm and vm input text files - vmc - a voice model creator for CMU Sphinx

commit c76bee8e7c35e4cbb975141132a072d57ebdb2d5
parent 618b68aaeddcc7734a4c4e5040b9cdd5a9630609
Author: umhau <umhau@users.noreply.github.com>
Date:   Thu,  3 Nov 2016 22:23:31 -0400

split lm and vm input text files

Also, the keyphrase file for keyword-spotting mode is no longer requested or included in the output voice model, which is appropriate.
Diffstat:
M vmc.sh  | 48 ++++++++++++++++++++++++++++++++++++++----------

1 file changed, 38 insertions(+), 10 deletions(-)
diff --git a/vmc.sh b/vmc.sh
@@ -8,7 +8,13 @@
 # 
 # USAGE
 # 
-#       vmc.sh model-name [ -record OR -import audio/file/directory ] sentence-file output-folder [reps]
+#       vmc.sh 
+#           model-name                  (used to name most of the internal files)
+#           [ -record OR -import audio/file/directory ]
+#           vm-training-file            (sentences the user should record for training purposes)
+#           lm-training-file            (for use by the statistical language model creator)
+#           output-folder               (this is a complete file path)
+#           [reps]                      (how many times to get a recording of each sentence)
 #
 # DEPENDENCIES
 # 
@@ -23,6 +29,9 @@
 #       parameters and the script will not fail if it is not specified.
 # 
 #       Having been installed to /usr/local/bin, this command can be called from anywhere.
+#
+#       After installation, a keyphrase list should be added in order to use the voice model for 
+#       keyword spotting.
 # 
 
 # VARIABLES =======================================================================================
@@ -33,11 +42,12 @@ export LD_LIBRARY_PATH=/usr/local/lib
 
 if [[ $2 = '-record' ]]; then 
 
-    sentence_file=$3
-    output_folder=$4
+    vm_training_file=$3
+    lm_training_file=$4
+    output_folder=$5
 
     if [[ -n $5 ]]; then
-        iterations=$5
+        iterations=$6
     else
         iterations=1
     fi
@@ -46,12 +56,30 @@ elif [[ $2 = '-import' ]]; then
 
     audio_file_directory=$3
     sentence_file=$4
-    output_folder=$5
+    lm_training_file=$5
+    output_folder=$6
     iterations=1
 
 else
-
-    echo "USAGE: vmc.sh model-name [ -record OR -import audio/file/dir ] sentence-file output-folder [reps]"
+    
+    echo
+    echo -e "USAGE: \tvmc.sh "
+    echo 
+    echo -e "\tmodel-name\t\t(used to name most of the internal files)"
+    echo -e "\t[ -record OR -import audio/file/directory ]"
+    echo -e "\tvm-training-file\t(sentences for the user to record)"
+    echo -e "\tlm-training-file\t(sentences to train the language model)"
+    echo -e "\toutput-folder\t\t(this is a complete file path)"
+    echo -e "\t[reps]\t\t\t(number of voice recordings per sentence)"
+    echo
+
+#       vmc.sh 
+#           model-name 
+#           [ -record OR -import audio/file/directory ] 
+#           vm-training-file
+#           lm-training-file 
+#           output-folder 
+#           [reps]
     exit 1
 
 fi
@@ -77,7 +105,7 @@ if [[ $2 = '-record' ]]; then
 
     mkdir -p $audio_folder
 
-    python3 $fdir/getaudio.py $sentence_file $audio_folder $iterations $model_name
+    python3 $fdir/getaudio.py $vm_training_file $audio_folder $iterations $model_name
 
 elif [[ $2 = '-import' ]]; then
 
@@ -96,7 +124,7 @@ echo
 echo "Producing sentence file derivatives..."
 
 # get derivatives of sentence file
-python3 $fdir/format_text.py $sentence_file $model_name $output_folder $iterations
+python3 $fdir/format_text.py $vm_training_file $model_name $output_folder $iterations
 
 echo 
 echo "Producing audio file derivatives..."
@@ -110,7 +138,7 @@ echo
 echo "Creating models..."
 
 # build language model
-bash $fdir/buildLM.sh $sentence_file $model_name $output_folder
+bash $fdir/buildLM.sh $lm_training_file $model_name $output_folder
 
 # build voice model
 bash $fdir/voicemodel.sh $model_name $output_folder $audio_folder $output_folder

‹ projects	vmc a voice model creator for CMU Sphinx
	Log \| Files \| Refs \| README \| LICENSE