commit 4dbfdd0f095f03c2c487d5dc072096b217b1fe73
parent c76bee8e7c35e4cbb975141132a072d57ebdb2d5
Author: umhau <umhau@users.noreply.github.com>
Date: Thu, 3 Nov 2016 23:56:39 -0400
language model creation is now a separate tool
call 'lmt.sh' from the command line to activate.
Diffstat:
4 files changed, 62 insertions(+), 18 deletions(-)
diff --git a/installvmc.sh b/installvmc.sh
@@ -48,12 +48,14 @@ sudo mv $tdir/cmusphinx-en-us-ptm-5.2 $tdir/en-us
# move functions
sudo cp -r $scriptpath/functions/* $fdir/
-# move vmc into user's path
+# move vmc into user's path & set as executable
sudo cp $scriptpath/vmc.sh /usr/local/bin/vmc.sh
-
-# set vmc.sh as executable
sudo chmod +x /usr/local/bin/vmc.sh
+# move lmt into user's path & set as executable
+sudo cp $scriptpath/lmt.sh /usr/local/bin/lmt.sh
+sudo chmod +x /usr/local/bin/lmt.sh
+
# GET SPHINXTRAIN BINARIES ========================================================================
# copy binary tools into model folder
diff --git a/lmt.sh b/lmt.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# DESCRIPTION
+#
+#   Given a list of sentences, create a statistical language model.
+#
+# USAGE
+#
+#   lmt.sh lm-training-file lm-file-name output-location
+#
+# DEPENDENCIES
+#
+#   CMU Sphinx, Perl, and other misc. packages.
+#
+
+# VARIABLES =======================================================================================
+
+# all three positional arguments are required; print usage if any is missing or empty
+if [[ -z $1 || -z $2 || -z $3 ]]; then
+
+  echo
+  echo -e "USAGE: \tlmt.sh lm-training-file lm-file-name output-location"
+  echo
+  echo -e "\tlm-training-file\t(sentences used to train the language model)"
+  echo -e "\tlm-file-name\t\t(name to give the language model)"
+  echo -e "\toutput-location\t\t(location to save the LM into)"
+  echo
+
+  exit 1
+
+fi
+
+lm_training_file=$1
+lm_file_name=$2
+output_location=$3
+
+# location of the helper scripts called below
+fdir=/opt/vmc/functions
+
+# COMMANDS ========================================================================================
+
+# build language model (arguments are quoted so paths containing spaces stay intact)
+bash "$fdir/buildLM.sh" "$lm_training_file" "$lm_file_name" "$output_location"
+
+echo
+echo "Done."
+echo
diff --git a/uninstallvmc.sh b/uninstallvmc.sh
@@ -11,4 +11,6 @@ sudo rm -r /opt/vmc
sudo rm /usr/local/bin/vmc.sh
-echo "vmc removed."
-\ No newline at end of file
+sudo rm /usr/local/bin/lmt.sh
+
+echo "vmc removed."
diff --git a/vmc.sh b/vmc.sh
@@ -6,13 +6,18 @@
# a specified location. Indicate whether to record (-record) or import (-import) the
# audio files.
#
+# The statistical language model is now produced separately. Run
+#
+# $ lmt.sh
+#
+# to see what the parameters are.
+#
# USAGE
#
# vmc.sh
# model-name (used to name most of the internal files)
# [ -record OR -import audio/file/directory ]
# vm-training-file (sentences the user should record for training purposes)
-# lm-training-file (for use by the statistical language model creator)
# output-folder (this is a complete file path)
# [reps] (how many times to get a recording of each sentence)
#
@@ -73,13 +78,6 @@ else
echo -e "\t[reps]\t\t\t(number of voice recordings per sentence)"
echo
-# vmc.sh
-# model-name
-# [ -record OR -import audio/file/directory ]
-# sentence-file-for-voice-recordings
-# lm-training-file
-# output-folder
-# [reps]
exit 1
fi
@@ -135,14 +133,10 @@ bash $fdir/acousticfiles.sh $audio_folder $output_folder/$model_name.fileids
# CREATE MODELS -----------------------------------------------------------------------------------
echo
-echo "Creating models..."
-
-# build language model
-bash $fdir/buildLM.sh $lm_training_file $model_name $output_folder
+echo "Creating voice model..."
# build voice model
bash $fdir/voicemodel.sh $model_name $output_folder $audio_folder $output_folder
-
echo
echo "Process complete."