clean up language model 'trainer' & give it a new name - vmc

commit 391d5755a66cf41b7bdf25108bf880642e22a423
parent 7dc13bc7348ed6d91ceeb8f60c673611f9271d39
Author: umhau <umhau@alum.gcc.edu>
Date:   Mon, 19 Jun 2017 18:31:46 -0400

clean up language model 'trainer' & give it a new name

Diffstat:
M install.sh  | 6 +++---
D lib/buildLM.sh  | 43 -------------------------------------------
M lib/quick_lm.pl  | 2 +-
A lmc  | 43 +++++++++++++++++++++++++++++++++++++++++++
D lmt  | 36 ------------------------------------
M vmc  | 4 ++--

6 files changed, 49 insertions(+), 85 deletions(-)
diff --git a/install.sh b/install.sh
@@ -118,9 +118,9 @@ sudo mv  $libdir/cmusphinx-en-us-ptm-5.2 $libdir/en-us
 sudo cp $scriptpath/vmc /usr/local/bin/vmc
 sudo chmod +x /usr/local/bin/vmc
 
-# move lmt into user's path  & set as executable
-sudo cp $scriptpath/lmt /usr/local/bin/lmt
-sudo chmod +x /usr/local/bin/lmt
+# move lmc into user's path  & set as executable
+sudo cp $scriptpath/lmc /usr/local/bin/lmc
+sudo chmod +x /usr/local/bin/lmc
 
 # GET SPHINXTRAIN BINARIES ========================================================================
 
diff --git a/lib/buildLM.sh b/lib/buildLM.sh
@@ -1,43 +0,0 @@
-#!/bin/bash
-# 
-# DESCRIPTION
-# 
-#       Produce binary language model from plain sentence list.  Invokes CMU-created perl script
-#       located in /opt/vmc/lib.  Saves file in given directory.
-# 
-# USAGE
-# 
-#       bash buildLM.sh sentence-list model-name save-directory
-#
-# DEPENDENCIES
-# 
-#       CMU Sphinx
-# 
-# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-sentence_list_path=$1
-
-model_name=$2
-
-save_directory=$3
-
-lib_dir=/opt/vmc/lib
-
-# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-# run perl script to create language model
-perl $lib_dir/quick_lm.pl -s $sentence_list_path #&> /dev/null
-
-sentence_list=`basename $sentence_list_path`
-
-sentence_list_dir=`dirname $sentence_list_path`
-
-# rename output
-src=$sentence_list_path.arpabo
-dst=$save_directory/$model_name.lm
-mv $src $dst
-
-# convert lm to binary (bin) format (command was too complex for python to handle)
-filename=$save_directory/$model_name.lm
-sphinx_lm_convert -i $filename -o $filename.bin &> /dev/null
-
diff --git a/lib/quick_lm.pl b/lib/quick_lm.pl
@@ -46,7 +46,7 @@
 # n-grams observed, since each n-gram is stored as a hash key. (So 
 # smaller vocabularies may turn out to be a problem as well.)
 # 
-# This package computes a stadard back-off language model. It differs 
+# This package computes a standard back-off language model. It differs 
 # in one significant respect, which is the computation of the 
 # discount. We adopt a "proportional" (or ratio) discount in which a 
 # certain percentage of probability mass is removed (typically 50%) 
diff --git a/lmc b/lmc
@@ -0,0 +1,43 @@
+#!/bin/bash
+# 
+# DESCRIPTION
+# 
+#       Given a list of sentences, create a statistical language model.  
+#
+#       (Produces a binary language model from plain sentence list.  Invokes 
+#       CMU-created perl script located in /opt/vmc/lib.  Saves file in given 
+#       directory.)
+#
+# USAGE: 
+#
+#       lmc sentence-list lm-filename save-directory
+#
+# VARIABLES ===================================================================
+
+sentence_list_path=$1
+lm_filename=$2
+save_directory=$3
+
+lib_dir=/opt/vmc/lib
+
+# CHECK IF HELP NEEDED ========================================================
+
+if [[ -z $1 ]]; then  
+    echo -e "\nUSAGE: \tlmc "
+    echo -e "\n\tsentence_list_file\t(input file with sample sentences)"
+    echo -e "\toutput_lm_file_name\t(desired base name of output lm file)"
+    echo -e "\tsave_directory\t\t(location to save the ouput file into)\n"
+    exit 1
+fi
+
+# COMMANDS ====================================================================
+
+# run perl script to create language model
+perl $lib_dir/quick_lm.pl -s $sentence_list_path #&> /dev/null
+
+# rename output
+mv "$sentence_list_path.arpabo" "$save_directory/$lm_filename.lm"
+
+# convert lm from text to binary 
+filenamedir=$save_directory/$lm_filename.lm
+sphinx_lm_convert -i $filenamedir -o $filenamedir.bin &> /dev/null
diff --git a/lmt b/lmt
@@ -1,36 +0,0 @@
-#!/bin/bash
-# 
-# DESCRIPTION
-# 
-#       Given a list of sentences, create a statistical language model.  
-#
-# USAGE: lmt lm-training-file lm-file-name output-location
-#
-# VARIABLES ===================================================================
-
-sentence_list_file=$1; output_lm_file_name=$2; save_directory=$3; 
-
-fdir=/opt/vmc/lib
-
-# CHECK IF HELP NEEDED ========================================================
-
-if [[ -z $1 ]]; then 
-   
-    echo
-    echo -e "USAGE: \tlmt "
-    echo 
-    echo -e "\tsentence_list_file\t(input file with sample sentences)"
-    echo -e "\toutput_lm_file_name\t(desired base name of output lm file)"
-    echo -e "\tsave_directory\t\t(location to save the ouput file into)"
-    echo
-
-    exit 1
-
-fi
-
-# COMMANDS ====================================================================
-
-# build language model
-bash $fdir/buildLM.sh $sentence_list_file $output_lm_file_name $save_directory
-
-
diff --git a/vmc b/vmc
@@ -6,7 +6,7 @@
 #       a voice model in a specified location.  Indicate whether to record 
 #       (-newrecordings) or import (-importrecordings) the audio files.
 # 
-#       The statistical language model is now produced separately, run 'lmt' to
+#       The statistical language model is now produced separately, run 'lmc' to
 #       see what the parameters are.
 #
 #       There are two modes of operation: 1) adapt an existing model to use new
@@ -70,7 +70,7 @@ if [ $1 == '-remove' ] || [ $1 == '-uninstall' ]; then
     echo -n "Press [enter] to confirm removing VMC."; read
     sudo rm -rf /opt/vmc
     sudo rm -f /usr/local/bin/vmc
-    sudo rm -f /usr/local/bin/lmt
+    sudo rm -f /usr/local/bin/lmc
     echo "vmc removed."
 fi

‹ projects	vmc a voice model creator for CMU Sphinx
	Log \| Files \| Refs \| README \| LICENSE

M	install.sh	\|	6	+++---
D	lib/buildLM.sh	\|	43	-------------------------------------------
M	lib/quick_lm.pl	\|	2	+-
A	lmc	\|	43	+++++++++++++++++++++++++++++++++++++++++++
D	lmt	\|	36	------------------------------------
M	vmc	\|	4	++--