commit 391d5755a66cf41b7bdf25108bf880642e22a423
parent 7dc13bc7348ed6d91ceeb8f60c673611f9271d39
Author: umhau <umhau@alum.gcc.edu>
Date: Mon, 19 Jun 2017 18:31:46 -0400
clean up language model 'trainer' & give it a new name
Diffstat:
6 files changed, 49 insertions(+), 85 deletions(-)
diff --git a/install.sh b/install.sh
@@ -118,9 +118,9 @@ sudo mv $libdir/cmusphinx-en-us-ptm-5.2 $libdir/en-us
sudo cp $scriptpath/vmc /usr/local/bin/vmc
sudo chmod +x /usr/local/bin/vmc
-# move lmt into user's path & set as executable
-sudo cp $scriptpath/lmt /usr/local/bin/lmt
-sudo chmod +x /usr/local/bin/lmt
+# move lmc into user's path & set as executable
+sudo cp $scriptpath/lmc /usr/local/bin/lmc
+sudo chmod +x /usr/local/bin/lmc
# GET SPHINXTRAIN BINARIES ========================================================================
diff --git a/lib/buildLM.sh b/lib/buildLM.sh
@@ -1,43 +0,0 @@
-#!/bin/bash
-#
-# DESCRIPTION
-#
-# Produce binary language model from plain sentence list. Invokes CMU-created perl script
-# located in /opt/vmc/lib. Saves file in given directory.
-#
-# USAGE
-#
-# bash buildLM.sh sentence-list model-name save-directory
-#
-# DEPENDENCIES
-#
-# CMU Sphinx
-#
-# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-sentence_list_path=$1
-
-model_name=$2
-
-save_directory=$3
-
-lib_dir=/opt/vmc/lib
-
-# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-# run perl script to create language model
-perl $lib_dir/quick_lm.pl -s $sentence_list_path #&> /dev/null
-
-sentence_list=`basename $sentence_list_path`
-
-sentence_list_dir=`dirname $sentence_list_path`
-
-# rename output
-src=$sentence_list_path.arpabo
-dst=$save_directory/$model_name.lm
-mv $src $dst
-
-# convert lm to binary (bin) format (command was too complex for python to handle)
-filename=$save_directory/$model_name.lm
-sphinx_lm_convert -i $filename -o $filename.bin &> /dev/null
-
diff --git a/lib/quick_lm.pl b/lib/quick_lm.pl
@@ -46,7 +46,7 @@
# n-grams observed, since each n-gram is stored as a hash key. (So
# smaller vocabularies may turn out to be a problem as well.)
#
-# This package computes a stadard back-off language model. It differs
+# This package computes a standard back-off language model. It differs
# in one significant respect, which is the computation of the
# discount. We adopt a "proportional" (or ratio) discount in which a
# certain percentage of probability mass is removed (typically 50%)
diff --git a/lmc b/lmc
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# DESCRIPTION
+#
+# Given a list of sentences, create a statistical language model.
+#
+# (Produces a binary language model from a plain sentence list. Invokes the
+# CMU-created perl script located in /opt/vmc/lib. Saves the result as
+# save-directory/lm-filename.lm, plus a binary .lm.bin copy beside it.)
+#
+# USAGE:
+#
+# lmc sentence-list lm-filename save-directory
+#
+# VARIABLES ===================================================================
+
+sentence_list_path=$1
+lm_filename=$2
+save_directory=$3
+
+lib_dir=/opt/vmc/lib
+
+# CHECK IF HELP NEEDED ========================================================
+
+if [[ $# -lt 3 || -z $1 ]]; then # all three arguments are needed for the paths
+ echo -e "\nUSAGE: \tlmc sentence_list_file output_lm_file_name save_directory"
+ echo -e "\n\tsentence_list_file\t(input file with sample sentences)"
+ echo -e "\toutput_lm_file_name\t(desired base name of output lm file)"
+ echo -e "\tsave_directory\t\t(location to save the output file into)\n"
+ exit 1
+fi
+
+# COMMANDS ====================================================================
+
+# run perl script to create language model (expected output: <list>.arpabo)
+perl "$lib_dir/quick_lm.pl" -s "$sentence_list_path" || exit 1
+
+# rename output; stop if the move fails so a missing file is never converted
+mv -- "$sentence_list_path.arpabo" "$save_directory/$lm_filename.lm" || exit 1
+
+# convert lm from text to binary
+filenamedir=$save_directory/$lm_filename.lm
+sphinx_lm_convert -i "$filenamedir" -o "$filenamedir.bin" &> /dev/null
diff --git a/lmt b/lmt
@@ -1,36 +0,0 @@
-#!/bin/bash
-#
-# DESCRIPTION
-#
-# Given a list of sentences, create a statistical language model.
-#
-# USAGE: lmt lm-training-file lm-file-name output-location
-#
-# VARIABLES ===================================================================
-
-sentence_list_file=$1; output_lm_file_name=$2; save_directory=$3;
-
-fdir=/opt/vmc/lib
-
-# CHECK IF HELP NEEDED ========================================================
-
-if [[ -z $1 ]]; then
-
- echo
- echo -e "USAGE: \tlmt "
- echo
- echo -e "\tsentence_list_file\t(input file with sample sentences)"
- echo -e "\toutput_lm_file_name\t(desired base name of output lm file)"
- echo -e "\tsave_directory\t\t(location to save the ouput file into)"
- echo
-
- exit 1
-
-fi
-
-# COMMANDS ====================================================================
-
-# build language model
-bash $fdir/buildLM.sh $sentence_list_file $output_lm_file_name $save_directory
-
-
diff --git a/vmc b/vmc
@@ -6,7 +6,7 @@
# a voice model in a specified location. Indicate whether to record
# (-newrecordings) or import (-importrecordings) the audio files.
#
-# The statistical language model is now produced separately, run 'lmt' to
+# The statistical language model is now produced separately, run 'lmc' to
# see what the parameters are.
#
# There are two modes of operation: 1) adapt an existing model to use new
@@ -70,7 +70,7 @@ if [ $1 == '-remove' ] || [ $1 == '-uninstall' ]; then
echo -n "Press [enter] to confirm removing VMC."; read
sudo rm -rf /opt/vmc
sudo rm -f /usr/local/bin/vmc
- sudo rm -f /usr/local/bin/lmt
+ sudo rm -f /usr/local/bin/lmc
echo "vmc removed."
fi