‹ projects

vmc

a voice model creator for CMU Sphinx
Log | Files | Refs | README | LICENSE

commit 391d5755a66cf41b7bdf25108bf880642e22a423
parent 7dc13bc7348ed6d91ceeb8f60c673611f9271d39
Author: umhau <umhau@alum.gcc.edu>
Date:   Mon, 19 Jun 2017 18:31:46 -0400

clean up language model 'trainer' & give it a new name

Diffstat:
M install.sh      |  6 +++---
D lib/buildLM.sh  | 43 -------------------------------------------
M lib/quick_lm.pl |  2 +-
A lmc             | 43 +++++++++++++++++++++++++++++++++++++++++++
D lmt             | 36 ------------------------------------
M vmc             |  4 ++--
6 files changed, 49 insertions(+), 85 deletions(-)

diff --git a/install.sh b/install.sh @@ -118,9 +118,9 @@ sudo mv $libdir/cmusphinx-en-us-ptm-5.2 $libdir/en-us sudo cp $scriptpath/vmc /usr/local/bin/vmc sudo chmod +x /usr/local/bin/vmc -# move lmt into user's path & set as executable -sudo cp $scriptpath/lmt /usr/local/bin/lmt -sudo chmod +x /usr/local/bin/lmt +# move lmc into user's path & set as executable +sudo cp $scriptpath/lmc /usr/local/bin/lmc +sudo chmod +x /usr/local/bin/lmc # GET SPHINXTRAIN BINARIES ======================================================================== diff --git a/lib/buildLM.sh b/lib/buildLM.sh @@ -1,43 +0,0 @@ -#!/bin/bash -# -# DESCRIPTION -# -# Produce binary language model from plain sentence list. Invokes CMU-created perl script -# located in /opt/vmc/lib. Saves file in given directory. -# -# USAGE -# -# bash buildLM.sh sentence-list model-name save-directory -# -# DEPENDENCIES -# -# CMU Sphinx -# -# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -sentence_list_path=$1 - -model_name=$2 - -save_directory=$3 - -lib_dir=/opt/vmc/lib - -# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -# run perl script to create language model -perl $lib_dir/quick_lm.pl -s $sentence_list_path #&> /dev/null - -sentence_list=`basename $sentence_list_path` - -sentence_list_dir=`dirname $sentence_list_path` - -# rename output -src=$sentence_list_path.arpabo -dst=$save_directory/$model_name.lm -mv $src $dst - -# convert lm to binary (bin) format (command was too complex for python to handle) -filename=$save_directory/$model_name.lm -sphinx_lm_convert -i $filename -o $filename.bin &> /dev/null - diff --git a/lib/quick_lm.pl b/lib/quick_lm.pl @@ -46,7 +46,7 @@ # n-grams observed, since each n-gram is stored as a hash key. (So # smaller vocabularies may turn out to be a problem as well.) # -# This package computes a stadard back-off language model. 
It differs +# This package computes a standard back-off language model. It differs # in one significant respect, which is the computation of the # discount. We adopt a "proportional" (or ratio) discount in which a # certain percentage of probability mass is removed (typically 50%) diff --git a/lmc b/lmc @@ -0,0 +1,43 @@ +#!/bin/bash +# +# DESCRIPTION +# +# Given a list of sentences, create a statistical language model. +# +# (Produces a binary language model from plain sentence list. Invokes +# CMU-created perl script located in /opt/vmc/lib. Saves file in given +# directory.) +# +# USAGE: +# +# lmc sentence-list lm-filename save-directory +# +# VARIABLES =================================================================== + +sentence_list_path=$1 +lm_filename=$2 +save_directory=$3 + +lib_dir=/opt/vmc/lib + +# CHECK IF HELP NEEDED ======================================================== + +if [[ -z $1 ]]; then + echo -e "\nUSAGE: \tlmc " + echo -e "\n\tsentence_list_file\t(input file with sample sentences)" + echo -e "\toutput_lm_file_name\t(desired base name of output lm file)" + echo -e "\tsave_directory\t\t(location to save the ouput file into)\n" + exit 1 +fi + +# COMMANDS ==================================================================== + +# run perl script to create language model +perl $lib_dir/quick_lm.pl -s $sentence_list_path #&> /dev/null + +# rename output +mv "$sentence_list_path.arpabo" "$save_directory/$lm_filename.lm" + +# convert lm from text to binary +filenamedir=$save_directory/$lm_filename.lm +sphinx_lm_convert -i $filenamedir -o $filenamedir.bin &> /dev/null diff --git a/lmt b/lmt @@ -1,36 +0,0 @@ -#!/bin/bash -# -# DESCRIPTION -# -# Given a list of sentences, create a statistical language model. 
-# -# USAGE: lmt lm-training-file lm-file-name output-location -# -# VARIABLES =================================================================== - -sentence_list_file=$1; output_lm_file_name=$2; save_directory=$3; - -fdir=/opt/vmc/lib - -# CHECK IF HELP NEEDED ======================================================== - -if [[ -z $1 ]]; then - - echo - echo -e "USAGE: \tlmt " - echo - echo -e "\tsentence_list_file\t(input file with sample sentences)" - echo -e "\toutput_lm_file_name\t(desired base name of output lm file)" - echo -e "\tsave_directory\t\t(location to save the ouput file into)" - echo - - exit 1 - -fi - -# COMMANDS ==================================================================== - -# build language model -bash $fdir/buildLM.sh $sentence_list_file $output_lm_file_name $save_directory - - diff --git a/vmc b/vmc @@ -6,7 +6,7 @@ # a voice model in a specified location. Indicate whether to record # (-newrecordings) or import (-importrecordings) the audio files. # -# The statistical language model is now produced separately, run 'lmt' to +# The statistical language model is now produced separately, run 'lmc' to # see what the parameters are. # # There are two modes of operation: 1) adapt an existing model to use new @@ -70,7 +70,7 @@ if [ $1 == '-remove' ] || [ $1 == '-uninstall' ]; then echo -n "Press [enter] to confirm removing VMC."; read sudo rm -rf /opt/vmc sudo rm -f /usr/local/bin/vmc - sudo rm -f /usr/local/bin/lmt + sudo rm -f /usr/local/bin/lmc echo "vmc removed." fi