commit 656283331056f872b451eaf1eaf1ae8a2a80990c
parent 958411c58c3aa87c5bf9b77fb83160435cb4c305
Author: umhau <umhau@alum.gcc.edu>
Date: Sun, 18 Jun 2017 18:40:57 -0400
reorganized tool to have a better set of options.
Diffstat:
7 files changed, 136 insertions(+), 168 deletions(-)
diff --git a/README.md b/README.md
@@ -10,12 +10,6 @@ Note this tool has only been tested with Linux Mint 17.3 & 18.
**Please see the LICENSE file for terms of use.**
-TODO
-----
-
-Make sure that if I work from prerecorded audio files, I can add to a set of
-files already recorded.
-
Linux/Unix installation
-------------------------------------------------------------------------------
@@ -35,25 +29,19 @@ Commands:
See use examples in the next section.
-Usage instructions
+Usage Examples
-------------------------------------------------------------------------------
-Example usage, recording new audio with 5 repetitions of each sentence:
-
- vmc new_model -record ~/Downloads/sentences.txt ~/projects/new_model 5
+Add to a preexisting set of recordings, and adapt an existing acoustic model.
-Example usage, importing previously created audio files:
+ vmc en-us -adapt /extant/model/location -addrecordings /audio/files/location /dictation/file/location.txt 5
- vmc ccmodel -import audio_files cc.list ~/tools/ccmodel
+Create a new model, and create a new set of audio recordings.
-Note that the model name and the name of the model folder should be the same.
-Also note the repetitions specification is optional; it defaults to 1.
+ vmc en-us -create /place/to/put/model -newrecordings /place/to/put/audio/files /dictation/file/location.txt 5
-The model folder will contain all necessary files to run PocketSphinx with the
-newly created custom voice model.
-
-Note that dependencies are not checked when running vmc. To check
-dependencies, see the section above.
+Import a previously created set of recordings, and adapt a preexisting model.
+ vmc en-us -adapt /extant/model/location -importrecordings /audio/files/location
File Structure
-------------------------------------------------------------------------------
@@ -78,7 +66,6 @@ structure is as follows (incomplete, only showing commonly-used files):
acoustic-model
- feat.params
-
Background
-------------------------------------------------------------------------------
diff --git a/installdependencies.sh b/installdependencies.sh
@@ -111,6 +111,7 @@ if [ ! -d $installation_directory/sphinxbase/ ]; then
make -j $CORES
make -j $CORES check
sudo make -j $CORES install
+ sudo chown -R $USER: $installation_directory # bug: dir had root ownership.
else
echo "Done."
echo "SphinxBase already installed."
@@ -129,6 +130,7 @@ if [ ! -d $installation_directory/sphinxtrain/ ]; then
./configure
make -j $CORES
sudo make -j $CORES install
+ sudo chown -R $USER: $installation_directory # bug: dir had root ownership.
echo "Done."
else
echo "Done."
@@ -150,6 +152,7 @@ if [ ! -d $installation_directory/pocketsphinx/ ]; then
make -j $CORES clean all
make -j $CORES check
sudo make -j $CORES install
+ sudo chown -R $USER: $installation_directory # bug: dir had root ownership.
echo "done."
else
echo "Done."
diff --git a/installvmc.sh b/installvmc.sh
@@ -1,17 +1,15 @@
#!/bin/bash
#
-# USAGE
+# USAGE =======================================================================
#
# bash installvmc.sh
#
-# NOTES
+# NOTES =======================================================================
#
# Copies the vmc packages into /opt/vmc, and puts the vmc script into
# /usr/local/bin. Once there, vmc can be called (with its requisite
# options) from anywhere with just vmc.
#
-
-
# SET VARIABLES ===============================================================
# Absolute path to this script & containing folder. stackoverflow.com/q/242538
@@ -32,20 +30,17 @@ fi
# MOVE VMC FILES ==============================================================
# get sudo
-sudo ls 1>/dev/null; echo -n "Installing vmc..."
+sudo ls 1>/dev/null; echo -en "\nInstalling vmc..."
# create vmc directories
sudo mkdir -p $libdir; sudo mkdir -p $libdir
-# move tools
-sudo cp -r $scriptpath/tools/* $libdir/
+# move library
+sudo cp -r $scriptpath/lib/* $libdir/
-sudo tar -xf $scriptpath/cmusphinx-en-us-ptm-5.2.tar.gz -C $libdir
+sudo tar -xf $scriptpath/lib/cmusphinx-en-us-ptm-5.2.tar.gz -C $libdir
sudo mv $libdir/cmusphinx-en-us-ptm-5.2 $libdir/en-us
-# move functions
-sudo cp -r $scriptpath/lib/* $libdir/
-
# move vmc into user's path & set as executable
sudo cp $scriptpath/vmc /usr/local/bin/vmc
sudo chmod +x /usr/local/bin/vmc
diff --git a/lib/format_text.py b/lib/format_text.py
@@ -57,12 +57,13 @@ model_name = sys.argv[2]
audio_folder = sys.argv[3].rstrip(os.sep)
iterations = int(sys.argv[4])
pronunciation_dictionary = '/opt/vmc/lib/cmudict-en-us.dict'
+recording_count = int(sys.argv[5]) # how many audio files already exist
-try:
- recording_count = int(sys.argv[5]) # how many audio files already exist
-except IndexError:
- recording_count = 0
-
+if recording_count == 0:
+ append_to_existing=False
+else:
+ append_to_existing=True
+
# LOGIC =======================================================================
# Print iterations progress ---------------------------------------------------
@@ -126,36 +127,38 @@ for line in lines*iterations:
# CREATE PRONUNCIATION DICTIONARY ---------------------------------------------
+# this is the same for the new file and the old file
+uwordsfilename = str(audio_folder+'/'+model_name+'.vocab') # correct extension
+
# create unique, sorted word list from sentence list
-words = []
-print("Creating unique, sorted word list...")
-[words.append(word.strip(string.punctuation).upper()) for word in sentences_text.split()]
+words = []; print("Creating unique, sorted word list...")
+
+# get words from new sentence file
+[words.append(word.strip(string.punctuation).upper().rstrip()) for word in sentences_text.split()]
+
+# add the words from the old word list (if this is appending to an old model)
+if append_to_existing:
+ with open(uwordsfilename) as f:
+ for word in f: words.append(word.strip(string.punctuation).upper().rstrip())
+
# set() uniques the list, sorted() puts them a-z.
-uwords = sorted(list(set(words)))
+uwords = list(filter(None, sorted(list(set(words)))))
# save word list to file
-print("Saving word list to file...")
-uwordsfilename = str(audio_folder+'/'+model_name+'.vocab') # correct extension
-uwordsfile = open(uwordsfilename, 'w')
+print("Saving word list to file..."); uwordsfile = open(uwordsfilename, 'w')
for word in uwords:
uwordsfile.write("%s\n" % word)
# create pronunciation dictionary from word list
if create_pronunciation_dictionary:
- cmudict = []
- print("Opening pronunciation dictionary...")
+ cmudict = []; print("Opening pronunciation dictionary...")
with open(pronunciation_dictionary) as f:
for line in f:
cmudict.append(line)
- pdict = []
- missing_words = []
- l = len(uwords)
- i = 0
print("Extracting entries corresponding to word list...")
- printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20)
-
- curr_line = 0
+ pdict = []; missing_words = []; l = len(uwords); i = 0; curr_line = 0
+ printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20)
for word in uwords:
diff --git a/lib/getaudio.py b/lib/getaudio.py
@@ -106,7 +106,7 @@ with open(sentence_file) as f:
for line in f:
sentence_list.append(line)
-num_recs = len(sentence_list)*reps
+num_recs = len(sentence_list)*reps+recording_count
# collect audio files
try:
diff --git a/lib/voicemodel.sh b/lib/voicemodel.sh
@@ -20,7 +20,7 @@
# VARIABLES ===================================================================
model_name=$1 # If performing adaptation, this must match prior names.
-acoustic_model_dir=$2 # 'en-us' default voice model folder
+acoustic_model_dir=$2 # acoustic model folder, often the default 'en-us'
audio_file_dir=$3 # contains audio file and sentence file derivatives
libdir=/opt/vmc/lib
diff --git a/vmc b/vmc
@@ -4,157 +4,137 @@
#
# Given a sentence file and (optionally) prerecorded audio files, produce
# a voice model in a specified location. Indicate whether to record
-# (-record) or import (-import) the audio files.
+# (-newrecordings) or import (-importrecordings) the audio files.
#
-# The statistical language model is now produced separately, run
-#
-# $ lmt
+# The statistical language model is now produced separately, run 'lmt' to
+# see what the parameters are.
#
-# to see what the parameters are.
+# There are two modes of operation: 1) adapt an existing model to use new
+# audio data and 2) create a new model from scratch. In either case,
+# there are three options for sourcing the audio: 1) record new audio, 2)
+# import audio from a folder, or 3) add new recordings to the previously
+# recorded audio data ('addrecordings', below).
#
-# USAGE
-#
-# vmc
-# model-name used to name most of the internal files
-#
-# [ -record OR -import audio/file/directory ]
-#
-# vm-training-file sentences the user should record for
-# training purposes
+# If importing audio, be aware that the model names of the acoustic
+# models used must match exactly.
#
-# output-folder this is a complete file path. If
-# adapting in-place, this is the same as
-# the acoustic-model (input) folder.
+# If creating a new model, there is a copy of the standard acoustic voice
+# model stored in /opt/vmc/lib/en-us. Copy it into the home directory
+# before use. (sudo cp -r /opt/vmc/lib/en-us ~/Documents/)
#
-# [reps] how many times to get a recording of
-# each sentence
+# If importing audio files, the associated text file with sentences must
+# be located in the folder with the audio, and follow the naming and
+# internal formatting standards.
#
-# acoustic-model Location of acoustic model to start
-# with. (this is a complete file path,
-# not including 'en-us'.) Optional.
-#
-# -adapt_in_place Only relevant if an acoustic model has
-# been given to start with. Adapt, and
-# do not copy, the given acoustic model.
-#
-# DEPENDENCIES
-#
-# CMU Sphinx, Python 3 (& 2.7), Perl, and other misc. packages.
-#
-# NOTES
-#
-# The output folder is intended to be the location of the voice model,
-# once completed.
-#
# The [reps] variable at the end specifies how many times to request a
# recording of each entry in the sentence file. It is optional, as it is
# placed at the end of the list of parameters and the script will not
# fail if it is not specified.
+#
+# USAGE | EXAMPLES ------------------------------------------------------------
#
-# Having been installed to /usr/local/bin, this command can be called
-# from anywhere.
+# vmc en-us \
+# -adapt /extant/model/location \
+# -addrecordings /audio/files/location /dictation/file/location.txt 5
+#
+# vmc en-us \
+# -create /place/to/put/model \
+# -newrecordings /place/to/put/audio/files /dictation/file/location.txt 5
#
-# After installation, a keyphrase list should be added in order to use
-# the voice model for keyword spotting.
+# vmc en-us \
+# -adapt /extant/model/location \
+# -importrecordings /audio/files/location
#
-
# VARIABLES ===================================================================
-export LD_LIBRARY_PATH=/usr/local/lib
-# set path to include sphinx library location
-# jrmeyer.github.io/installation/2016/01/09/Installing-CMU-Sphinx-on-Ubuntu.html
-
-if [[ $2 = '-record' ]]; then
+model_name="$1" # i.e. 'en-us'
+model_location="$3" # i.e. /usr/local/lib/python2.7/dist-packages/pocketsphinx/model/en-us
+audio_folder="$5" # i.e. ~/.psyche/audio
- vm_training_file=$3; output_folder=$4
+if [ $4 == '-newrecordings' ] || [ $4 == '-addrecordings' ]; then dict_file="$6"; reps="$7";
+else echo "Bad options given. Run: 'nano /usr/local/bin/vmc'."; exit 1; fi
- if [[ -n $5 ]]; then iterations=$5; else iterations=1; fi
-
-elif [[ $2 = '-import' ]]; then
-
- audio_file_directory=$3; sentence_file=$4; output_folder=$5; iterations=1
-
-else
-
- echo
- echo -e "USAGE: \tvmc "
- echo
- echo -e "\tmodel-name\t\t(used to name most of the internal files)"
- echo -e "\t[ -record OR -import audio/file/directory ]"
- echo -e "\tvm-training-file\t(sentences for the user to record)"
- echo -e "\toutput-folder\t\t(this is a complete file path)"
- echo -e "\t[reps]\t\t\t(number of voice recordings per sentence)"
- echo
-
- exit 1
-
-fi
-
-model_name=$1
-
-audio_folder=$output_folder/audio
-
-tdir=/opt/vmc/tools; fdir=/opt/vmc/functions
+current_number_of_recordings="0" # changed below, if there are any.
+libdir="/opt/vmc/lib"
# COMMANDS ====================================================================
-# OBTAIN REQUISITE FILES ------------------------------------------------------
-
-echo
-echo "Collecting required files..."
+# misc. housekeeping ----------------------------------------------------------
-# get audio files and put them where they go
-if [[ $2 = '-record' ]]; then
-
- sudo mkdir -p $audio_folder
+sudo ls 1>/dev/null # get sudo
- sudo python3 $fdir/getaudio.py $vm_training_file $audio_folder $iterations $model_name
+export LD_LIBRARY_PATH=/usr/local/lib # include sphinx library location
+# I need to do this any time I want to use the manually downloaded CMU Sphinx.
+# http://jrmeyer.github.io/installation/2016/01/08/Installing-CMU-Sphinx-on-Ubuntu.html
- echo "Recorded audio files saved into $audio_folder. They can be reused."
+if [ $2 == '-create' ] && [ ! -d "$model_location" ]; then
+ echo "Press [enter] to confirm writing new acoustic model at:"
+ echo "$model_location"; read
+ mkdir $model_location; cp -r /opt/vmc/lib/en-us/* "$model_location/";
+elif [ $2 == '-create' ] && [ -d "$model_location" ]; then
+ echo -n "Model already exists at this directory! Press [enter] to overwrite."; read
+ sudo rm -r $model_location
+ mkdir $model_location; cp -r /opt/vmc/lib/en-us/* "$model_location/";
+fi
-elif [[ $2 = '-import' ]]; then
+# record new audio files ------------------------------------------------------
+if [ $4 = '-addrecordings' ]; then # find how many recordings were already made
+ current_number_of_recordings=$(ls $audio_folder/$model_name*.wav | tail -1 | sed 's/[^0-9]*//g' | bc -l);
+ echo $current_number_of_recordings
+else
sudo mkdir -p $audio_folder
+ current_number_of_recordings='0'
+fi
- sudo cp -a $audio_file_directory/*.wav $audio_folder/
-
+if [ $4 = '-newrecordings' ] && [ -d "$audio_folder" ]; then
+ echo -n "Audio already exists at this directory! Press [enter] to overwrite."; read
+ sudo rm -r $audio_folder; mkdir $audio_folder
fi
-# copy default acoustic model - or not.
-if [[ $2 = '-adapt_in_place' ]]; then
-
-
-if [ -n "$6" ]; then
- echo "Pulling base acoustic model from $6"
- read -p "Press enter to continue, or CTRL-C to exit"
- sudo cp -r $installation_directory/en-us $output_folder
-else
- echo "Using default base acoustic model."
- sudo cp -r $tdir/en-us $output_folder
+if [ $4 = '-newrecordings' ] || [ $4 = '-addrecordings' ]; then # record audio into folder.
+
+ sudo python3 $libdir/getaudio.py \
+ $dict_file \
+ $audio_folder \
+ $reps \
+ $model_name \
+ $current_number_of_recordings
+ sudo chown -R $USER: $audio_folder # bug: dir had root ownership.
+ echo "Audio files saved into $audio_folder. They can be reused."
fi
# PRODUCE DERIVATIVE FILES ----------------------------------------------------
+# These are recreated for all audio files, regardless of whether this is a new
+# collection of recordings or just adding to an old one. Simpler that way.
-echo
-echo "Producing sentence file derivatives..."
+echo -e "\nProducing sentence file derivatives..."
+sudo python3 $libdir/format_text.py \
+ $dict_file \
+ $model_name \
+ $audio_folder \
+ $reps \
+ $current_number_of_recordings
-# get derivatives of sentence file
-sudo python3 $fdir/format_text.py $vm_training_file $model_name $output_folder $iterations
-
-echo
echo "Producing audio file derivatives..."
-
-# get derivatives of audio files
-sudo bash $fdir/acousticfiles.sh $audio_folder $output_folder/$model_name.fileids
+sudo bash $libdir/acousticfiles.sh \
+ "$audio_folder" \
+ "$model_location" \
+ "$audio_folder/$model_name.fileids"
# CREATE MODELS ---------------------------------------------------------------
-echo
echo "Creating voice model..."
-
-# build voice model
-sudo bash $fdir/voicemodel.sh $model_name $output_folder $audio_folder $output_folder
-
-echo
-echo "Process complete. New acoustic voice model saved into $output_folder"
+sudo bash $libdir/voicemodel.sh $model_name $model_location $audio_folder
+
+# FIX ROOT PERMISSIONS --------------------------------------------------------
+# I really hope this isn't horrible practice, but it's the most straightforward
+# way to make sure the wrong stuff isn't left owned by root
+sudo chown -R $USER: $model_location
+sudo chown -R $USER: $audio_folder
+
+# DONE ------------------------------------------------------------------------
+echo "Process complete."
+echo -e "ACOUSTIC VOICE MODEL: \t $model_location"
+echo -e "VOICE RECORDINGS: \t $audio_folder"