commit 56cd94a15971a6713e2435af294a1acbecd56ab9
parent a1a49162af8fc2d3aa9fee3e586f13a30b87b4fc
Author: Um Hau <um.hau@outlook.com>
Date: Sun, 18 Jun 2017 18:42:39 -0400
Merge pull request #2 from umhau/new-command-structure
New command structure
Diffstat:
26 files changed, 751 insertions(+), 1686 deletions(-)
diff --git a/README.md b/README.md
@@ -10,12 +10,6 @@ Note this tool has only been tested with Linux Mint 17.3 & 18.
**Please see the LICENSE file for terms of use.**
-TODO
-----
-
-Make sure that if I work from prerecorded audio files, I can add to a set of
-files already recorded.
-
Linux/Unix installation
-------------------------------------------------------------------------------
@@ -27,33 +21,50 @@ an AMD64 computer running Mint 18 would look like this:
Commands:
- $ cd ~/Downloads
- $ git clone https://github.com/umhau/vmc.git
- $ cd ./vmc
- $ sudo bash ./installdependencies.sh ~/tools
- $ sudo bash ./installvmc.sh
+ cd ~/Downloads
+ git clone https://github.com/umhau/vmc.git
+ cd ./vmc
+ sudo bash ./installdependencies.sh ~/tools
+ sudo bash ./installvmc.sh
See use examples in the next section.
-Usage instructions
+Usage Examples
-------------------------------------------------------------------------------
-Example usage, recording new audio with 5 repetitions of each sentence:
+Add to a preexisting set of recordings, and adapt an existing acoustic model.
- $ vmc.sh new_model -record ~/Downloads/sentences.txt ~/projects/new_model 5
+ vmc en-us -adapt /extant/model/location -addrecordings /audio/files/location /dictation/file/location.txt 5
-Example usage, importing previously created audio files:
+Create a new model, and create a new set of audio recordings.
- $ vmc.sh ccmodel -import audio_files cc.list ~/tools/ccmodel
+ vmc en-us -create /place/to/put/model -newrecordings /place/to/put/audio/files /dictation/file/location.txt 5
-Note that the model name and the name of the model folder should be the same.
-Also note the repetitions specification is optional; it defaults to 1.
+Import a previously created set of recordings, and adapt a preexisting model.
+ vmc en-us -adapt /extant/model/location -importrecordings /audio/files/location
-The model folder will contain all necessary files to run PocketSphinx with the
-newly created custom voice model.
+File Structure
+-------------------------------------------------------------------------------
-Note that dependencies are not checked when running vmc.sh. To check
-dependencies, see the section above.
+Two folders are involved: the audio recordings folder and the acoustic model
+folder. These can be kept in separate places. The acoustic model folder may
+be part of the python-pocketsphinx installation, in which case it is kept at '/usr/local/lib/python2.7/dist-packages/pocketsphinx/model/en-us'. Some files
+are generated by vmc.
+
+Note the model name is only used for files created from audio recordings. All
+the en-us acoustic model files keep their default names.
+
+Most files have default names, or are named according to the model name. File
+structure is as follows (incomplete, only showing commonly-used files):
+
+ audio-recordings
+ - [model name].fileids
+ - [model name].transcription
+ - mdef
+ - mdef.txt
+
+ acoustic-model
+ - feat.params
Background
-------------------------------------------------------------------------------
diff --git a/functions/acousticfiles.sh b/functions/acousticfiles.sh
@@ -1,37 +0,0 @@
-#!/bin/bash
-#
-# DESCRIPTION
-#
-# Produce acoustic feature files from user-supplied voice recordings. These are stored with
-# the associated audio files, and named similarly with an .mfc extension.
-#
-# USAGE
-#
-# bash acousticfiles.sh /audio/folder/path /path/to/model-name.fileids
-#
-# EXAMPLE
-#
-# bash /opt/vmc/functions/acousticfiles.sh ~/audio ~/audio/newmodel.fileids
-#
-# DEPENDENCIES
-#
-# CMU Sphinx
-#
-
-# VARIABLES DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-folderpath=${1%/}
-
-fid_filepath=$2 # filename format: model-name.fileids
-
-
-# FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-# generate some acoustic feature files
-echo "Generating acoustic feature files..."
-cd $folderpath # sphinx_fe likes to have a consistent working directory
-sphinx_fe -argfile /opt/vmc/tools/en-us/feat.params -samprate 16000 -c $fid_filepath -di . -do . -ei wav -eo mfc -mswav yes &> /dev/null
-
-
-
-
diff --git a/functions/buildLM.sh b/functions/buildLM.sh
@@ -1,43 +0,0 @@
-#!/bin/bash
-#
-# DESCRIPTION
-#
-# Produce binary language model from plain sentence list. Invokes CMU-created perl script
-# located in /opt/vmc/tools. Saves file in given directory.
-#
-# USAGE
-#
-# bash buildLM.sh sentence-list model-name save-directory
-#
-# DEPENDENCIES
-#
-# CMU Sphinx
-#
-# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-sentence_list_path=$1
-
-model_name=$2
-
-save_directory=$3
-
-tools_dir=/opt/vmc/tools
-
-# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-# run perl script to create language model
-perl $tools_dir/quick_lm.pl -s $sentence_list_path #&> /dev/null
-
-sentence_list=`basename $sentence_list_path`
-
-sentence_list_dir=`dirname $sentence_list_path`
-
-# rename output
-src=$sentence_list_path.arpabo
-dst=$save_directory/$model_name.lm
-mv $src $dst
-
-# convert lm to binary (bin) format (command was too complex for python to handle)
-filename=$save_directory/$model_name.lm
-sphinx_lm_convert -i $filename -o $filename.bin &> /dev/null
-
diff --git a/functions/format_text.py b/functions/format_text.py
@@ -1,187 +0,0 @@
-#!/usr/bin/python3
-#
-# DESCRIPTION
-#
-# Creates a number of text files dependent on a sentence file which are required for building
-# a CMU Sphinx voice model. Also uses the extended PocketSphinx pronunciation dictionary.
-#
-# Note that the target directory is the directory where the files should be saved into. This
-# should be similar to the directory the initial command was given from within.
-#
-# USAGE
-#
-# python3 format_text.py /path/to/sentence-file.txt model-name target-directory iterations
-#
-# EXAMPLE
-#
-# python3 /opt/vmc/functions/format_text.py ~/sentence-file.txt model-name target-directory 2
-#
-# DEPENDENCIES
-#
-# python3
-#
-# IMPORTS =========================================================================================
-
-import pathlib, re, sys, os, string
-
-# VARIABLE DEFINITIONS ============================================================================
-
-sentence_file = sys.argv[1] # os.path.basename() to get just the filename
-
-model_name = sys.argv[2]
-
-target_directory = sys.argv[3].rstrip(os.sep)
-
-iterations = int(sys.argv[4])
-
-pronunciation_dictionary = '/opt/vmc/tools/cmudict-en-us.dict'
-
-# FUNCTION DEFINITION =============================================================================
-
-# Print iterations progress
-def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, barLength = 100):
- """
- Call in a loop to create terminal progress bar
- @params:
- iteration - Required : current iteration (Int)
- total - Required : total iterations (Int)
- prefix - Optional : prefix string (Str)
- suffix - Optional : suffix string (Str)
- decimals - Optional : positive number of decimals in percent complete (Int)
- barLength - Optional : character length of bar (Int)
- """
- formatStr = "{0:." + str(decimals) + "f}"
- percents = formatStr.format(100 * (iteration / float(total)))
- filledLength = int(round(barLength * iteration / float(total)))
- bar = '█' * filledLength + '-' * (barLength - filledLength)
- sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)),
- if iteration == total:
- sys.stdout.write('\n')
- sys.stdout.flush()
-
-# LOGIC ===========================================================================================
-
-
-# FILEID AND TRANSCRIPTION FILES ------------------------------------------------------------------
-
-sentences_text = ""
-
-j=0
-lines = []
-
-with open(sentence_file) as f:
-
- for line in f:
- lines.append(line)
-
-# this was separated out so I can easily multiply the lines by the iterations...doesn't work to do
-# f*iterations (in above loop).
-for line in lines*iterations:
-
- sentences_text = sentences_text+' '+line
-
- j+=1
- afno = str('%04d'%j)
-
- # create transcription file
- exclude = set(string.punctuation)
- sentence = ''.join(ch for ch in line if ch not in exclude)
-
- nice_text = sentence.lower().rstrip()
- formatted_text = "</s> " + nice_text + " </s> (" + model_name + "_" + afno + ")\n"
- formatted_filename = target_directory + "/" +model_name + '.transcription'
- hs = open(formatted_filename,"a")
- hs.write(formatted_text)
- hs.close()
-
- #create fileid file
- formatted_text = model_name + "_" + afno + "\n"
- formatted_filename = target_directory + "/" +model_name + '.fileids'
- hs = open(formatted_filename,"a")
- hs.write(formatted_text)
- hs.close()
-
- sentences_text = sentences_text+' '+line # why twice? I have no memory of why I did this.
-
-# CREATE PRONUNCIATION DICTIONARY -----------------------------------------------------------------
-
-# create unique, sorted word list from sentence list
-words = []
-print("Creating unique, sorted word list...")
-[words.append(word.strip(string.punctuation).upper()) for word in sentences_text.split()]
-# set() uniques the list, sorted() puts them a-z.
-uwords = sorted(list(set(words)))
-
-# save word list to file
-print("Saving word list to file...")
-uwordsfilename = str(target_directory+'/'+model_name+'.vocab') # correct extension
-uwordsfile = open(uwordsfilename, 'w')
-for word in uwords:
- uwordsfile.write("%s\n" % word)
-
-# create pronunciation dictionary from word list
-cmudict = []
-print("Opening pronunciation dictionary...")
-with open(pronunciation_dictionary) as f:
- for line in f:
- cmudict.append(line)
-
-pdict = []
-missing_words = []
-l = len(uwords)
-i = 0
-print("Extracting entries corresponding to word list...")
-printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20)
-
-curr_line = 0
-
-for word in uwords:
-
- wordmatch=False # a counter to help with efficiency
- for line in cmudict[curr_line:]:
-
- regex_string = str('^(?P<text>'+str(word.lower()) + '(\(\d\))?)( |\t)(?P<phones>.+)$')
-
- if re.match(regex_string, line):
- # print("match!")
- ms = re.search(regex_string, line)
- pdict.append(str(ms.group('text')+' '+ms.group('phones')))
- wordmatch=True
-
- # if I already made a match and I'm not now, time to break. this allows for finding
- # alternate pronunciations
- elif wordmatch:
- # curr_line +=1
- break
-
- # curr_line +=1
-
- # check for words the pronunciation dictionary doesn't have & save
- if not wordmatch:
- missing_words.append(word)
-
- i +=1
-
- printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20)
-
-# save pronunciation dictionary to file
-print("Saving pronunciation dictionary to file...")
-pdictfilename = str(target_directory+'/'+model_name+'.dic')
-pdictfile = open(pdictfilename, 'w')
-for word_entry in pdict:
- pdictfile.write("%s\n" % word_entry)
-
-# RECORD LIST OF MISSING WORDS --------------------------------------------------------------------
-
-if missing_words:
- missing_words_filename = str(target_directory+'/'+model_name+'.missing')
- print("\nWord(s) missing from pronunciation dictionary. See ")
- print(missing_words_filename+" for list.")
- mwordsfile = open(missing_words_filename, 'w')
- for word in missing_words:
- mwordsfile.write("%s\n" % word)
-
-# DONE --------------------------------------------------------------------------------------------
-
-print("Data files created.")
-
diff --git a/functions/getaudio.py b/functions/getaudio.py
@@ -1,129 +0,0 @@
-#!/usr/bin/python3
-#
-# DESCRIPTION
-#
-# getaudio is used to sequentially prompt the user for dictations of displayed sentences.
-#
-# DEPENDENCIES
-#
-# python3-pyaudio, python3
-#
-# USAGE
-#
-# python3 getaudio.py sentence-file /output/folder recording-repetitions model-name
-#
-# LIBRARY IMPORTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-import sys, os, _thread, pyaudio, wave, contextlib
-
-# VARIABLE DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-pronunciation_dictionary="cmudict-en-us.dict"
-
-chunk = 1024
-FORMAT = pyaudio.paInt16
-CHANNELS = 1
-RATE = 16000
-
-sentence_file = sys.argv[1]
-
-output_folder = sys.argv[2].rstrip(os.sep)
-
-reps = int(sys.argv[3])
-
-model_name = sys.argv[4]
-
-
-# FUNCTION DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-# ignore sdterr messages: as from pyaudio
-@contextlib.contextmanager
-def ignore_stderr():
- devnull = os.open(os.devnull, os.O_WRONLY)
- old_stderr = os.dup(2)
- sys.stderr.flush()
- os.dup2(devnull, 2)
- os.close(devnull)
- try:
- yield
- finally:
- os.dup2(old_stderr, 2)
- os.close(old_stderr)
-
-def record_until_keypress(audio_filepath):
-
- # detect keypress [enter]
- def input_thread(L):
- input()
- L.append(None)
-
- # initialize audio stream - and keep it quiet
- with ignore_stderr():
- p = pyaudio.PyAudio()
- stream = p.open(format = FORMAT,
- channels = CHANNELS,
- rate = RATE,
- input = True,
- frames_per_buffer = chunk)
-
- # create interrupt thread
- L = []
- _thread.start_new_thread(input_thread, (L,))
-
- # record data during loop
- frames = []
- while True:
- data = stream.read(chunk)
- frames.append(data)
- if L:
- stream.stop_stream()
- break
-
- # exit cleanly after break
- stream.close()
- p.terminate()
-
- # write data to WAVE file
- data = b''.join(frames)
- wf = wave.open(audio_filepath, 'wb')
- wf.setnchannels(CHANNELS)
- wf.setsampwidth(p.get_sample_size(FORMAT))
- wf.setframerate(RATE)
- wf.writeframes(data)
- wf.close()
-
-
-# LOGIC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-if not os.path.exists(output_folder):
- os.makedirs(output_folder)
-
-# create list of sentences for prompt
-sentence_list = []
-with open(sentence_file) as f:
- for line in f:
- sentence_list.append(line)
-
-num_recs = len(sentence_list)*reps
-
-# collect audio files
-try:
-
- input("Press [enter], read text, & press [enter].")
-
- j=0
-
- for sentence in sentence_list*reps:
- #recording number
- j+=1
-
- # record audio with visual
- print("Recording no. %04d of %04d: \n\n\t%s" % (j, num_recs, sentence), end='\r')
-
- # recording file should look like this (e.g.): ./bespoke_training_data/audio/arctic_0001.wav
- record_until_keypress(str(output_folder + os.sep + model_name + "_%04d.wav" % j))
-
-except KeyboardInterrupt:
- pass
-
-
diff --git a/functions/voicemodel.sh b/functions/voicemodel.sh
@@ -1,98 +0,0 @@
-#!/bin/bash
-#
-# DESCRIPTION
-#
-# Given acoustic feature files and sentence file derivatives, produce voice model.
-#
-# USAGE
-#
-# bash voicemodel.sh model-name model-dir acoustic-files-dir sentence-file-derivatives-dir
-#
-# EXAMPLE
-#
-# bash voicemodel.sh new_model ~/tools/new_model ~/tools/new_model/audio ~/tools/new_model
-#
-# DEPENDENCIES
-#
-# CMU Sphinx
-#
-# NOTES
-#
-# This script is primarily using a copy of en-us that is being actively edited as it is
-# adapted to become a custom voice model.
-#
-# Binaries are located in /opt/vmc/tools.
-#
-# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-model_name=$1
-model_dir=$2 # location of adapted voice model files: copy of en-us, audio files, etc.
-af_dir=$3 # directory containing audio files and audio feature files
-sf_dir=$4 # directory containing sentence file derivatives
-
-tools_dir=/opt/vmc/tools
-
-pronunciation_dictionary=$tools_dir/cmudict-en-us.dict
-
-# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-# convert binary mdef file to .txt
-cd $model_dir
-pocketsphinx_mdef_convert -text $model_dir/en-us/mdef $model_dir/en-us/mdef.txt &> /dev/null
-
-# run tools to create voice model
-cd $af_dir
-
-# sphinx_fe
-sphinx_fe \
- -argfile $model_dir/en-us/feat.params \
- -samprate 16000 \
- -c $sf_dir/$model_name.fileids \
- -di . \
- -do . \
- -ei wav \
- -eo mfc \
- -mswav yes \
- &> /dev/null
-
-$tools_dir/bw \
- -hmmdir $model_dir/en-us \
- -moddeffn $model_dir/en-us/mdef.txt \
- -ts2cbfn .ptm. \
- -feat 1s_c_d_dd \
- -svspec 0-12/13-25/26-38 \
- -cmn current \
- -agc none \
- -dictfn $pronunciation_dictionary \
- -ctlfn $sf_dir/$model_name.fileids \
- -lsnfn $sf_dir/$model_name.transcription \
- -accumdir . \
- &> /dev/null
-
-$tools_dir/mllr_solve \
- -meanfn $model_dir/en-us/means \
- -varfn $model_dir/en-us/variances \
- -outmllrfn mllr_matrix \
- -accumdir . \
- &> /dev/null
-
-$tools_dir/map_adapt \
- -moddeffn $model_dir/en-us/mdef.txt \
- -ts2cbfn .ptm. \
- -meanfn $model_dir/en-us/means \
- -varfn $model_dir/en-us/variances \
- -mixwfn $model_dir/en-us/mixture_weights \
- -tmatfn $model_dir/en-us/transition_matrices \
- -accumdir . \
- -mapmeanfn $model_dir/en-us/means \
- -mapvarfn $model_dir/en-us/variances \
- -mapmixwfn $model_dir/en-us/mixture_weights \
- -maptmatfn $model_dir/en-us/transition_matrices\
- &> /dev/null
-
-$tools_dir/mk_s2sendump \
- -pocketsphinx yes \
- -moddeffn $model_dir/en-us/mdef.txt \
- -mixwfn $model_dir/en-us/mixture_weights \
- -sendumpfn $model_dir/en-us/sendump \
- &> /dev/null
-\ No newline at end of file
diff --git a/installdependencies.sh b/installdependencies.sh
@@ -111,6 +111,7 @@ if [ ! -d $installation_directory/sphinxbase/ ]; then
make -j $CORES
make -j $CORES check
sudo make -j $CORES install
+ # bug: dir had root ownership. The README runs this script through sudo, where
+ # $USER is typically root, so prefer the invoking user that sudo records.
+ sudo chown -R "${SUDO_USER:-$USER}:" "$installation_directory"
else
echo "Done."
echo "SphinxBase already installed."
@@ -129,6 +130,7 @@ if [ ! -d $installation_directory/sphinxtrain/ ]; then
./configure
make -j $CORES
sudo make -j $CORES install
+ # bug: dir had root ownership. The README runs this script through sudo, where
+ # $USER is typically root, so prefer the invoking user that sudo records.
+ sudo chown -R "${SUDO_USER:-$USER}:" "$installation_directory"
echo "Done."
else
echo "Done."
@@ -150,6 +152,7 @@ if [ ! -d $installation_directory/pocketsphinx/ ]; then
make -j $CORES clean all
make -j $CORES check
sudo make -j $CORES install
+ # bug: dir had root ownership. The README runs this script through sudo, where
+ # $USER is typically root, so prefer the invoking user that sudo records.
+ sudo chown -R "${SUDO_USER:-$USER}:" "$installation_directory"
echo "done."
else
echo "Done."
diff --git a/installvmc.sh b/installvmc.sh
@@ -1,77 +1,61 @@
#!/bin/bash
#
-# USAGE
+# USAGE =======================================================================
#
# bash installvmc.sh
#
-# NOTES
+# NOTES =======================================================================
#
-# Copies the vmc packages into /opt/vmc, and puts the vmc script into /usr/local/bin. Once
-# there, vmc can be called (with its requisite options) from anywhere with just vmc.sh.
+# Copies the vmc packages into /opt/vmc, and puts the vmc script into
+# /usr/local/bin. Once there, vmc can be called (with its requisite
+# options) from anywhere with just vmc.
#
+# SET VARIABLES ===============================================================
+# Absolute path to this script & containing folder. stackoverflow.com/q/242538
+script=$(readlink -f "$0"); scriptpath=$(dirname "$script")
-# SET VARIABLES ===================================================================================
+libdir=/opt/vmc/lib
-
-script=$(readlink -f "$0") # Absolute path to this script, e.g. /home/user/bin/foo.sh
-scriptpath=$(dirname "$script") # Absolute path this script is in, thus /home/user/bin
- # http://stackoverflow.com/questions/242538/unix-shell-script-find-out-which-directory-the-script-file-resides
-
-tdir=/opt/vmc/tools
-
-fdir=/opt/vmc/functions
-
-# CHECK FOR PREVIOUS INSTALLATION =================================================================
+# CHECK FOR PREVIOUS INSTALLATION =============================================
if [ -d /opt/vmc/ ]; then
- echo -n "Removing vmc..."
-
- bash $scriptpath/uninstallvmc.sh 1>/dev/null
+ # Quote the path so a checkout directory containing spaces still works, and
+ # only report the removal when the uninstall script actually succeeded.
+ bash "$scriptpath/uninstallvmc.sh" 1>/dev/null && echo -n "Removed vmc"
- echo "done."
-
- # echo "A version of vmc is already installed. To uninstall, run uninstallvmc.sh"
- # exit 1
+ # echo "vmc is already installed. To remove, run uninstallvmc.sh"; exit 1
fi
-# MOVE VMC FILES ==================================================================================
+# MOVE VMC FILES ==============================================================
# get sudo
-sudo ls 1>/dev/null
-
-echo -n "Installing vmc..."
+sudo ls 1>/dev/null; echo -en "\nInstalling vmc..."
# create vmc directories
-sudo mkdir -p $tdir
-sudo mkdir -p $fdir
-
-# move tools
-sudo cp -r $scriptpath/tools/* $tdir/
+sudo mkdir -p "$libdir"
-sudo tar -xf $scriptpath/cmusphinx-en-us-ptm-5.2.tar.gz -C $tdir
-sudo mv $tdir/cmusphinx-en-us-ptm-5.2 $tdir/en-us
+# move library (the glob stays outside the quotes so it still expands)
+sudo cp -r "$scriptpath"/lib/* "$libdir/"
-# move functions
-sudo cp -r $scriptpath/functions/* $fdir/
+# unpack the stock acoustic model and give it the short name en-us
+sudo tar -xf "$scriptpath/lib/cmusphinx-en-us-ptm-5.2.tar.gz" -C "$libdir"
+sudo mv "$libdir/cmusphinx-en-us-ptm-5.2" "$libdir/en-us"
# move vmc into user's path & set as executable
-sudo cp $scriptpath/vmc.sh /usr/local/bin/vmc.sh
-sudo chmod +x /usr/local/bin/vmc.sh
+sudo cp "$scriptpath/vmc" /usr/local/bin/vmc
+sudo chmod +x /usr/local/bin/vmc
# move lmt into user's path & set as executable
-sudo cp $scriptpath/lmt.sh /usr/local/bin/lmt.sh
-sudo chmod +x /usr/local/bin/lmt.sh
+sudo cp "$scriptpath/lmt" /usr/local/bin/lmt
+sudo chmod +x /usr/local/bin/lmt
# GET SPHINXTRAIN BINARIES ========================================================================
# copy binary tools into model folder
-sudo cp /usr/local/libexec/sphinxtrain/bw $tdir
-sudo cp /usr/local/libexec/sphinxtrain/map_adapt $tdir
-sudo cp /usr/local/libexec/sphinxtrain/mk_s2sendump $tdir
-sudo cp /usr/local/libexec/sphinxtrain/mllr_solve $tdir
+sudo cp /usr/local/libexec/sphinxtrain/bw "$libdir"
+sudo cp /usr/local/libexec/sphinxtrain/map_adapt "$libdir"
+sudo cp /usr/local/libexec/sphinxtrain/mk_s2sendump "$libdir"
+sudo cp /usr/local/libexec/sphinxtrain/mllr_solve "$libdir"
echo "done."
diff --git a/lib/acousticfiles.sh b/lib/acousticfiles.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# DESCRIPTION -----------------------------------------------------------------
+#
+# Produce acoustic feature files from user-supplied voice recordings.
+# These are stored with the associated audio files, and named similarly
+# with an .mfc extension.
+#
+# Do not include trailing forward slashes in the folder paths.
+#
+# USAGE | EXAMPLE -------------------------------------------------------------
+# bash acousticfiles.sh /path/to/audio/folder \
+# /path/to/acoustic/model \
+# /path/to/model.fileids
+#
+# VARIABLE DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+audio_folder_path="$1" # e.g. ~/.psyche/audio
+
+# i.e. /usr/local/lib/python2.7/dist-packages/pocketsphinx/model/en-us
+acoustic_model_location="$2"
+
+fileids_location="$3" # e.g. ~/.psyche/audio/model.fileids
+
+# FUNCTION ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+# generate some acoustic feature files - if supplementing an existing audio
+# collection, this will overwrite the preexisting feature files. There isn't a
+# way around that without either ugly hacks or a rewrite of the sphinx_fe file.
+
+echo "Generating acoustic feature files..."
+# sphinx_fe likes to have a consistent working directory. Stop if the folder is
+# missing so the relative -di/-do paths below never resolve somewhere else.
+cd "$audio_folder_path" || exit 1
+sudo sphinx_fe -argfile \
+ "$acoustic_model_location/feat.params" \
+ -samprate 16000 \
+ -c "$fileids_location" \
+ -di . \
+ -do . \
+ -ei wav \
+ -eo mfc \
+ -mswav yes \
+ &> /dev/null
diff --git a/lib/buildLM.sh b/lib/buildLM.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# DESCRIPTION
+#
+# Produce binary language model from plain sentence list. Invokes CMU-created perl script
+# located in /opt/vmc/lib. Saves file in given directory.
+#
+# USAGE
+#
+# bash buildLM.sh sentence-list model-name save-directory
+#
+# DEPENDENCIES
+#
+# CMU Sphinx
+#
+# VARIABLES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sentence_list_path=$1
+
+model_name=$2
+
+save_directory=$3
+
+lib_dir=/opt/vmc/lib
+
+# COMMANDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+# run perl script to create language model
+perl "$lib_dir/quick_lm.pl" -s "$sentence_list_path" #&> /dev/null
+
+# rename output: the .arpabo file is expected beside the sentence list. Stop
+# here if it is missing so the conversion below never runs on a stale file.
+src="$sentence_list_path.arpabo"
+dst="$save_directory/$model_name.lm"
+mv -- "$src" "$dst" || exit 1
+
+# convert lm to binary (bin) format (command was too complex for python to handle)
+filename="$save_directory/$model_name.lm"
+sphinx_lm_convert -i "$filename" -o "$filename.bin" &> /dev/null
+
diff --git a/tools/cmudict-en-us.dict b/lib/cmudict-en-us.dict
diff --git a/cmusphinx-en-us-ptm-5.2.tar.gz b/lib/cmusphinx-en-us-ptm-5.2.tar.gz
Binary files differ.
diff --git a/lib/format_text.py b/lib/format_text.py
@@ -0,0 +1,212 @@
+#!/usr/bin/python3
+#
+# DESCRIPTION -----------------------------------------------------------------
+#
+# Creates a number of text files dependent on a sentence file which are
+# required for building a CMU Sphinx voice model. Also uses the extended
+# PocketSphinx pronunciation dictionary.
+#
+# Note that the target directory is the directory where the files should
+# be saved into. This should be similar to the directory the initial
+# command was given from within.
+#
+# The last two options in the usage example are optional. The 'number of
+# preexisting audio recordings' variable is used for adding to an
+# existing collection of audio recordings - it controls how the new
+# .wav files are named (starting from zero, or something higher). The
+# last one, 'the fancy sentence list', is only looked for if the other is
+# present. It is the absolute path of the sentence list that vmc edited
+# the last time it was creating audio files in the given folder. Instead
+# of starting from scratch, vmc can simply append the new items into that
+# list.
+#
+# The integration of the new functions should be seamless, though I
+# anticipate that it will be a royal pain to get it working.
+#
+# USAGE | EXAMPLE--------------------------------------------------------------
+#
+# python3 format_text.py /path/to/sentence-file.txt \
+# model-name \
+# audio_folder \
+# iterations \
+# num_of_preexisting_audio_recordings
+#
+# FILES CREATED ---------------------------------------------------------------
+# All are marked for new information to be appended. Therefore, I don't
+# have to provide the old names of any files - just make sure the model
+# names are the same. Since none of these come with a model name except
+# the .dict file (.dic??), I should be fine with the current
+# configuration.
+#
+# audio_folder + "/" +model_name + '.transcription'
+# audio_folder + "/" +model_name + '.fileids'
+#
+# audio_folder+'/'+model_name+'.vocab'
+# audio_folder+'/'+model_name+'.dic'
+#
+# IMPORTS =====================================================================
+
+import pathlib, re, sys, os, string
+
+# VARIABLE DEFINITIONS ========================================================
+
+create_pronunciation_dictionary = False # This may not be desirable
+
+sentence_file = sys.argv[1] # os.path.basename() to get just the filename
+model_name = sys.argv[2]
+audio_folder = sys.argv[3].rstrip(os.sep)
+iterations = int(sys.argv[4])
+pronunciation_dictionary = '/opt/vmc/lib/cmudict-en-us.dict'
+
+# Optional fifth argument: how many audio files already exist. It defaults to
+# 0, the same fallback getaudio.py uses, so a fresh set needs no extra argument.
+recording_count = int(sys.argv[5]) if len(sys.argv) > 5 else 0
+
+# Merge with an earlier word list only when earlier recordings exist.
+append_to_existing = recording_count != 0
+
+# LOGIC =======================================================================
+
+# Print iterations progress ---------------------------------------------------
+def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, barLength = 100):
+    """Redraw a one-line progress bar on stdout; call once per loop step.
+
+    iteration -- current step
+    total     -- number of steps; 0 is drawn as a finished bar
+    prefix    -- text written before the bar
+    suffix    -- text written after the percentage
+    decimals  -- digits after the decimal point in the percentage
+    barLength -- width of the bar in characters
+    """
+    if total:
+        percent_value = 100 * (iteration / float(total))
+        filledLength = int(round(barLength * iteration / float(total)))
+    else:
+        # Nothing to do counts as done; avoids dividing by zero.
+        percent_value = 100.0
+        filledLength = barLength
+
+    formatStr = "{0:." + str(decimals) + "f}"
+    percents = formatStr.format(percent_value)
+    bar = '█' * filledLength + '-' * (barLength - filledLength)
+
+    # '\r' returns to the start of the line so each call overwrites the last.
+    sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))
+    if iteration == total:
+        sys.stdout.write('\n')
+    sys.stdout.flush()
+
+# create files per-audio recording --------------------------------------------
+
+# Append one entry to a bookkeeping file, creating the file if it is missing.
+def append_to_file(formatted_filename, formatted_text):
+    with open(formatted_filename, "a") as hs:
+        hs.write(formatted_text)
+
+# Both files sit in the audio folder and are named after the model.
+transcription_filename = audio_folder + "/" +model_name + '.transcription'
+fileids_filename = audio_folder + "/" +model_name + '.fileids'
+
+# Characters removed from each sentence before it is transcribed.
+exclude = set(string.punctuation)
+
+sentences_text = ""
+
+with open(sentence_file) as f:
+    lines = list(f)
+
+# Every sentence is repeated `iterations` times, one recording per repetition.
+for line in lines*iterations:
+
+    # Collect the raw text for the word list built further down. One copy per
+    # line is enough: that list is de-duplicated with set() before use.
+    sentences_text = sentences_text+' '+line
+
+    # Number this recording, continuing after any preexisting recordings.
+    recording_count+=1
+    formatted_audio_file_number = str('%04d'%recording_count)
+    file_id = model_name+"_"+formatted_audio_file_number
+
+    # create transcription file -----------------------------------------------
+
+    # clean up the text
+    sentence = ''.join(ch for ch in line if ch not in exclude)
+    nice_text = sentence.lower().rstrip()
+
+    # NOTE(review): CMU Sphinx examples open a transcript line with "<s>"; the
+    # leading "</s>" is kept exactly as before -- confirm which is intended.
+    append_to_file(transcription_filename, "</s> "+nice_text+" </s> ("+file_id+")\n")
+
+    #create fileid file -------------------------------------------------------
+
+    append_to_file(fileids_filename, file_id + "\n")
+
+# CREATE PRONUNCIATION DICTIONARY ---------------------------------------------
+
+# this is the same for the new file and the old file
+uwordsfilename = str(audio_folder+'/'+model_name+'.vocab') # correct extension
+
+# create unique, sorted word list from sentence list
+print("Creating unique, sorted word list...")
+
+# get words from new sentence file
+words = [word.strip(string.punctuation).upper().rstrip() for word in sentences_text.split()]
+
+# add the words from the old word list (if this is appending to an old model).
+# A recording set with no .vocab file yet simply contributes no old words.
+if append_to_existing and os.path.isfile(uwordsfilename):
+    with open(uwordsfilename) as f:
+        words.extend(word.strip(string.punctuation).upper().rstrip() for word in f)
+
+# set() uniques the list, sorted() puts them a-z, and filter(None, ...) drops
+# the empty strings left behind by tokens that were punctuation only.
+uwords = list(filter(None, sorted(set(words))))
+
+# save word list to file
+print("Saving word list to file...")
+with open(uwordsfilename, 'w') as uwordsfile:
+    for word in uwords:
+        uwordsfile.write("%s\n" % word)
+
+# create pronunciation dictionary from word list
+if create_pronunciation_dictionary:
+    print("Opening pronunciation dictionary...")
+    with open(pronunciation_dictionary) as f:
+        cmudict = f.readlines()
+
+    print("Extracting entries corresponding to word list...")
+    pdict = []; missing_words = []; l = len(uwords); i = 0
+    printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20)
+
+    for word in uwords:
+
+        # Built once per word instead of once per dictionary line. re.escape()
+        # stops characters such as '.' or '(' inside a word from being read as
+        # regex syntax. The optional "(N)" suffix matches the numbered
+        # alternate-pronunciation entries for the same word.
+        entry = re.compile(r'^(?P<text>' + re.escape(word.lower()) + r'(\(\d\))?)( |\t)(?P<phones>.+)$')
+
+        wordmatch=False # becomes True once an entry for this word is found
+        for line in cmudict:
+
+            ms = entry.match(line)
+            if ms:
+                pdict.append(str(ms.group('text')+' '+ms.group('phones')))
+                wordmatch=True
+
+            # if I already made a match and I'm not now, time to break. this
+            # allows for finding alternate pronunciations
+            elif wordmatch:
+                break
+
+        # check for words the pronunciation dictionary doesn't have & save
+        if not wordmatch:
+            missing_words.append(word)
+
+        i +=1
+
+        printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20)
+
+    # save pronunciation dictionary to file
+    print("Saving pronunciation dictionary to file...")
+    pdictfilename = str(audio_folder+'/'+model_name+'.dic')
+    with open(pdictfilename, 'w') as pdictfile:
+        for word_entry in pdict:
+            pdictfile.write("%s\n" % word_entry)
+
+    # RECORD LIST OF MISSING WORDS --------------------------------------------
+
+    if missing_words:
+        missing_words_filename = str(audio_folder+'/'+model_name+'.missing')
+        print("\nWord(s) missing from pronunciation dictionary. See ")
+        print(missing_words_filename+" for list.")
+        with open(missing_words_filename, 'w') as mwordsfile:
+            for word in missing_words:
+                mwordsfile.write("%s\n" % word)
+
+# DONE ------------------------------------------------------------------------
+
+print("Data files created.")
+
diff --git a/lib/getaudio.py b/lib/getaudio.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python3
+#
+# DESCRIPTION
+#
+# getaudio is used to sequentially prompt the user for dictations of
+# displayed sentences.
+#
+# DEPENDENCIES: python3-pyaudio, python3
+#
+# USAGE
+#
+# python3 getaudio.py /path/to/simple-list-of-sentences.txt \
+# /audio/recording/folder \
+# recording-repetitions \
+# model-name \
+# num_of_preexisting_audio_recordings
+#
+# LIBRARY IMPORTS -------------------------------------------------------------
+
+import sys, os, _thread, pyaudio, wave, contextlib
+
+# VARIABLE DEFINITIONS --------------------------------------------------------
+
+chunk = 1024
+FORMAT = pyaudio.paInt16
+CHANNELS = 1
+RATE = 16000
+
+sentence_file = sys.argv[1] # e.g. ~/sentencelist.txt
+audio_recording_folder = sys.argv[2].rstrip(os.sep) # e.g. ~/.psyche/audio
+reps = int(sys.argv[3]) # e.g. 5
+model_name = sys.argv[4] # e.g. 'en-us'
+
+try:
+ recording_count = int(sys.argv[5]) # how many audio files already exist
+except IndexError:
+ recording_count = 0
+
+# FUNCTION DEFINITIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+# ignore sdterr messages: as from pyaudio
+@contextlib.contextmanager
+def ignore_stderr():
+ devnull = os.open(os.devnull, os.O_WRONLY)
+ old_stderr = os.dup(2)
+ sys.stderr.flush()
+ os.dup2(devnull, 2)
+ os.close(devnull)
+ try:
+ yield
+ finally:
+ os.dup2(old_stderr, 2)
+ os.close(old_stderr)
+
+def record_until_keypress(audio_filepath):
+
+ # detect keypress [enter]
+ def input_thread(L):
+ input()
+ L.append(None)
+
+ # initialize audio stream - and keep it quiet
+ with ignore_stderr():
+ p = pyaudio.PyAudio()
+ stream = p.open(format = FORMAT,
+ channels = CHANNELS,
+ rate = RATE,
+ input = True,
+ frames_per_buffer = chunk)
+
+ # create interrupt thread
+ L = []
+ _thread.start_new_thread(input_thread, (L,))
+
+ # record data during loop
+ frames = []
+ while True:
+ data = stream.read(chunk)
+ frames.append(data)
+ if L:
+ stream.stop_stream()
+ break
+
+ # exit cleanly after break
+ stream.close()
+ p.terminate()
+
+ # write data to WAVE file
+ data = b''.join(frames)
+ wf = wave.open(audio_filepath, 'wb')
+ wf.setnchannels(CHANNELS)
+ wf.setsampwidth(p.get_sample_size(FORMAT))
+ wf.setframerate(RATE)
+ wf.writeframes(data)
+ wf.close()
+
+
+# LOGIC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if not os.path.exists(audio_recording_folder):
+ raise NotADirectoryError("Audio recording save folder does not exist.")
+
+# create list of sentences for prompt
+sentence_list = []
+with open(sentence_file) as f:
+ for line in f:
+ sentence_list.append(line)
+
+num_recs = len(sentence_list)*reps+recording_count
+
+# collect audio files
+try:
+
+ input("Press [enter], read text, & press [enter].")
+
+ for sentence in sentence_list*reps:
+ #recording number
+ recording_count+=1
+
+ # record audio with visual
+ print("Recording no. %04d of %04d: \n\n\t%s" % (recording_count, num_recs, sentence), end='\r')
+
+ # recording file should look like this (e.g.): ./bespoke_training_data/audio/arctic_0001.wav
+ record_until_keypress(str(audio_recording_folder + os.sep + model_name + "_%04d.wav" % recording_count))
+
+except KeyboardInterrupt:
+ pass
+
+
diff --git a/tools/quick_lm.pl b/lib/quick_lm.pl
diff --git a/lib/voicemodel.sh b/lib/voicemodel.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+#
+# DESCRIPTION =================================================================
+#
+# Given acoustic feature files and sentence file derivatives, produce
+# voice model.
+#
+# This script is primarily using a copy of en-us that is being actively
+# edited as it is adapted to become a custom voice model.
+#
+# Binaries are located in /opt/vmc/lib.
+#
+# USAGE EXAMPLE ===============================================================
+#
+# bash voicemodel.sh \
+# 'en-us' \
+# /usr/local/lib/python2.7/dist-packages/pocketsphinx/model/en-us \
+# ~/.psyche/audio
+#
+# VARIABLES ===================================================================
+
+model_name=$1 # If performing adaptation, this must match prior names.
+acoustic_model_dir=$2 # acoustic model folder, often the default 'en-us'
+audio_file_dir=$3 # contains audio file and sentence file derivatives
+
+libdir=/opt/vmc/lib
+
+# also located at /opt/vmc/lib/cmudict-en-us.dict
+dict="/usr/local/lib/python2.7/dist-packages/pocketsphinx/model/cmudict-en-us.dict"
+
+# COMMANDS ====================================================================
+
+# convert binary mdef file to .txt --------------------------------------------
+cd $acoustic_model_dir
+sudo pocketsphinx_mdef_convert \
+ -text $acoustic_model_dir/mdef $acoustic_model_dir/mdef.txt &> /dev/null
+
+# run tools to create voice model ---------------------------------------------
+cd $audio_file_dir
+
+sudo sphinx_fe \
+ -argfile $acoustic_model_dir/feat.params \
+ -samprate 16000 \
+ c $audio_file_dir/$model_name.fileids \
+ -di . -do . -ei wav -eo mfc -mswav yes \
+ &> /dev/null
+
+sudo $libdir/bw \
+ -hmmdir $acoustic_model_dir \
+ -moddeffn $acoustic_model_dir/mdef.txt \
+ -ts2cbfn .ptm. -feat 1s_c_d_dd -svspec 0-12/13-25/26-38 \
+ -cmn current -agc none -dictfn $dict \
+ -ctlfn $audio_file_dir/$model_name.fileids \
+ -lsnfn $audio_file_dir/$model_name.transcription \
+ -accumdir . \
+ &> /dev/null
+
+sudo $libdir/mllr_solve \
+ -meanfn $acoustic_model_dir/means \
+ -varfn $acoustic_model_dir/variances \
+ -outmllrfn mllr_matrix -accumdir . &> /dev/null
+
+sudo $libdir/map_adapt \
+ -moddeffn $acoustic_model_dir/mdef.txt \
+ -ts2cbfn .ptm. \
+ -meanfn $acoustic_model_dir/means \
+ -varfn $acoustic_model_dir/variances \
+ -mixwfn $acoustic_model_dir/mixture_weights \
+ -tmatfn $acoustic_model_dir/transition_matrices \
+ -accumdir . \
+ -mapmeanfn $acoustic_model_dir/means \
+ -mapvarfn $acoustic_model_dir/variances \
+ -mapmixwfn $acoustic_model_dir/mixture_weights \
+ -maptmatfn $acoustic_model_dir/transition_matrices\
+ &> /dev/null
+
+sudo $libdir/mk_s2sendump \
+ -pocketsphinx yes \
+ -moddeffn $acoustic_model_dir/mdef.txt \
+ -mixwfn $acoustic_model_dir/mixture_weights \
+ -sendumpfn $acoustic_model_dir/sendump \
+ &> /dev/null
diff --git a/lmt b/lmt
@@ -0,0 +1,36 @@
+#!/bin/bash
+#
+# DESCRIPTION
+#
+# Given a list of sentences, create a statistical language model.
+#
+# USAGE: lmt lm-training-file lm-file-name output-location
+#
+# VARIABLES ===================================================================
+
+sentence_list_file=$1     # input file with sample sentences
+output_lm_file_name=$2    # desired base name of the output lm file
+save_directory=$3         # location to save the output file into
+
+fdir=/opt/vmc/lib
+
+# CHECK IF HELP NEEDED ========================================================
+
+# with no first argument, print the usage text and exit with status 1
+if [[ -z $1 ]]; then
+
+    echo
+    echo -e "USAGE: \tlmt "
+    echo
+    echo -e "\tsentence_list_file\t(input file with sample sentences)"
+    echo -e "\toutput_lm_file_name\t(desired base name of output lm file)"
+    echo -e "\tsave_directory\t\t(location to save the ouput file into)"
+    echo
+
+    exit 1
+
+fi
+
+# COMMANDS ====================================================================
+
+# build language model; arguments are quoted so paths containing spaces
+# reach buildLM.sh as single arguments
+bash "$fdir/buildLM.sh" "$sentence_list_file" "$output_lm_file_name" "$save_directory"
+
+
diff --git a/lmt.sh b/lmt.sh
@@ -1,45 +0,0 @@
-#!/bin/bash
-#
-# DESCRIPTION
-#
-# Given a list of sentences, create a statistical language model.
-#
-# USAGE
-#
-# lmt.sh lm-training-file lm-file-name output-location
-#
-# DEPENDENCIES
-#
-# CMU Sphinx, Perl, and other misc. packages.
-#
-
-# VARIABLES =======================================================================================
-
-if [[ -z $1 ]]; then
-
- echo
- echo -e "USAGE: \tlmt.sh "
- echo
- echo -e "\tsentence_list_file\t(input file with sample sentences)"
- echo -e "\toutput_lm_file_name\t(desired base name of output lm file)"
- echo -e "\tsave_directory\t\t(location to save the ouput file into)"
- echo
-
- exit 1
-
-fi
-
-sentence_list_file=$1
-
-output_lm_file_name=$2
-
-save_directory=$3
-
-fdir=/opt/vmc/functions
-
-# COMMANDS ========================================================================================
-
-# build language model
-bash $fdir/buildLM.sh $sentence_list_file $output_lm_file_name $save_directory
-
-
diff --git a/old/README b/old/README
@@ -1,129 +0,0 @@
-Voice model creator for CMU Sphinx
-===============================================================================
-
-This tool contains basic tools for creating a custom domain voice model for use
-with the PocketSphinx decoder. It is also possible to use the voice models
-created by this tool as the basis for a test-to-speech engine.
-
-Note this tool has only been tested with Linux Mint 17.3 & 18.
-
-**Please see the LICENSE file for terms of use.**
-
-Linux Mint 18 Installation
--------------------------------------------------------------------------------
-
-vmc is generally run from within its top-level folder, and the voice model it
-creates is generated within that folder. i.e.:
-
- $ ~/vmc
- $ ~/vmc/new-voice-model
-
-two of the vmc dependencies, sphinxbase and sphinxtrain, are compiled in a
-location of the user's choice. To install the vmc dependencies with the
-provided script, this location must be provided. It is assumed that the
-location is a direct subdirectory of the user's home directory:
-
- $ mkdir ~/tools
-
-To install vmc's dependencies, run:
-
- $ sudo bash install.sh [install location]
-
-For example:
-
- $ sudo bash install.sh tools
-
-Full installation on an AMD64 computer running Mint 18 would look like this:
-
- $ cd ~/
- $ git clone https://github.com/umhau/vmc.git
- $ mkdir tools
- $ cd vmc
- $ sudo bash install.sh tools
-
-See use examples in the next section.
-
-Usage instructions
--------------------------------------------------------------------------------
-
-This tools brings together a number of disparate data files that are needed for
-creating a voice model. This graph illustrates the data process involved:
-
- word domain
- +
- |
- v
- +-------+ sentence list+----------+
- | + |
- | | |
- v v v
- dictionary grammar: LM voice samples
- + + +
- | | |
- | v |
- +--------> voice model <----------+
- training
- +
- |
- v
- voice model
-
-Each of these steps, starting with the sentence list (given) and ending with the
-voice model are contained within this tool.
-
-To use vmc, it is necessary to provide a few perameters. These are:
-
- - the model name
- - the file containing the sentence list
- - an integer called 'voice training iterations' (VTI)
- - the location of an audio folder for importing
- - the name of the corresponding sentence file to imported audio
-
-The tool requests recordings of each sentence provided in the file. The user is
-able to specify the number of times each sentence is used is making recordings
-with the input perameter 'voice training iterations'. If the value is 0, no
-voice samples will be requested, and the tool will to as much as it can without
-them. Note for brevity's sake the variable is abbreviated VTI.
-
-Also note there are three use cases - when audio is to be added in the course of
-running the script, when it is to be imported, and when I already have it and
-want to ignore it for the time being. The parameters system I used admittedly
-needs a lot of help. Learning as I go, all that. First program to use CLI
-parameters.
-
-Example usage, recording new audio:
-
- $ cd ~/vmc
- $ bash vmc.sh new-voice-model ../Downloads/sentence-list.txt 2
-
-Example usage, importing previous audio:
-
- $ cd ~/vmc
- $ bash vmc.sh voice-model foo -1 old_audio_folder old_audio_sentences.txt
-
-Example usage, ignoring audio files:
-
- $ cd ~/vmc
- $ bash vmc.sh voice-model3 arctic_data.txt 0
-
-Note that dependencies are not checked when running vmc.sh. To check
-dependencies, see the section above.
-
-If VTI is > 0, the user will be prompted to read each entry in the
-sentence list.
-
-The output of the command will be found at:
-
- $ ~/vmc/new-voice-model
-
-This folder will contain all necessary files to run PocketSphinx with a custom
-voice model.
-
-
-Non-association
--------------------------------------------------------------------------------
-
-This tool was not created in conjunction with the CMU Sphinx developers. Code
-was not reused except in the case of quick_lm.pl, in which case the original
-lisence was retained in the script.
-
diff --git a/old/install.sh b/old/install.sh
@@ -1,139 +0,0 @@
-#!/bin/bash
-# USAGE: bash install.sh [installation folder]
-#
-# installation folder is presumed to be a direct subdirectory of the
-# user's home directory. The script will use all the computer's cores
-# to perform compilations.
-
-echo
-# check that installation folder has been specified
-if [ ! -n "$1" ]; then
- echo
- echo "**Error**: you must specify installation folder for CMU programs."
- echo "Folder should be specified relative to the home directory."
- echo "Recommended: 'bash install.sh tools'"
- exit 64
-fi
-
-# check number of cores (speeds compilation)
-CORES=$(nproc --all 2>&1)
-
-
-# make sure folder exists
-if [ ! -d /home/$USER/$1 ]; then
- mkdir /home/$USER/$1
-fi
-
-
-# CHECK DEPENDENCIES
-# note on installation tactics: I prefer to let apt detect prior installation.
-# It makes the code much nicer to read.
-
-# check for git (needed for installations)
-echo "Installing git..."
-sudo apt-get install git -y
-echo
-
-# check for swig
-echo "Installing swig..."
-sudo apt-get install swig -y
-echo
-
-# check for perl
-echo "Installing perl..."
-sudo apt-get install perl -y
-echo
-
-# check for python development version: needed for sphinxbase
-echo "Installing python-dev..."
-sudo apt-get install python-dev -y
-echo
-
-# install pyaudio
-echo "Installing python3-pyaudio..."
-sudo apt-get install python3-pyaudio -y
-echo
-
-# check for python3: used in model scripts
-echo "Installing python3..."
-sudo apt-get install python3 -y
-echo
-
-echo "Installing libtool..."
-sudo apt-get install libtool-bin -y
-echo
-
-echo "Installing automake..."
-sudo apt-get install automake -y
-echo
-
-echo "Installing autoconf..."
-sudo apt-get install autoconf -y
-echo
-
-
-# check for sphinxbase
-echo -n "Checking for sphinxbase...."
-if [ ! -d ~/$1/sphinxbase/ ]; then
- echo
- echo "installing..."
- cd ~/$1
- git clone https://github.com/cmusphinx/sphinxbase.git
- cd ./sphinxbase
- ./autogen.sh
- ./configure
- make -j $CORES
- make -j $CORES check
- sudo make -j $CORES install
-else
- echo "Done."
- echo "SphinxBase already installed."
- echo
-fi
-
-# check for sphinxtrain
-echo -n "Checking for sphinxtrain...."
-if [ ! -d ~/$1/sphinxtrain/ ]; then
- echo
- echo -n "installing..."
- cd ~/$1
- git clone https://github.com/cmusphinx/sphinxtrain.git
- cd ./sphinxtrain
- ./autogen.sh
- ./configure
- make -j $CORES
- sudo make -j $CORES install
- echo "Done."
-else
- echo "Done."
- echo "SphinxTrain already installed."
- echo
-fi
-
-
-# check for pocketsphinx
-echo -n "Checking for pocketsphinx..."
-if [ ! -d ~/$1/pocketsphinx/ ]; then
- echo
- echo -n "installing..."
- cd ~/$1
- git clone https://github.com/cmusphinx/pocketsphinx.git
- cd ./pocketsphinx
- ./autogen.sh
- ./configure
- make -j $CORES clean all
- make -j $CORES check
- sudo make -j $CORES install
- echo "done."
-else
- echo "Done."
- echo "PocketSphinx already installed."
- echo
-fi
-
-echo "VMC dependency installations completed. See README for next steps."
-echo
-
-
-
-
diff --git a/old/t b/old/t
@@ -1,36 +0,0 @@
-# directory structure:
-
-# vmc
-# > tools
-# - quick_lm.pl
-# - cmudict-en-us.dict
-# - cmusphinx-en-us-ptm-5.2.tar.gz (only when downloaded)
-# - en-us (after installation - extract from tar.gz & delete it)
-# > functions
-# - getaudio.py
-# - acousticfiles.sh
-# - voicemodel.sh
-# - etc.
-# - installvmc.sh (only when downloaded - don't copy it)
-# - vmc
-#
-# USAGE
-#
-# add the options thing to vmc, then copy vmc to somewhere on the system path. That way I can
-# call it as a system command when needed. It will have everything it needs right here.
-#
-# the vmc package will be copied to /opt, where all such 3rd party tool packages are saved.
-#
-#
-# INSTALLATION
-#
-# vmc created in /opt
-# tools & functions copied into vmc
-# vmc (shell script) is moved to /usr/local/bin
-#
-
-# http://www.pathname.com/fhs/pub/fhs-2.3.html#OPTADDONAPPLICATIONSOFTWAREPACKAGES
-#
-# For example, someapp would be installed in /opt/someapp, with one of its command being
-# /opt/someapp/bin/foo, its configuration file would be in /etc/opt/someapp/foo.conf, and its log
-# files in /var/opt/someapp/logs/foo.access.
-\ No newline at end of file
diff --git a/old/vmc.py b/old/vmc.py
@@ -1,382 +0,0 @@
-#!/usr/bin/python3
-#
-#dependencies: python3-pyaudio
-
-from subprocess import Popen
-from subprocess import call
-import string
-import re
-import _thread
-import time
-import pyaudio
-import wave
-#import string
-import os
-import errno
-import contextlib
-import sys
-import shutil
-#import os.path
-from shutil import copyfile
-import tarfile
-
-
-import sys
-
-# Print iterations progress
-def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, barLength = 100):
- """
- Call in a loop to create terminal progress bar
- @params:
- iteration - Required : current iteration (Int)
- total - Required : total iterations (Int)
- prefix - Optional : prefix string (Str)
- suffix - Optional : suffix string (Str)
- decimals - Optional : positive number of decimals in percent complete (Int)
- barLength - Optional : character length of bar (Int)
- """
- formatStr = "{0:." + str(decimals) + "f}"
- percents = formatStr.format(100 * (iteration / float(total)))
- filledLength = int(round(barLength * iteration / float(total)))
- bar = '█' * filledLength + '-' * (barLength - filledLength)
- sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)),
- if iteration == total:
- sys.stdout.write('\n')
- sys.stdout.flush()
-
-
-# tool to check if programs are installed
-def is_tool(name):
- try:
- devnull = open(os.devnull)
- Popen([name], stdout=devnull, stderr=devnull).communicate()
- except OSError as e:
- if e.errno == os.errno.ENOENT:
- return False
- return True
-
-
-# ignore sdterr messages: as from pyaudio
-@contextlib.contextmanager
-def ignore_stderr():
- devnull = os.open(os.devnull, os.O_WRONLY)
- old_stderr = os.dup(2)
- sys.stderr.flush()
- os.dup2(devnull, 2)
- os.close(devnull)
- try:
- yield
- finally:
- os.dup2(old_stderr, 2)
- os.close(old_stderr)
-
-# detect any keypress
-def input_thread(L):
- input()
- L.append(None)
-
-def record(WAVE_OUTPUT_FILENAME):
-
- chunk = 1024
- FORMAT = pyaudio.paInt16
- CHANNELS = 1
- RATE = 16000
-
- with ignore_stderr():
- # initialize audio stream - and keep it quiet
- p = pyaudio.PyAudio()
- stream = p.open(format = FORMAT,
- channels = CHANNELS,
- rate = RATE,
- input = True,
- frames_per_buffer = chunk)
-
- # create interrupt thread
- L = []
- _thread.start_new_thread(input_thread, (L,))
-
- frames = []
- while True:
- # record data during loop
- data = stream.read(chunk)
- frames.append(data)
- if L:
- stream.stop_stream()
- break
-
- # exit cleanly after break
- stream.close()
- p.terminate()
-
- # write data to WAVE file
- data = b''.join(frames)
- wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
- wf.setnchannels(CHANNELS)
- wf.setsampwidth(p.get_sample_size(FORMAT))
- wf.setframerate(RATE)
- wf.writeframes(data)
- wf.close()
-
-
-# format sentences and add to file
-def sentence_format_and_save(sentence, model_name, afno): # afno: audio file number with 4 digits
-
- #transcription file
-
- # get rid of punctuation
- exclude = set(string.punctuation)
- sentence = ''.join(ch for ch in sentence if ch not in exclude)
-
- nice_text = sentence.lower().rstrip()#.translate(string.maketrans('', ''), ',.')
- formatted_text = "</s> " + nice_text + " </s> (" + model_name + "_" + afno + ")\n"
- # formatted_text = nice_text+"\n"
- formatted_filename = "./" + model_name + "/" +model_name + '.transcription'
- hs = open(formatted_filename,"a")
- hs.write(formatted_text)
- hs.close()
-
- #fileid
- formatted_text = model_name + "_" + afno + "\n"
- formatted_filename = "./" + model_name + "/" +model_name + '.fileids'
- hs = open(formatted_filename,"a")
- hs.write(formatted_text)
- hs.close()
-
-
-
-def sentence_parsing(sentences_text, model_name, sentence_file, pronunciation_dictionary):
-
- # create unique, sorted word list from sentence list
- words = []
- print("Creating unique, sorted word list...")
- [words.append(word.strip(string.punctuation).upper()) for word in sentences_text.split()]
- # set() uniques the list, sorted() puts them a-z.
- uwords = sorted(list(set(words)))
-
- # save word list to file
- print("Saving word list to file...")
- uwordsfilename = str(model_name+'/'+model_name+'.vocab') # correct extension
- uwordsfile = open(uwordsfilename, 'w')
- for word in uwords:
- uwordsfile.write("%s\n" % word)
-
- # create pronunciation dictionary from word list
- cmudict = []
- print("Opening pronunciation dictionary...")
- with open(pronunciation_dictionary) as f:
- for line in f:
- cmudict.append(line)
-
- pdict = []
- missing_words = []
- l = len(uwords)
- i = 0
- print("Extracting entries corresponding to word list...")
- printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20)
-
- curr_line = 0
-
- for word in uwords:
-
- wordmatch=False # a counter to help with efficiency
- for line in cmudict[curr_line:]:
-
- regex_string = str('^(?P<text>'+str(word.lower()) + '(\(\d\))?)( |\t)(?P<phones>.+)$')
-
- if re.match(regex_string, line):
- # print("match!")
- ms = re.search(regex_string, line)
- pdict.append(str(ms.group('text')+' '+ms.group('phones')))
- wordmatch=True
-
- # if I already made a match and I'm not now, time to break. this allows for finding
- # alternate pronunciations
- elif wordmatch:
- # curr_line +=1
- break
-
- # curr_line +=1
-
- # check for words the pronunciation dictionary doesn't have & save
- if not wordmatch:
- missing_words.append(word)
-
- i +=1
- printProgress (i, l, prefix = 'Progress:', suffix = 'Complete', decimals = 2, barLength = 20)
-
- # save missing words list to file
- if missing_words:
- missing_words_filename = str(model_name+'/'+model_name+'.missing')
- print("\nWord(s) missing from pronunciation dictionary. See ")
- print(missing_words_filename+" for list.")
- mwordsfile = open(missing_words_filename, 'w')
- for word in missing_words:
- mwordsfile.write("%s\n" % word)
-
- # save pronunciation dictionary to file
- print("Saving pronunciation dictionary to file...")
- pdictfilename = str(model_name+'/'+model_name+'.dic')
- pdictfile = open(pdictfilename, 'w')
- for word_entry in pdict:
- pdictfile.write("%s\n" % word_entry)
-
- # final instructions
- print("Data files created.")
-
-
-
-
-
-def vmc(
- model_name,
- sentence_file,
- voice_training_iterations=1,
- number_of_audio_files=0,
- pronunciation_dictionary="cmudict-en-us.dict"):
- """
- [model_name] a clear identifier for the specific recognition model.
-
- [sentence_file] the pathname for the input file containing the sentences to train on.
-
- [voice_training_iterations] integer representing the ratio of voice data collected to total
- sentences. 0 = no voice data collected (use what you have), 1 = a single recording of each
- sentence, 2=two recordings of each sentence, &c.
-
- [number_of_audio_files] for when the user is providing a prebuilt set of audio files with an
- included sentence file, this tells me how many lines from that sentence file I need (sometimes
- there's more example sentences than audio files.)
-
- [pronunciation_dictionary] This is the automatic source used to create the custom pronunciation
- dictionary provided and used by the voice model. There seems to be a difference in the
- whitespace used in several dictionaries, preventing the bw program from executing properly.
-
- """
-
- # record more voice samples?
- if voice_training_iterations>0:
-
- # get 'sentences' with all the sample lines.
- sentences_text = ""
- sentences_list = []
- with open(sentence_file) as f:
- for line in f:
- sentences_text = sentences_text+' '+line
- sentences_list.append(line)
-
- sentences_list=sentences_list*voice_training_iterations
-
- num_recs = len(sentences_list)
-
- j=0
-
- try:
- input("Press [enter], read text, & press [enter].")
- for sentence in sentences_list:
- #recording number
- j=j+1
- afno = str('%04d'%j) # audio file number w/ 4 digits
- # record audio with visual
- print("Recording no. %04d of %04d: \n\n\t%s" % (j, num_recs, sentence), end='\r')
-
- # recording file should look like this (e.g.): ./bespoke_training_data/audio/arctic_0001.wav
- audio_file_name = "./" + model_name +'/audio/' + model_name+ "_" + afno + ".wav"
-
- record(audio_file_name)
-
- # add formatted data to language model files
- sentence_format_and_save(sentence, model_name, afno)
-
-
- except KeyboardInterrupt:
- pass
-
- sentence_parsing(sentences_text, model_name, sentence_file, pronunciation_dictionary)
-
- # if an old audio folder was specified, don't record
- elif voice_training_iterations < 0:
-
- lineno = 0
-
- # get 'sentences' from old file with all the sample lines.
- # that means this sentence_file is the old_sentence_list_file from the perameter set
- print("Opening old sentence file...")
- with open(sentence_file) as f:
-
- sentences_text = ''
-
- for line in f:
-
- lineno = lineno+1
-
- afno = str('%04d'%lineno) # audio file number w/ 4 digits
-
- # create string of sentences
- sentences_text = sentences_text+' '+line
-
- # in this case, model_name corresponds to the filenames you want to save into
- sentence_format_and_save(line, model_name, afno)
-
- if lineno == number_of_audio_files:
- break
-
- print("Parsing sentences...")
- sentence_parsing(sentences_text, model_name, sentence_file, pronunciation_dictionary)
-
-
- elif voice_training_iterations == 0:
- print("Not dealing with audio today.")
-
-
- else:
- raise TypeError("Something weird happened to the voice_training_iterations variable.")
-
-
-
-# set up input perameters
-i = len(sys.argv)
-
-# print("sys.argv length is %s\n" % str(i))
-
-# print("Perameter inputs:")
-# for j in sys.argv:
-# print(j)
-
-# if using default iterations value
-if i==3:
- model_name = str(sys.argv[1])
- sentence_list_filename = str(sys.argv[2])
-
- vmc(model_name, sentence_list_filename)
-
-# if specifying iterations value: this is default now (done within shell script)
-elif i==5:
- model_name = str(sys.argv[1])
- sentence_list_filename = str(sys.argv[2])
- pronunciation_dictionary = str(sys.argv[3])
- voice_training_iterations = int(sys.argv[4])
-
- vmc(model_name, sentence_list_filename,voice_training_iterations, pronunciation_dictionary)
-
-# check if using old audio file
-elif i==8 and int(sys.argv[4]) < 0: # not unless both audio perameters specified _and_
- # iterations specified
-
- print("Using old audio file...")
-
- model_name = str(sys.argv[1])
- sentence_list_filename = str(sys.argv[2])
- pronunciation_dictionary = str(sys.argv[3])
- voice_training_iterations = int(sys.argv[4])
- old_audio_folder = str(sys.argv[5])
- old_sentence_list_file = str(sys.argv[5])+'/'+str(sys.argv[6])
- number_of_audio_files = int(sys.argv[7])
-
- vmc(model_name, old_sentence_list_file, voice_training_iterations, number_of_audio_files, pronunciation_dictionary)
-
-# error if wrong number of perameters entered
-else:
- raise TypeError("Perameters not set correctly!")
-
-
-
-
diff --git a/old/vmc.sh b/old/vmc.sh
@@ -1,232 +0,0 @@
-#!/bin/bash
-#
-# USAGE: bash vmc.sh voice-model sentence-list.txt 2 [old-audio-folder old-sentence-list-file]
-#
-# EXAMPLES:
-# old audio: bash vmc.sh voice-model2 ex-corpus.txt -1 audio arctic_data.txt
-# ignore audio: bash vmc.sh voice-model2 ex-corpus.txt 0
-# record audio, one repetition: bash vmc.sh voice-model2 ex-corpus.txt 1
-# record audio, two repetitions: bash vmc.sh voice-model2 ex-corpus.txt 2
-#
-# NOTE: sentence-list.txt is only used in the case of recording new audio, but must be preserved
-# in the command string for coherence sake. I know, it's hacky. I'm a noob and I'm not
-# spending another 10 hours rebuilding the code into something nicer.
-#
-#
-# source: https://nixingaround.blogspot.com/2016/08/improving-accuracy-of-cmu-sphinx-for_3.html
-
-
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ [set & check variables] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
-
-# basic variables
-model_name=$1
-sentence_list_filename=$2
-voice_training_iterations=$3
-
-
-# make sure this variable is consistently used
-pronunciation_dictionary="cmudict-en-us.dict"
-
-
-# set path to include sphinx library location
-# https://jrmeyer.github.io/installation/2016/01/09/Installing-CMU-Sphinx-on-Ubuntu.html
-export LD_LIBRARY_PATH=/usr/local/lib
-
-
-# check that variables have been specified
-if [ ! -n $voice_training_iterations ]; then
- echo "**Error**: input variables incorrect."
- echo "Please see README for usage instructions."
- exit 64
-fi
-
-
-# check for old audio-related perameters
-# http://stackoverflow.com/questions/3601515/how-to-check-if-a-variable-is-set-in-bash
-
-# check if incorrectly specified
-if ( ! [ -z ${4+x} ] && [ -z ${5+x} ] )
-then
- echo "You must provide two perameters to reuse audio files."
- exit 64
-fi
-
-
-# rename perameter if both present
-if [ $4 ];
-then
-
- old_audio_folder=$4 # perameter should be the relative path to the audio folder
-
- old_sentence_list_file=$5
-
- sentence_list_filename=./$model_name/$5
-
- number_of_audio_files=$( ls $old_audio_folder/*.wav | wc -l )
-
-fi
-
-
-# check for new model folder
-echo -n "Checking for $model_name..."
-if [ ! -d ./$model_name/audio ]; then
- echo -n "creating new folder..."
- mkdir -p "$(pwd)/$model_name/audio/"
- echo "done."
-else
- echo "WARNING: model already exists."
- # exit 65
-fi
-
-
-
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ [import old audio folder] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
-
-if [ $4 ]; then
-
- echo -n "Looking for audio folder..."
-
- # create two strings, space-delimited, in the formats:
- for entry in "$old_audio_folder"/*.wav
- do
-
- # ./[old-audio-folder]/[old-model-name]_XXXX.wav
- olddirentry=$entry
-
- # _XXXX.wav -- see: http://tldp.org/LDP/abs/html/string-manipulation.html
- suffix=${entry: -9}
-
- # copy and rename files to audio directory
- cp -r $olddirentry ./$model_name/audio/$model_name$suffix
-
- done
-
- # copy plain sentence file
- cp $old_audio_folder/$old_sentence_list_file ./$model_name/$old_sentence_list_file
-
- echo "done."
-
-fi
-
-
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ [create acoustic voice model] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
-
-# run python script: produces dictionary, LM & voice samples - if adding preexisting audio, indicate.
-
- a=$model_name # defines file and folder names
- b=$sentence_list_filename # points to the initial list of sentences
- c=$pronunciation_dictionary # used for producing custom pronunciation dictionary
- d=$voice_training_iterations # indicates action on voice data - record, import, nothing
- e=$old_audio_folder # if importing audio, gives its location
- f=$old_sentence_list_file # if importing audio, points to sentence list
- g=$number_of_audio_files # number of sentences to extract from list
-
-python3 vmc.py $a $b $c $d $e $f $g
-
-
-# run perl script to create language model, move into model folder
-perl quick_lm.pl -s $sentence_list_filename &> /dev/null
-src=$sentence_list_filename.arpabo
-dst=$model_name/$model_name.lm
-mv $src $dst
-
-# get full-size acoustic model
-echo -n "Extracting acoustic model..."
-tar -xf ./cmusphinx-en-us-ptm-5.2.tar.gz -C ./$model_name
-mv ./$model_name/cmusphinx-en-us-ptm-5.2 ./$model_name/en-us
-echo "done."
-
-# convert lm to binary (bin) format (command was too complex for python to handle)
-echo "Converting lm from text to binary..."
-dst=$model_name/$model_name.lm
-sphinx_lm_convert -i $dst -o $dst.bin &> /dev/null
-
-# generate some acoustic feature files
-echo "Generating acoustic feature files..."
-cd ./$model_name/audio
-sphinx_fe -argfile ../en-us/feat.params -samprate 16000 -c ../$model_name.fileids -di . -do . -ei wav -eo mfc -mswav yes &> /dev/null
-cd ../..
-
-# convert binary mdef file to .txt
-echo "Converting mdef file from binary to text..."
-cd ./$model_name/
-pocketsphinx_mdef_convert -text ./en-us/mdef ./en-us/mdef.txt &> /dev/null
-cd ..
-
-# get binary tools from sphinx installations
-echo -n "Copying tools from sphinxtrain installation..."
-cd ./$model_name/
-cp /usr/local/libexec/sphinxtrain/bw .
-cp /usr/local/libexec/sphinxtrain/map_adapt .
-cp /usr/local/libexec/sphinxtrain/mk_s2sendump .
-cp /usr/local/libexec/sphinxtrain/mllr_solve .
-cd ..
-echo "done."
-
-# run tools to create voice model
-cd ./$model_name/audio
-
-# sphinx_fe
-echo "Executing sphinx_fe..."
-sphinx_fe \
- -argfile ../en-us/feat.params \
- -samprate 16000 \
- -c ../$model_name.fileids \
- -di . \
- -do . \
- -ei wav \
- -eo mfc \
- -mswav yes \
- &> /dev/null
-
-echo "Executing bw..."
-../bw \
- -hmmdir ../en-us \
- -moddeffn ../en-us/mdef.txt \
- -ts2cbfn .ptm. \
- -feat 1s_c_d_dd \
- -svspec 0-12/13-25/26-38 \
- -cmn current \
- -agc none \
- -dictfn ../../$pronunciation_dictionary \
- -ctlfn ../$model_name.fileids \
- -lsnfn ../$model_name.transcription \
- -accumdir . \
- &> /dev/null
-
-echo "Executing mllr_solve..."
-../mllr_solve \
- -meanfn ../en-us/means \
- -varfn ../en-us/variances \
- -outmllrfn mllr_matrix \
- -accumdir . \
- &> /dev/null
-
-# move your files to a new directory (not sure if necessary?)
-#cp -a ../en-us ../en-us-adapt
-
-echo "Executing map_adapt..."
-../map_adapt \
- -moddeffn ../en-us/mdef.txt \
- -ts2cbfn .ptm. \
- -meanfn ../en-us/means \
- -varfn ../en-us/variances \
- -mixwfn ../en-us/mixture_weights \
- -tmatfn ../en-us/transition_matrices \
- -accumdir . \
- -mapmeanfn ../en-us/means \
- -mapvarfn ../en-us/variances \
- -mapmixwfn ../en-us/mixture_weights \
- -maptmatfn ../en-us/transition_matrices\
- &> /dev/null
-
-echo "Executing mk_s2sendump..."
-../mk_s2sendump \
- -pocketsphinx yes \
- -moddeffn ../en-us/mdef.txt \
- -mixwfn ../en-us/mixture_weights \
- -sendumpfn ../en-us/sendump \
- &> /dev/null
-
-
-
diff --git a/uninstallvmc.sh b/uninstallvmc.sh
@@ -1,16 +1,14 @@
#!/bin/bash
#
-# USAGE
-#
-# bash uninstallvmc.sh
+# USAGE: bash uninstallvmc.sh
#
-# DELETE EVERYTHING ===============================================================================
+# DELETE EVERYTHING ===========================================================
sudo rm -rf /opt/vmc
-sudo rm -f /usr/local/bin/vmc.sh
+sudo rm -f /usr/local/bin/vmc
-sudo rm -f /usr/local/bin/lmt.sh
+sudo rm -f /usr/local/bin/lmt
echo "vmc removed."
diff --git a/vmc b/vmc
@@ -0,0 +1,140 @@
+#!/bin/bash
+#
+# DESCRIPTION
+#
+# Given a sentence file and (optionally) prerecorded audio files, produce
+# a voice model in a specified location. Indicate whether to record
+# (-newrecordings) or import (-importrecordings; not implemented yet) the audio files.
+#
+# The statistical language model is now produced separately, run 'lmt' to
+# see what the parameters are.
+#
+# There are two modes of operation: 1) adapt an existing model to use new
+# audio data and 2) create a new model from scratch. In either case,
+# there are three options for sourcing the audio: 1) record new audio, 2)
+# import audio from a folder, or 3) add new recordings to the previously
+# recorded audio data ('addrecordings', below).
+#
+# If importing audio, be aware that the model names of the acoustic
+# models used must match exactly.
+#
+# If creating a new model, there is a copy of the standard acoustic voice
+# model stored in /opt/vmc/lib/en-us. Copy it into the home directory
+# before use. (sudo cp -r /opt/vmc/lib/en-us ~/Documents/)
+#
+# If importing audio files, the associated text file with sentences must
+# be located in the folder with the audio, and follow the naming and
+# internal formatting standards.
+#
+# The [reps] variable at the end specifies how many times to request a
+# recording of each entry in the sentence file. It is optional, as it is
+# placed at the end of the list of parameters and the script will not
+# fail if it is not specified.
+#
+# USAGE | EXAMPLES ------------------------------------------------------------
+#
+# vmc en-us \
+# -adapt /extant/model/location \
+# -addrecordings /audio/files/location /dictation/file/location.txt 5
+#
+# vmc en-us \
+# -create /place/to/put/model \
+# -newrecordings /place/to/put/audio/files /dictation/file/location.txt 5
+#
+# vmc en-us \
+# -adapt /extant/model/location \
+# -importrecordings /audio/files/location
+#
+# VARIABLES ===================================================================
+
+model_name="$1"     # i.e. 'en-us'; also the file-name prefix of the recordings
+model_location="$3" # i.e. /usr/local/lib/python2.7/dist-packages/pocketsphinx/model/en-us
+audio_folder="$5"   # i.e. ~/.psyche/audio
+
+# Only the two recording modes are accepted ('-importrecordings' is rejected); [reps] defaults to 1.
+if [[ "$4" == '-newrecordings' || "$4" == '-addrecordings' ]]; then dict_file="$6"; reps="${7:-1}";
+else echo "Bad options given. Run: 'nano /usr/local/bin/vmc'."; exit 1; fi
+
+current_number_of_recordings="0" # changed below, if there are any.
+
+libdir="/opt/vmc/lib"
+
+# COMMANDS ====================================================================
+
+# misc. housekeeping ----------------------------------------------------------
+
+sudo -v # get sudo: ask for the password once, up front, without running a dummy command
+
+export LD_LIBRARY_PATH=/usr/local/lib # include sphinx library location
+# I need to do this any time I want to use the manually downloaded CMU Sphinx.
+# http://jrmeyer.github.io/installation/2016/01/08/Installing-CMU-Sphinx-on-Ubuntu.html
+
+if [[ "$2" == '-create' ]]; then # seed the model directory from the stock en-us model
+    if [[ -d "$model_location" ]]; then
+        echo -n "Model already exists at this directory! Press [enter] to overwrite."; read -r
+        sudo rm -r -- "$model_location"
+    else
+        echo "Press [enter] to confirm writing new acoustic model at:"
+        echo "$model_location"; read -r
+    fi
+    mkdir -p -- "$model_location"; cp -r "$libdir"/en-us/* "$model_location/"
+fi
+
+# record new audio files ------------------------------------------------------
+
+if [[ "$4" == '-addrecordings' ]]; then # find how many recordings were already made
+    # Number = digits in the last file's name (path and model prefix dropped), read as base 10.
+    last=""; for wav in "$audio_folder/$model_name"*.wav; do [[ -e "$wav" ]] && last="${wav##*/}"; done
+    last="${last#"$model_name"}"; current_number_of_recordings=$(( 10#0${last//[^0-9]/} ))
+    echo "$current_number_of_recordings"
+else
+    # Test for old audio *before* creating the folder, else the prompt fires on every run.
+    if [[ -d "$audio_folder" ]]; then
+        echo -n "Audio already exists at this directory! Press [enter] to overwrite."; read -r
+        sudo rm -r -- "$audio_folder"
+    fi
+    sudo mkdir -p -- "$audio_folder" # root-owned for now; chown'ed after recording
+fi
+
+sudo python3 "$libdir/getaudio.py" \
+    "$dict_file" \
+    "$audio_folder" \
+    "$reps" \
+    "$model_name" \
+    "$current_number_of_recordings" # record audio into folder (both accepted modes record)
+sudo chown -R "$USER": "$audio_folder" # bug: dir had root ownership.
+echo "Audio files saved into $audio_folder. They can be reused."
+
+# PRODUCE DERIVATIVE FILES ----------------------------------------------------
+# These are recreated for all audio files, regardless of whether this is a new
+# collection of recordings or just adding to an old one. Simpler that way.
+
+echo -e "\nProducing sentence file derivatives..."
+sudo python3 "$libdir/format_text.py" \
+    "$dict_file" \
+    "$model_name" \
+    "$audio_folder" \
+    "$reps" \
+    "$current_number_of_recordings"
+
+echo "Producing audio file derivatives..."
+sudo bash "$libdir/acousticfiles.sh" \
+    "$audio_folder" \
+    "$model_location" \
+    "$audio_folder/$model_name.fileids"
+
+# CREATE MODELS ---------------------------------------------------------------
+
+echo "Creating voice model..."
+sudo bash "$libdir/voicemodel.sh" "$model_name" "$model_location" "$audio_folder"
+
+# FIX ROOT PERMISSIONS --------------------------------------------------------
+# I really hope this isn't horrible practice, but it's the most straightforward
+# way to make sure the wrong stuff isn't left owned by root
+sudo chown -R "$USER": "$model_location"
+sudo chown -R "$USER": "$audio_folder"
+
+# DONE ------------------------------------------------------------------------
+echo "Process complete."
+echo -e "ACOUSTIC VOICE MODEL: \t $model_location"
+echo -e "VOICE RECORDINGS: \t $audio_folder"
diff --git a/vmc.sh b/vmc.sh
@@ -1,156 +0,0 @@
-#!/bin/bash
-#
-# DESCRIPTION
-#
-# Given a sentence file and (optionally) prerecorded audio files, produce
-# a voice model in a specified location. Indicate whether to record
-# (-record) or import (-import) the audio files.
-#
-# The statistical language model is now produced separately, run
-#
-# $ lmt.sh
-#
-# to see what the parameters are.
-#
-# USAGE
-#
-# vmc.sh
-# model-name used to name most of the internal files
-# [ -record OR -import audio/file/directory ]
-# vm-training-file sentences the user should record for
-# training purposes
-# output-folder this is a complete file path
-# [reps] how many times to get a recording of
-# each sentence
-# acoustic-model Location of acoustic model to start
-# with. (this is a complete file path,
-# not including 'en-us'.) Optional.
-#
-# DEPENDENCIES
-#
-# CMU Sphinx, Python 3 (& 2.7), Perl, and other misc. packages.
-#
-# NOTES
-#
-# The output folder is intended to be the location of the voice model,
-# once completed.
-#
-# The [reps] variable at the end specifies how many times to request a
-# recording of each entry in the sentence file. It is optional, as it is
-# placed at the end of the list of parameters and the script will not
-# fail if it is not specified.
-#
-# Having been installed to /usr/local/bin, this command can be called
-# from anywhere.
-#
-# After installation, a keyphrase list should be added in order to use
-# the voice model for keyword spotting.
-#
-
-# VARIABLES ===================================================================
-
-export LD_LIBRARY_PATH=/usr/local/lib
-# set path to include sphinx library location
-# jrmeyer.github.io/installation/2016/01/09/Installing-CMU-Sphinx-on-Ubuntu.html
-
-if [[ $2 = '-record' ]]; then
-
- vm_training_file=$3
- output_folder=$4
-
- if [[ -n $5 ]]; then
- iterations=$5
- else
- iterations=1
- fi
-
-elif [[ $2 = '-import' ]]; then
-
- audio_file_directory=$3
- sentence_file=$4
- output_folder=$5
- iterations=1
-
-else
-
- echo
- echo -e "USAGE: \tvmc.sh "
- echo
- echo -e "\tmodel-name\t\t(used to name most of the internal files)"
- echo -e "\t[ -record OR -import audio/file/directory ]"
- echo -e "\tvm-training-file\t(sentences for the user to record)"
- echo -e "\toutput-folder\t\t(this is a complete file path)"
- echo -e "\t[reps]\t\t\t(number of voice recordings per sentence)"
- echo
-
- exit 1
-
-fi
-
-model_name=$1
-
-audio_folder=$output_folder/audio
-
-tdir=/opt/vmc/tools
-
-fdir=/opt/vmc/functions
-
-
-# COMMANDS ====================================================================
-
-# OBTAIN REQUISITE FILES ------------------------------------------------------
-
-echo
-echo "Collecting required files..."
-
-# get audio files and put them where they go
-if [[ $2 = '-record' ]]; then
-
- mkdir -p $audio_folder
-
- python3 $fdir/getaudio.py $vm_training_file $audio_folder $iterations $model_name
-
- echo "Recorded audio files saved into $audio_folder. They can be reused."
-
-elif [[ $2 = '-import' ]]; then
-
- mkdir -p $audio_folder
-
- cp -a $audio_file_directory/*.wav $audio_folder/
-
-fi
-
-# copy default acoustic model
-if [ -n "$6" ]; then
- echo "Pulling base acoustic model from $6"
- read -p "Press enter to continue, or CTRL-C to exit"
- cp -r $installation_directory/en-us $output_folder
-else
- echo "Using default base acoustic model."
- cp -r $tdir/en-us $output_folder
-fi
-
-# PRODUCE DERIVATIVE FILES ----------------------------------------------------
-
-echo
-echo "Producing sentence file derivatives..."
-
-# get derivatives of sentence file
-python3 $fdir/format_text.py $vm_training_file $model_name $output_folder $iterations
-
-echo
-echo "Producing audio file derivatives..."
-
-# get derivatives of audio files
-bash $fdir/acousticfiles.sh $audio_folder $output_folder/$model_name.fileids
-
-# CREATE MODELS ---------------------------------------------------------------
-
-echo
-echo "Creating voice model..."
-
-# build voice model
-bash $fdir/voicemodel.sh $model_name $output_folder $audio_folder $output_folder
-
-echo
-echo "Process complete. New acoustic voice model saved into $output_folder"