‹ projects

vmc

a voice model creator for CMU Sphinx
Log | Files | Refs | README | LICENSE

commit 088837893a275a0418d74581ed2dce93d13f39e0
parent 3747c1157cbb7464cb2dee8260c533c61d0b2771
Author: umhau <umhau@users.noreply.github.com>
Date:   Fri,  4 Nov 2016 15:21:16 -0400

iterations didn't work.
Diffstat:
M functions/format_text.py | 50 ++++++++++++++++++++++++++++----------------------
1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/functions/format_text.py b/functions/format_text.py
@@ -67,35 +67,41 @@ def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, bar
 
 sentences_text = ""
 j=0
+lines = []
 with open(sentence_file) as f:
-    for line in f*iterations:
+    for line in f:
+        lines.append(line)
 
-        sentences_text = sentences_text+' '+line
+# this was separated out so I can easily multiply the lines by the iterations...doesn't work to do
+# f*iterations (in above loop).
+for line in lines*iterations:
 
-        j+=1
-        afno = str('%04d'%j)
+    sentences_text = sentences_text+' '+line
 
-        # create transcription file
-        exclude = set(string.punctuation)
-        sentence = ''.join(ch for ch in line if ch not in exclude)
+    j+=1
+    afno = str('%04d'%j)
 
-        nice_text = sentence.lower().rstrip()
-        formatted_text = "</s> " + nice_text + " </s> (" + model_name + "_" + afno + ")\n"
-        formatted_filename = target_directory + "/" +model_name + '.transcription'
-        hs = open(formatted_filename,"a")
-        hs.write(formatted_text)
-        hs.close()
-
-        #create fileid file
-        formatted_text = model_name + "_" + afno + "\n"
-        formatted_filename = target_directory + "/" +model_name + '.fileids'
-        hs = open(formatted_filename,"a")
-        hs.write(formatted_text)
-        hs.close()
-
-        sentences_text = sentences_text+' '+line
+    # create transcription file
+    exclude = set(string.punctuation)
+    sentence = ''.join(ch for ch in line if ch not in exclude)
+
+    nice_text = sentence.lower().rstrip()
+    formatted_text = "</s> " + nice_text + " </s> (" + model_name + "_" + afno + ")\n"
+    formatted_filename = target_directory + "/" +model_name + '.transcription'
+    hs = open(formatted_filename,"a")
+    hs.write(formatted_text)
+    hs.close()
+
+    #create fileid file
+    formatted_text = model_name + "_" + afno + "\n"
+    formatted_filename = target_directory + "/" +model_name + '.fileids'
+    hs = open(formatted_filename,"a")
+    hs.write(formatted_text)
+    hs.close()
+
+    sentences_text = sentences_text+' '+line # why twice? I have no memory of why I did this.
 
 # CREATE PRONUNCIATION DICTIONARY -----------------------------------------------------------------