commit 088837893a275a0418d74581ed2dce93d13f39e0
parent 3747c1157cbb7464cb2dee8260c533c61d0b2771
Author: umhau <umhau@users.noreply.github.com>
Date: Fri, 4 Nov 2016 15:21:16 -0400
iterations didn't work.
Diffstat:
1 file changed, 28 insertions(+), 22 deletions(-)
diff --git a/functions/format_text.py b/functions/format_text.py
@@ -67,35 +67,41 @@ def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, bar
sentences_text = ""
j=0
+lines = []
with open(sentence_file) as f:
- for line in f*iterations:
+ for line in f:
+ lines.append(line)
- sentences_text = sentences_text+' '+line
+# The file is read into `lines` first so the list can be repeated `iterations` times below;
+# a file object cannot be multiplied like a list, so `f*iterations` in the loop above fails.
+for line in lines*iterations:
- j+=1
- afno = str('%04d'%j)
+ sentences_text = sentences_text+' '+line
- # create transcription file
- exclude = set(string.punctuation)
- sentence = ''.join(ch for ch in line if ch not in exclude)
+ j+=1
+ afno = str('%04d'%j)
- nice_text = sentence.lower().rstrip()
- formatted_text = "</s> " + nice_text + " </s> (" + model_name + "_" + afno + ")\n"
- formatted_filename = target_directory + "/" +model_name + '.transcription'
- hs = open(formatted_filename,"a")
- hs.write(formatted_text)
- hs.close()
-
- #create fileid file
- formatted_text = model_name + "_" + afno + "\n"
- formatted_filename = target_directory + "/" +model_name + '.fileids'
- hs = open(formatted_filename,"a")
- hs.write(formatted_text)
- hs.close()
-
- sentences_text = sentences_text+' '+line
+ # create transcription file
+ exclude = set(string.punctuation)
+ sentence = ''.join(ch for ch in line if ch not in exclude)
+
+ nice_text = sentence.lower().rstrip()
+ formatted_text = "</s> " + nice_text + " </s> (" + model_name + "_" + afno + ")\n"
+ formatted_filename = target_directory + "/" +model_name + '.transcription'
+ hs = open(formatted_filename,"a")
+ hs.write(formatted_text)
+ hs.close()
+
+ #create fileid file
+ formatted_text = model_name + "_" + afno + "\n"
+ formatted_filename = target_directory + "/" +model_name + '.fileids'
+ hs = open(formatted_filename,"a")
+ hs.write(formatted_text)
+ hs.close()
+
+ sentences_text = sentences_text+' '+line # why twice? I have no memory of why I did this.
# CREATE PRONUNCIATION DICTIONARY -----------------------------------------------------------------