commit 958411c58c3aa87c5bf9b77fb83160435cb4c305
parent 0fdfda57ab97c56de588f87ec73628102cc43fed
Author: umhau <umhau@alum.gcc.edu>
Date: Thu, 15 Jun 2017 14:21:34 -0400
commented some confusing code
Diffstat:
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/lib/format_text.py b/lib/format_text.py
@@ -96,21 +96,29 @@ for line in lines*iterations:
formatted_audio_file_number = str('%04d'%recording_count)
# create transcription file -----------------------------------------------
+
+ # clean up the text
exclude = set(string.punctuation)
sentence = ''.join(ch for ch in line if ch not in exclude)
-
nice_text = sentence.lower().rstrip()
+
+ # format text string and file name with file ids
formatted_text = "</s> "+nice_text+" </s> ("+model_name+"_"+formatted_audio_file_number+")\n"
formatted_filename = audio_folder + "/" +model_name + '.transcription'
+ # save into transcription file
append_to_file(formatted_filename, formatted_text)
#create fileid file -------------------------------------------------------
+
+ # format file id entry and filename
formatted_text = model_name + "_" + formatted_audio_file_number + "\n"
formatted_filename = audio_folder + "/" +model_name + '.fileids'
+ # save into fileids file
append_to_file(formatted_filename, formatted_text)
+ # ???? purpose unclear: appends the raw (unformatted) line to sentences_text
sentences_text = sentences_text+' '+line # why twice? I have no memory of
# why I did this. I don't think
# it does anything, either. TODO: