import os
import numpy as np
# Consistency check for a directory tree of per-speaker text files.
#
# The working directory is expected to contain one sub-directory per speaker,
# each holding files named "<speaker>_<NNN>.txt" with NNN running from 001 to
# 502.  For every NNN the script reports whether each speaker has the file and
# whether its whitespace-stripped content is identical across speakers.  It
# then prints the first 100 collected "<text>, <NNN>" entries in sorted order.

CORPUS_ROOT = "./"
FIRST_INDEX = 1
LAST_INDEX = 502
MAX_MISSING_SHOWN = 10    # cap on speaker names listed per "absent" line
MAX_SENTENCES_SHOWN = 100  # cap on entries printed in the final listing


def list_speakers(root):
    """Return the names of the sub-directories of *root*, sorted.

    Plain files (e.g. a script or README stored next to the speaker
    directories) are skipped; otherwise each of them would be reported as a
    speaker that is missing every text file.  Sorting makes the report
    independent of the order in which the OS lists directory entries.
    """
    return sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    )


def read_text(root, speaker, index):
    """Return the stripped content of ``<root>/<speaker>/<speaker>_<NNN>.txt``.

    *index* is zero-padded to three digits to build ``NNN``.  Returns ``None``
    when the file does not exist or cannot be read/decoded, so the caller can
    record the speaker as missing instead of aborting the whole run.
    """
    path = os.path.join(root, speaker, "{}_{:03d}.txt".format(speaker, index))
    try:
        # The platform default encoding is used on purpose: the encoding of
        # the text files is not known here.
        with open(path) as handle:
            return handle.read().strip()
    except (OSError, UnicodeDecodeError):
        return None


def find_text_versions(root, speakers, index):
    """Collect the distinct texts that *speakers* have for file number *index*.

    Returns a ``(versions, missing)`` tuple: ``versions`` lists each distinct
    stripped text once, in order of first appearance; ``missing`` lists the
    speakers whose file could not be read.
    """
    versions = []
    missing = []
    for speaker in speakers:
        text = read_text(root, speaker, index)
        if text is None:
            missing.append(speaker)
        elif text not in versions:
            versions.append(text)
    return versions, missing


def format_report(index, versions, missing):
    """Return the report lines for file number *index* (possibly none).

    A missing file takes precedence over a version mismatch, so a file that is
    both absent for some speakers and inconsistent among the rest is reported
    only as absent.
    """
    label = "{:03d}".format(index)
    if len(versions) == 1 and not missing:
        return ["FILE: " + label + " is okay."]
    if missing:
        shown = missing[:MAX_MISSING_SHOWN]
        return ["\tFile: " + label + " is absent for " + str(shown)]
    if len(versions) > 1:
        header = ("\tFile: " + label + " has " + str(len(versions))
                  + " versions across all speakers:")
        return [header] + ["\t\t" + version for version in versions]
    # No speakers at all: nothing to report for this index.
    return []


# Each entry is "<text>, <index>"; a text that differs between speakers
# contributes one entry per distinct version.
sentences = []

# The directory listing does not depend on the file index, so read it once.
speakers = list_speakers(CORPUS_ROOT)

for i in range(FIRST_INDEX, LAST_INDEX + 1):
    versions_of_text, speakers_missing_text = find_text_versions(
        CORPUS_ROOT, speakers, i
    )
    for text in versions_of_text:
        sentences.append(text + ", " + str(i))
    for line in format_report(i, versions_of_text, speakers_missing_text):
        print(line)

print("I found a total of " + str(len(sentences)) + " unique sentences.")
print("Here they are alphabetically:")
for s in sorted(sentences)[:MAX_SENTENCES_SHOWN]:
    print(s)
print("\n\nWhoever did this is a war criminal.")