Rewrite filelists.py to parse DLG files
This is required for TSL support.
This commit is contained in:
parent
0b226fce6e
commit
5a534bfcfa
1 changed file with 63 additions and 14 deletions
|
@ -17,31 +17,80 @@ if not os.path.exists(wav_dir):
|
|||
raise RuntimeError("WAV directory does not exist")
|
||||
|
||||
|
||||
def get_lines_from_tlk(obj, speaker):
|
||||
def is_suitable_text(text):
    """Return whether a dialog string should be kept.

    A string that both starts with "[" and ends with "]" is rejected
    (presumably a pure stage direction with nothing spoken -- confirm
    against the game data). Every other string, including the empty
    string, is accepted.
    """
    wrapped_in_brackets = text.startswith("[") and text.endswith("]")
    if wrapped_in_brackets:
        return False
    return True
|
||||
|
||||
|
||||
def index_or_negative_one(string, substr, beg=0):
    """Return the lowest index of ``substr`` in ``string`` at or after ``beg``.

    Returns -1 when ``substr`` does not occur in that range. This is
    exactly the contract of ``str.find``, so the call is delegated to it
    rather than catching ``ValueError`` from ``str.index``.
    """
    return string.find(substr, beg)
|
||||
|
||||
|
||||
def erase_brackets(string, left, right):
    """Remove the first ``left ... right`` delimited span from ``string``.

    The span starts at the first occurrence of ``left`` and ends at the
    first occurrence of ``right`` that follows it; both delimiters are
    removed together with everything between them.

    Returns a ``(text, found)`` tuple: ``text`` is the string with the span
    removed and ``found`` is True, or ``text`` is the unchanged input and
    ``found`` is False when no complete span exists.

    Delimiters may be longer than one character; the delimiter lengths are
    used both for where the search for ``right`` begins and for how much is
    cut, so no part of a multi-character delimiter is left behind.
    Non-empty delimiters are expected.
    """
    left_idx = string.find(left)
    if left_idx == -1:
        return (string, False)

    # Start looking for the closing delimiter only after the opening one.
    right_idx = string.find(right, left_idx + len(left))
    if right_idx == -1:
        return (string, False)

    return (string[:left_idx] + string[right_idx + len(right):], True)
|
||||
|
||||
|
||||
def clear_text(text):
    """Strip every ``[...]`` and ``{...}`` span from ``text``.

    Spans are removed one at a time with ``erase_brackets`` until neither
    kind is found any more, then surrounding whitespace is trimmed.

    The loop must continue while *either* bracket kind was removed:
    stopping as soon as no ``{...}`` span is found would leave a second
    ``[...]`` span in place for text that contains no braces at all.
    """
    while True:
        text, found_square = erase_brackets(text, "[", "]")
        text, found_curly = erase_brackets(text, "{", "}")
        # Each successful erase shortens the text, so this terminates.
        if not (found_square or found_curly):
            break

    return text.strip()
|
||||
|
||||
|
||||
def get_lines_from_dlg(obj, speaker, tlk_strings):
    """Build filelist lines from one parsed DLG JSON object.

    Args:
        obj: Parsed DLG JSON; entries are read from its "EntryList|15" key.
        speaker: Accepted for interface compatibility.
            NOTE(review): not used for filtering in this function --
            confirm whether speaker filtering is intended here.
        tlk_strings: Mapping of integer strref to a
            ``(soundresref, text)`` tuple; only the text is read.

    Returns:
        A list of ``"<wav path>|<cleaned text>|0\\n"`` strings, one per
        distinct voice-over resref whose WAV file exists in ``wav_dir``.
    """
    lines = []
    if "EntryList|15" not in obj:
        return lines

    uniq_sound = set()
    for entry in obj["EntryList|15"]:
        if "VO_ResRef|11" not in entry:
            continue

        voresref = entry["VO_ResRef|11"].lower()
        # The strref is the first "|"-separated field of the "Text|12" value.
        textstrref = int(entry["Text|12"].split("|")[0])

        # -1 means no string reference; a strref missing from the TLK table
        # (e.g. dialog.tlk.json was not found) is skipped rather than
        # aborting the whole run with a KeyError.
        if textstrref == -1 or textstrref not in tlk_strings:
            continue

        text = tlk_strings[textstrref][1]
        if not voresref or voresref in uniq_sound or not is_suitable_text(text):
            continue

        wav_filename = os.path.join(wav_dir, voresref + ".wav")
        if os.path.exists(wav_filename):
            lines.append("{}|{}|0\n".format(wav_filename, clear_text(text)))
            uniq_sound.add(voresref)

    return lines
|
||||
|
||||
|
||||
def generate_filelist(extract_dir, speaker):
|
||||
# Extract lines from all TLK files
|
||||
tlk_strings = dict()
|
||||
|
||||
# Read strings from dialog.tlk into dictionary
|
||||
tlk_path = os.path.join(extract_dir, "dialog.tlk.json")
|
||||
if os.path.exists(tlk_path):
|
||||
with open(tlk_path, "r") as fp:
|
||||
obj = json.load(fp)
|
||||
if "strings" in obj:
|
||||
for string in obj["strings"]:
|
||||
strref = int(string["_index"])
|
||||
soundresref = string["soundResRef"].lower()
|
||||
text = string["text"]
|
||||
tlk_strings[strref] = (soundresref, text)
|
||||
|
||||
# Extract lines from DLG files
|
||||
lines = []
|
||||
for f in glob.glob("{}/**".format(extract_dir), recursive=True):
|
||||
if f.endswith(".tlk.json"):
|
||||
if f.endswith(".dlg.json"):
|
||||
with open(f, "r") as fp:
|
||||
obj = json.load(fp)
|
||||
lines.extend(get_lines_from_tlk(obj, speaker))
|
||||
lines.extend(get_lines_from_dlg(obj, speaker, tlk_strings))
|
||||
|
||||
# Split lines into training and validation filelists
|
||||
random.shuffle(lines)
|
||||
|
|
Loading…
Reference in a new issue