vttmaker/backup.py

61 lines
1.7 KiB
Python

import json
import re
def parse_vtt(vtt_filename):
    """Parse a VTT-style subtitle file into a list of cue dictionaries.

    The first line of the file is always skipped (presumably the ``WEBVTT``
    header -- confirm against the files this tool consumes). A cue begins at
    a timing line of the form ``<sec>.<ms> --> <sec>.<ms>`` (three decimal
    digits) and collects every following non-blank line until a blank line
    or the end of the file.

    Non-blank lines that appear outside of a cue (extra header lines, cue
    identifiers, notes) are ignored, and a cue without any text yields an
    empty ``"content"`` string.

    Args:
        vtt_filename: Path of the UTF-8 encoded subtitle file to read.

    Returns:
        A list of dicts in file order. Each dict has the keys ``"start"``
        and ``"end"`` (floats parsed from the timing line) and ``"content"``
        (the cue's text lines, each stripped of surrounding whitespace and
        joined with a newline character).
    """
    time_pattern = re.compile(r"(\d+\.\d{3}) --> (\d+\.\d{3})")
    subtitles = []
    current_subtitle = None  # cue being assembled, or None between cues
    content_lines = []

    with open(vtt_filename, "r", encoding="utf-8") as file:
        next(file, None)  # skip the first line; tolerate an empty file
        for line in file:
            match = time_pattern.match(line)
            if match:
                # A timing line always starts a fresh cue; an open cue that
                # was not terminated by a blank line is replaced.
                current_subtitle = {
                    "start": float(match.group(1)),
                    "end": float(match.group(2)),
                }
                content_lines = []
                continue

            if current_subtitle is None:
                # Text outside of a cue carries no subtitle content.
                continue

            text = line.strip()
            if text:
                content_lines.append(text)
            else:
                # A blank line terminates the current cue.
                current_subtitle["content"] = "\n".join(content_lines)
                subtitles.append(current_subtitle)
                current_subtitle = None

    # The last cue may end at EOF without a trailing blank line.
    if current_subtitle is not None:
        current_subtitle["content"] = "\n".join(content_lines)
        subtitles.append(current_subtitle)
    return subtitles
def subtitles_to_backup(subtitles):
    """Wrap a list of subtitles in the backup dictionary structure.

    Args:
        subtitles: The list of subtitle entries to store; it is placed in
            the result as-is (not copied).

    Returns:
        A dict with the keys ``"subtitles"`` (the given list),
        ``"script_lines"`` (an empty list), ``"line_index"`` (the number of
        subtitles), ``"current_subtitle"`` (an empty dict) and ``"play"``
        (``0``), in that order.
    """
    subtitle_count = len(subtitles)
    return dict(
        subtitles=subtitles,
        script_lines=[],
        line_index=subtitle_count,
        current_subtitle={},
        play=0,
    )
def main(vtt_filename, output_filename):
    """Convert a subtitle file into a JSON backup file.

    Parses ``vtt_filename`` with ``parse_vtt``, wraps the result with
    ``subtitles_to_backup`` and writes it to ``output_filename`` as UTF-8
    JSON indented by two spaces.

    Args:
        vtt_filename: Path of the subtitle file to read.
        output_filename: Path of the JSON file to create or overwrite.
    """
    # Parse first so the output file is only opened once the input was read.
    payload = subtitles_to_backup(parse_vtt(vtt_filename))
    with open(output_filename, mode="w", encoding="utf-8") as out_stream:
        json.dump(payload, out_stream, indent=2)
# Default input and output paths, resolved relative to the working directory.
# NOTE(review): the conversion below runs whenever this module is executed
# or imported; consider an `if __name__ == "__main__":` guard if importing
# without side effects is ever needed.
vtt_filename, output_filename = "audio.vtt", "backup2.json"
main(vtt_filename, output_filename)