"""Convert a WebVTT-style subtitle file into a JSON "backup" document.

The script reads cue timings and cue text from a ``.vtt`` file and writes
them, wrapped in a small dictionary of extra fields, to a JSON file.
"""

import json
import re

# A cue timing line: "<start> --> <end>". Each timestamp is either bare
# seconds ("12.345") or clock style with optional minute/hour fields
# ("01:02.345", "00:01:02.345"). Anything after the end timestamp (for
# example cue settings) is ignored because the pattern is only anchored at
# the start of the line.
_TIMING_PATTERN = re.compile(
    r"((?:\d+:){0,2}\d+\.\d{3}) --> ((?:\d+:){0,2}\d+\.\d{3})"
)


def _timestamp_to_seconds(timestamp):
    """Return *timestamp* ("[[hh:]mm:]ss.mmm") as a float number of seconds."""
    *clock_fields, seconds_field = timestamp.split(":")
    whole_seconds, millis = seconds_field.split(".")
    total_seconds = int(whole_seconds)
    # The field closest to the seconds is minutes, the next one is hours.
    for unit, field in zip((60, 3600), reversed(clock_fields)):
        total_seconds += unit * int(field)
    # Integer arithmetic followed by a single division yields the same float
    # as parsing the decimal literal directly, so bare "ss.mmm" timestamps
    # convert exactly as float("ss.mmm") would.
    return (total_seconds * 1000 + int(millis)) / 1000


def _close_cue(cue, text_lines):
    """Store the collected text lines on *cue* and return the cue."""
    cue["content"] = "\n".join(text_lines)
    return cue


def parse_vtt(vtt_filename):
    """Parse the cues of a VTT file into a list of dictionaries.

    The first line of the file (the header) is skipped. Every timing line
    opens a cue; the following non-blank lines, stripped of surrounding
    whitespace and joined with ``"\\n"``, become its content; a blank line
    or the end of the file closes it.

    Text that appears while no cue is open (cue identifiers, NOTE blocks,
    ...) is ignored, a cue without text gets an empty ``content``, and a
    timing line that directly follows cue text closes the previous cue
    instead of overwriting it.

    Args:
        vtt_filename: Path of the UTF-8 encoded file to read.

    Returns:
        A list of ``{"start": float, "end": float, "content": str}``
        dictionaries in file order, with times expressed in seconds.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(vtt_filename, "r", encoding="utf-8") as file:
        lines = file.readlines()

    subtitles = []
    current_subtitle = None  # Cue being collected, or None between cues.
    text_lines = []

    for line in lines[1:]:
        match = _TIMING_PATTERN.match(line)
        stripped = line.strip()
        if match:
            # A missing blank separator must not drop the cue in progress.
            if current_subtitle is not None:
                subtitles.append(_close_cue(current_subtitle, text_lines))
            current_subtitle = {
                "start": _timestamp_to_seconds(match.group(1)),
                "end": _timestamp_to_seconds(match.group(2)),
                "content": "",
            }
            text_lines = []
        elif not stripped:
            if current_subtitle is not None:
                subtitles.append(_close_cue(current_subtitle, text_lines))
                current_subtitle = None
        elif current_subtitle is not None:
            text_lines.append(stripped)
        # Otherwise: text outside of any cue, nothing to attach it to.

    # The file may end without a trailing blank line.
    if current_subtitle is not None:
        subtitles.append(_close_cue(current_subtitle, text_lines))

    return subtitles


def subtitles_to_backup(subtitles):
    """Wrap *subtitles* in the dictionary layout written to the JSON file.

    Args:
        subtitles: List of cue dictionaries as returned by ``parse_vtt``.

    Returns:
        A dictionary holding the subtitles plus the fixed fields
        ``script_lines`` (empty list), ``line_index`` (number of
        subtitles), ``current_subtitle`` (empty dict) and ``play`` (0).
    """
    backup_data = {
        "subtitles": subtitles,
        "script_lines": [],
        "line_index": len(subtitles),
        "current_subtitle": {},
        "play": 0,
    }
    return backup_data


def main(vtt_filename, output_filename):
    """Parse *vtt_filename* and write the backup JSON to *output_filename*.

    Args:
        vtt_filename: Path of the subtitle file to read.
        output_filename: Path of the JSON file to create or overwrite.
    """
    subtitles = parse_vtt(vtt_filename)
    backup_data = subtitles_to_backup(subtitles)
    with open(output_filename, "w", encoding="utf-8") as json_file:
        json.dump(backup_data, json_file, indent=2)


vtt_filename = "audio.vtt"
output_filename = "backup2.json"

# Only convert when executed as a script, so importing this module has no
# file-system side effects.
if __name__ == "__main__":
    main(vtt_filename, output_filename)