# vttmaker/backup.py

import json
import re


def _timestamp_to_seconds(timestamp):
    """Convert a cue timestamp string to seconds.

    Accepts plain seconds (``"12.345"``) as well as the colon-separated
    ``MM:SS.mmm`` and ``HH:MM:SS.mmm`` forms.
    """
    seconds = 0.0
    for part in timestamp.split(":"):
        # Every colon shifts the value accumulated so far up by a factor of 60.
        seconds = seconds * 60 + float(part)
    return seconds


def _store_cue(subtitles, cue, text_lines):
    """Append *cue* to *subtitles* with its text lines joined by newlines."""
    if cue is not None:
        cue["content"] = "\n".join(text_lines)
        subtitles.append(cue)


def parse_vtt(vtt_filename):
    """Parse a VTT-style subtitle file into a list of cue dictionaries.

    The first line of the file (the header) is always skipped. A cue starts
    at a timing line of the form ``<start> --> <end>`` and ends at the next
    blank line, the next timing line, or the end of the file. Timestamps may
    be plain seconds (``12.345``) or ``[HH:]MM:SS.mmm``.

    Text that appears outside a cue (cue identifiers, NOTE blocks, header
    metadata) is ignored, and a cue without any text gets an empty
    ``"content"`` string.

    Args:
        vtt_filename: Path of the UTF-8 encoded subtitle file to read.

    Returns:
        A list of dicts with the keys ``"start"`` and ``"end"`` (floats, in
        seconds) and ``"content"`` (the cue's stripped text lines joined
        with ``"\\n"``), in file order.
    """
    timestamp = r"(?:\d+:){0,2}\d+\.\d{3}"
    time_pattern = re.compile("(" + timestamp + ") --> (" + timestamp + ")")

    subtitles = []
    cue = None  # Cue currently being assembled; None while between cues.
    text_lines = []

    with open(vtt_filename, "r", encoding="utf-8") as file:
        next(file, None)  # Skip the header line.
        for line in file:
            match = time_pattern.match(line)
            if match:
                # A timing line also terminates a cue that was not followed
                # by a blank line, so that cue is kept rather than overwritten.
                _store_cue(subtitles, cue, text_lines)
                cue = {
                    "start": _timestamp_to_seconds(match.group(1)),
                    "end": _timestamp_to_seconds(match.group(2)),
                }
                text_lines = []
            elif cue is None:
                # Not inside a cue: nothing to attach this line to.
                continue
            elif line.strip():
                text_lines.append(line.strip())
            else:
                _store_cue(subtitles, cue, text_lines)
                cue = None
                text_lines = []

    # The last cue may not be followed by a blank line.
    _store_cue(subtitles, cue, text_lines)

    return subtitles


def subtitles_to_backup(subtitles):
    """Wrap a list of parsed subtitles in the backup dictionary layout.

    The given list is stored as-is (not copied) under ``"subtitles"``, and
    ``"line_index"`` is set to the number of subtitles. The remaining
    fields are initialised empty: no script lines, no subtitle in progress,
    and ``"play"`` set to 0.
    """
    return dict(
        subtitles=subtitles,
        script_lines=[],
        line_index=len(subtitles),
        current_subtitle={},
        play=0,
    )


def main(vtt_filename, output_filename):
    """Convert a subtitle file into a JSON backup file.

    Parses ``vtt_filename`` with ``parse_vtt``, wraps the result with
    ``subtitles_to_backup`` and writes it to ``output_filename`` as UTF-8
    JSON indented by two spaces.
    """
    backup = subtitles_to_backup(parse_vtt(vtt_filename))
    serialized = json.dumps(backup, indent=2)

    with open(output_filename, "w", encoding="utf-8") as out:
        out.write(serialized)


# Script entry: these statements run whenever the module is executed or
# imported, converting "audio.vtt" into "backup2.json".
vtt_filename, output_filename = "audio.vtt", "backup2.json"
main(vtt_filename=vtt_filename, output_filename=output_filename)