Spaces:
Sleeping
Sleeping
# Helpers for converting faster-whisper Segment and Word objects to and from dictionaries
| from faster_whisper.transcribe import Segment, Word | |
# Function to dump a Word instance to a dictionary
def word_to_dict(word: Word) -> dict:
    """Dump a ``Word`` into a plain dictionary.

    The result has the keys ``start``, ``end``, ``word`` and
    ``probability`` (in that order), each holding the attribute of the
    same name read from *word*.
    """
    field_names = ("start", "end", "word", "probability")
    return {name: getattr(word, name) for name in field_names}
# Function to load a Word instance from a dictionary
def dict_to_word(data: dict) -> Word:
    """Rebuild a ``Word`` from a dictionary.

    *data* must provide the keys ``start``, ``end``, ``word`` and
    ``probability``; a missing key raises ``KeyError``. Each value is
    passed to ``Word`` as the keyword argument of the same name.
    """
    required_keys = ("start", "end", "word", "probability")
    return Word(**{key: data[key] for key in required_keys})
# Function to dump a Segment instance to a dictionary
def segment_to_dict(segment: Segment) -> dict:
    """Dump a ``Segment`` into a plain dictionary.

    The scalar fields (``id``, ``seek``, ``start``, ``end``, ``text``,
    ``tokens``, ``temperature``, ``avg_logprob``, ``compression_ratio``,
    ``no_speech_prob``) are copied by attribute name; ``tokens`` is stored
    by reference, not copied.

    ``words`` is converted element-wise with ``word_to_dict``. It is
    ``None`` only when ``segment.words`` is ``None``; an empty word list
    stays an empty list so that a dump/load round trip does not lose the
    distinction between "no word list" and "word list with no entries".
    """
    dumped = {
        "id": segment.id,
        "seek": segment.seek,
        "start": segment.start,
        "end": segment.end,
        "text": segment.text,
        "tokens": segment.tokens,
        "temperature": segment.temperature,
        "avg_logprob": segment.avg_logprob,
        "compression_ratio": segment.compression_ratio,
        "no_speech_prob": segment.no_speech_prob,
    }
    words = segment.words
    # Compare against None explicitly: a truthiness test would turn [] into None.
    dumped["words"] = (
        None if words is None else [word_to_dict(word) for word in words]
    )
    return dumped
# Function to load a Segment instance from a dictionary
def dict_to_segment(data: dict) -> Segment:
    """Rebuild a ``Segment`` from a dictionary.

    *data* must provide the keys ``id``, ``seek``, ``start``, ``end``,
    ``text``, ``tokens``, ``temperature``, ``avg_logprob``,
    ``compression_ratio`` and ``no_speech_prob``; a missing one raises
    ``KeyError``.

    ``words`` is optional: when the key is absent or its value is ``None``
    the segment is built with ``words=None``. Otherwise every entry is
    converted with ``dict_to_word``, so an empty list stays an empty list.
    """
    # .get() tolerates dictionaries that carry no "words" key at all.
    word_dicts = data.get("words")
    return Segment(
        id=data["id"],
        seek=data["seek"],
        start=data["start"],
        end=data["end"],
        text=data["text"],
        tokens=data["tokens"],
        temperature=data["temperature"],
        avg_logprob=data["avg_logprob"],
        compression_ratio=data["compression_ratio"],
        no_speech_prob=data["no_speech_prob"],
        words=(
            None
            if word_dicts is None
            else [dict_to_word(word) for word in word_dicts]
        ),
    )
def get_raw_words_from_segments(segments: list[Segment]) -> str:
    """Join the text of every word in *segments* with single spaces.

    Segments whose ``words`` is falsy (``None`` or empty) are skipped.
    Each ``word.word`` string is used exactly as stored; nothing is
    stripped or normalised before joining.
    """
    collected = []
    for segment in segments:
        if not segment.words:
            continue
        collected.extend(entry.word for entry in segment.words)
    return " ".join(collected)