Added basic test coverage for VttConverter.

This commit is contained in:
Abdulmhsen B. A. A.
2024-09-16 21:24:09 +03:00
parent 631b8f2060
commit 7cbbe627b1
5 changed files with 225 additions and 30 deletions

View File

@@ -6,6 +6,7 @@ use InvalidArgumentException;
/**
* Class VttConverter
*
* Based on {@link https://github.com/mantas-done/subtitles/blob/master/src/Code/Converters/VttConverter.php}
*/
final readonly class VttConverter
@@ -64,7 +65,7 @@ final readonly class VttConverter
$textLine = $line;
// speaker
$speaker = null;
if (preg_match('/<v(?: (.*?))?>((?:.*?)<\/v>)/', $textLine, $matches)) {
if (preg_match('~^<v(?: (.*?))?>(.+)(</v>)?~', $textLine, $matches)) {
$speaker = $matches[1] ?? null;
$textLine = $matches[2];
}
@@ -141,13 +142,15 @@ final readonly class VttConverter
$parts[0] = 2 === substr_count($parts[0], ':') ? $parts[0] : '00:' . $parts[0];
if (!isset($parts[1])) {
throw new InvalidArgumentException("Invalid timestamp - time doesn't have milliseconds: " . $vtt_time);
throw new InvalidArgumentException(r("Invalid timestamp - time doesn't have milliseconds: '{time}'.", [
'time' => $vtt_time
]));
}
$only_seconds = strtotime("1970-01-01 {$parts[0]} UTC");
$onlySeconds = strtotime("1970-01-01 {$parts[0]} UTC");
$milliseconds = (float)('0.' . $parts[1]);
return $only_seconds + $milliseconds;
return $onlySeconds + $milliseconds;
}
private static function internalTimeToVtt($internal_time): string
@@ -163,61 +166,64 @@ final readonly class VttConverter
{
$lines = mb_split("\n", $content);
$lines = array_map('trim', $lines);
$new_lines = [];
$is_comment = false;
$newLines = [];
$isComment = false;
foreach ($lines as $line) {
if ($is_comment && strlen($line)) {
if ($isComment && strlen($line)) {
continue;
}
if (str_starts_with($line, 'NOTE ')) {
$is_comment = true;
if (true === str_starts_with($line, 'NOTE ')) {
$isComment = true;
continue;
}
$is_comment = false;
$new_lines[] = $line;
$isComment = false;
$newLines[] = $line;
}
return implode("\n", $new_lines);
return implode("\n", $newLines);
}
private static function getLineParts($line, $colon_count, $timestamp_count)
private static function getLineParts(string $line, int $colonCount, int $timestampCount): array
{
$matches = [
'start' => null,
'end' => null,
'text' => null,
];
$timestamps = self::timestampsFromLine($line);
// there shouldn't be any text before the timestamp
// if there is text before it, then it is not a timestamp
$right_timestamp = '';
if (isset($timestamps['start']) && (substr_count($timestamps['start'], ':') >= $colon_count || substr_count(
$rightTimestamp = '';
if (isset($timestamps['start']) && (substr_count($timestamps['start'], ':') >= $colonCount || substr_count(
$timestamps['start'],
';'
) >= $colon_count)) {
$text_before_timestamp = substr($line, 0, strpos($line, $timestamps['start']));
if (!self::hasText($text_before_timestamp)) {
) >= $colonCount)) {
$textBeforeTimestamp = substr($line, 0, strpos($line, $timestamps['start']));
if (!self::hasText($textBeforeTimestamp)) {
// start
$matches['start'] = $timestamps['start'];
$right_timestamp = $matches['start'];
if ($timestamp_count === 2 && isset($timestamps['end']) && (substr_count(
$rightTimestamp = $matches['start'];
if ($timestampCount === 2 && isset($timestamps['end']) && (substr_count(
$timestamps['end'],
':'
) >= $colon_count || substr_count($timestamps['end'], ';') >= $colon_count)) {
) >= $colonCount || substr_count($timestamps['end'], ';') >= $colonCount)) {
// end
$matches['end'] = $timestamps['end'];
$right_timestamp = $matches['end'];
$rightTimestamp = $matches['end'];
}
}
}
// check if there is any text after the timestamp
if ($right_timestamp) {
$tmp_parts = explode($right_timestamp, $line); // if start and end timestamp are equals
$right_text = end($tmp_parts); // take text after the end timestamp
if (self::hasText($right_text) || self::hasDigit($right_text)) {
$matches['text'] = trim($right_text);
if ($rightTimestamp) {
$tmpParts = explode($rightTimestamp, $line); // if start and end timestamp are equals
$rightText = end($tmpParts); // take text after the end timestamp
if (self::hasText($rightText) || self::hasDigit($rightText)) {
$matches['text'] = trim($rightText);
}
} else {
$matches['text'] = $line;
@@ -226,7 +232,7 @@ final readonly class VttConverter
return $matches;
}
private static function timestampsFromLine(string $line)
private static function timestampsFromLine(string $line): array
{
preg_match_all(self::TIME_FORMAT . 'm', $line, $timestamps);
@@ -244,8 +250,8 @@ final readonly class VttConverter
}
if ($result['start']) {
$text_before_timestamp = substr($line, 0, strpos($line, $result['start']));
if (self::hasText($text_before_timestamp)) {
$textBeforeTimestamp = substr($line, 0, strpos($line, $result['start']));
if (self::hasText($textBeforeTimestamp)) {
$result = [
'start' => null,
'end' => null,

View File

@@ -0,0 +1,19 @@
WEBVTT
00:00:00.498 --> 00:00:02.827
<v Speaker01>- Johnny, where are you?</v>
00:00:02.827 --> 00:00:06.383
<v Speaker02>- Over here</v>
<v Speaker01>- Where?</v>
00:00:06.383 --> 00:00:09.427
Oh, there you are!
00:00:09.427 --> 00:00:12.600
Come over here.
I want to read to you.
00:00:12.600 --> 00:00:16.900
I have your favorite book:
Green Eggs and Ham

View File

@@ -0,0 +1,51 @@
[
{
"start": 0.498,
"end": 2.827,
"lines": [
"- Johnny, where are you?"
],
"vtt": {
"speakers": [
"Speaker01"
]
}
},
{
"start": 2.827,
"end": 6.383,
"lines": [
"- Over here",
"- Where?"
],
"vtt": {
"speakers": [
"Speaker02",
"Speaker01"
]
}
},
{
"start": 6.383,
"end": 9.427,
"lines": [
"Oh, there you are!"
]
},
{
"start": 9.427,
"end": 12.6,
"lines": [
"Come over here.",
"I want to read to you."
]
},
{
"start": 12.6,
"end": 16.9,
"lines": [
"I have your favorite book:",
"Green Eggs and Ham"
]
}
]

View File

@@ -0,0 +1,59 @@
WEBVTT
Kind: captions
Language: en-US
Channel: CC1
Station: Online ABC
ProgramID: SH010855880000
ProgramType: TV series
ProgramName: Castle
Title: Law & Murder
Season: 3
Episode: 19
PublishDate: 2011-03-28
ContentAdvisory: TV-14
STYLE
/* Default cue styling */
::cue {
background-image: linear-gradient(to bottom, dimgray, lightgray);
color: blue;
}
/* Classes that can be applied to individual cues or phrases */
::cue(.bg-yellow) {
background-color: yellow;
}
::cue(.green) {
color: green;
}
NOTE
Copyright (c) 2016 by XYZ Company
All rights reserved
NOTE - Revisions
05/10/2016 09:20 AM - Revision 1.0 - First draft. TBD: Positioning
05/13/2016 06:13 PM - Revision 1.1 - Positioning completed.
05/14/2016 12:25 PM - Revision 1.2 - Review completed. Final draft.
NOTE ==== Beginning of Cues ====
00:00:00.498 --> 00:00:02.827
<v Speaker01>- Johnny, where are you?
00:00:02.827 --> 00:00:06.383
<v Speaker02>- <i>Over here</i></v>
<v Speaker01>- Where?</v>
00:00:06.383 --> 00:00:09.427
Oh, <b>there</b> you are!
00:00:09.427 --> 00:00:12.600
Come over here.
I want to read to you.
00:00:12.600 --> 00:00:16.900
I have your favorite book:
<u>Green Eggs and Ham</u>
NOTE ==== End of file ====

View File

@@ -0,0 +1,60 @@
<?php
declare(strict_types=1);
namespace Tests\Libs;
use App\Libs\Stream;
use App\Libs\TestCase;
use App\Libs\VttConverter;
/**
 * Basic coverage for VttConverter: parse a VTT fixture, compare it with the
 * expected JSON structure, re-export it, and check that a malformed cue is rejected.
 */
class VttConverterTest extends TestCase
{
    /**
     * Returns the raw contents of the source VTT fixture that is fed to the parser.
     */
    protected function getData(): string
    {
        return (string)Stream::make(__DIR__ . '/../Fixtures/subtitle.vtt', 'r');
    }

    /**
     * Returns the raw contents of the VTT fixture that the exporter is expected to produce.
     */
    protected function getExportedData(): string
    {
        return (string)Stream::make(__DIR__ . '/../Fixtures/subtitle.exported.vtt', 'r');
    }

    /**
     * Returns the decoded JSON fixture describing the expected parse result.
     *
     * @throws \JsonException When the fixture is not valid JSON.
     */
    protected function getJSON(): array
    {
        // Fail loudly with a JSON error instead of returning null from an array-typed method.
        return json_decode(
            (string)Stream::make(__DIR__ . '/../Fixtures/subtitle.json', 'r'),
            true,
            flags: JSON_THROW_ON_ERROR
        );
    }

    /**
     * Parses the source fixture, compares it with the JSON fixture, then exports
     * the parsed data and compares it with the exported fixture.
     */
    public function test_parse(): void
    {
        $data = VttConverter::parse($this->getData());
        $this->assertEquals($this->getJSON(), $data, 'Failed to parse VTT file');

        // Leading/trailing whitespace is ignored on both sides of the export comparison.
        $this->assertEquals(
            trim($this->getExportedData()),
            trim(VttConverter::export($data)),
            'Failed to export VTT file'
        );
    }

    /**
     * Expects an InvalidArgumentException when the cue timestamps carry no fractional part.
     */
    public function test_exceptions(): void
    {
        $this->checkException(
            closure: function () {
                // The closing marker's indentation is stripped from every heredoc line,
                // so the parsed text starts at column zero.
                $text = <<<VTT
                WEBVTT
                00:00:14 --> 00:00:21
                test
                VTT;

                return VttConverter::parse($text);
            },
            reason: 'Invalid VTT file',
            exception: \InvalidArgumentException::class,
        );
    }
}