fork(fix): Clone AMLL 并修复 BUG

- 将AMLL Clone到本地进行修复和优化(emm虽然这很不优雅但是暂时无时间做子模块和Fork)
- 修复在当前播放歌词行不可见的视口Seek会出现滚动偏移的问题
This commit is contained in:
lqtmcstudio
2026-06-07 00:02:14 +08:00
parent 783d2c3dee
commit 72f4510dc8
458 changed files with 86075 additions and 1665 deletions

View File

@@ -0,0 +1,5 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`TTML Generator - toTTMLResult > generates TTMLResult from AMLL data and serialize it to XML 1`] = `"<tt xmlns:amll="http://www.example.com/ns/amll" xmlns:itunes="http://music.apple.com/lyric-ttml-internal" xmlns:ttm="http://www.w3.org/ns/ttml#metadata" xmlns:tts="http://www.w3.org/ns/ttml#styling" itunes:timing="Word" xmlns="http://www.w3.org/ns/ttml"><head><metadata><ttm:agent type="person" xml:id="v1"/><ttm:agent type="person" xml:id="v2"/><iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal"><transliterations><transliteration xml:lang="zh-Latn"><text for="L1"><span begin="1.000" end="2.000">ni</span><span begin="2.000" end="3.000">hao</span><span ttm:role="x-bg"><span begin="3.000" end="4.000">(shi</span><span begin="4.000" end="5.000">jie)</span></span></text></transliteration></transliterations></iTunesMetadata><amll:meta key="musicName" value="Test Song"/><amll:meta key="artists" value="Artist A"/><amll:meta key="artists" value="Artist B"/></metadata></head><body dur="3.000"><div begin="1.000" end="3.000"><p begin="1.000" end="3.000" itunes:key="L1" ttm:agent="v1"><span begin="1.000" end="2.000">你</span><span begin="2.000" end="3.000">好</span><span ttm:role="x-translation" xml:lang="en">你好</span><span ttm:role="x-bg" begin="3.000" end="5.000"><span begin="3.000" end="4.000">(世</span><span begin="4.000" end="5.000">界)</span><span ttm:role="x-translation" xml:lang="en">世界</span></span></p></div></body></tt>"`;
exports[`TTML Generator Integration > matches the XML snapshot 1`] = `"<tt xmlns:amll="http://www.example.com/ns/amll" xmlns:itunes="http://music.apple.com/lyric-ttml-internal" xmlns:ttm="http://www.w3.org/ns/ttml#metadata" xmlns:tts="http://www.w3.org/ns/ttml#styling" xml:lang="ja" itunes:timing="Word" xmlns="http://www.w3.org/ns/ttml"><head><metadata><ttm:agent type="person" xml:id="v1"><ttm:name type="full">Vocalist A (Taro)</ttm:name></ttm:agent><ttm:agent type="person" xml:id="v2"><ttm:name type="full">Vocalist B (Hanako)</ttm:name></ttm:agent><ttm:agent type="group" xml:id="v1000"><ttm:name type="full">Chorus Group</ttm:name></ttm:agent><iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal"><transliterations><transliteration xml:lang="ja-Latn"><text for="L1"><span begin="10.000" end="10.500">Ko</span><span begin="10.500" end="10.800">re</span> <span begin="10.800" end="11.000">wa</span> <span begin="11.200" end="11.800">tesuto</span></text><text for="L2"><span begin="15.000" end="15.800">Futatsume</span> <span begin="16.000" end="16.500">no</span> <span begin="16.500" end="17.000">rain</span></text><text for="L3"><span begin="20.000" end="21.500">Kōrasu</span> <span begin="21.500" end="22.000">desu</span><span ttm:role="x-bg"><span begin="22.500" end="23.800">(haikei)</span></span></text></transliteration></transliterations><songwriters><songwriter>作曲者1号</songwriter><songwriter>作曲者2号</songwriter></songwriters></iTunesMetadata><amll:meta key="musicName" value="Complex Test Song"/><amll:meta key="musicName" value="複雑なテストソング"/><amll:meta key="artists" value="Vocalist A (Taro)"/><amll:meta key="artists" value="Vocalist B (Hanako)"/><amll:meta key="album" value="AMLL Parser Test Suite"/><amll:meta key="ncmMusicId" value="123456789"/><amll:meta key="qqMusicId" value="987654321"/><amll:meta key="spotifyId" value="abc123xyz"/><amll:meta key="appleMusicId" value="999888777"/><amll:meta key="isrc" value="JPXX02500001"/><amll:meta key="ttmlAuthorGithub" 
value="10001"/><amll:meta key="ttmlAuthorGithubLogin" value="TestUser"/></metadata></head><body dur="25.000"><div begin="10.000" end="17.000" itunes:songPart="Verse"><p begin="10.000" end="12.000" itunes:key="L1" ttm:agent="v1"><span begin="10.000" end="10.500" amll:obscene="true">これ</span><span begin="10.500" end="10.800">は</span> <span begin="11.200" end="11.800" amll:empty-beat="5">テスト</span><span ttm:role="x-translation" xml:lang="en-US">This is the first line (Vocalist A)</span><span ttm:role="x-translation" xml:lang="zh-Hans-CN">这是第一行歌词 (演唱者A)</span></p><p begin="15.000" end="17.000" itunes:key="L2" ttm:agent="v2"><span begin="15.000" end="15.800">二つ目</span> <span begin="16.000" end="16.500">の</span> <span begin="16.500" end="17.000">ライン</span><span ttm:role="x-translation" xml:lang="en-US">This is the second line (Vocalist B)</span><span ttm:role="x-translation" xml:lang="zh-Hans-CN">这是第二行歌词 (演唱者B)</span></p></div><div begin="20.000" end="25.000" itunes:songPart="Chorus"><p begin="20.000" end="25.000" itunes:key="L3" ttm:agent="v1000"><span begin="20.000" end="21.500">コーラス</span> <span begin="21.500" end="22.000">です</span><span ttm:role="x-translation" xml:lang="en-US">This is the chorus line</span><span ttm:role="x-translation" xml:lang="zh-Hans-CN">这是合唱部分</span><span ttm:role="x-bg" begin="22.500" end="23.800"><span begin="22.500" end="23.800">(背景)</span><span ttm:role="x-translation" xml:lang="en">Background</span><span ttm:role="x-translation" xml:lang="en-US">With background</span><span ttm:role="x-translation" xml:lang="zh-Hans-CN">带背景音</span><span ttm:role="x-roman" xml:lang="ja-Latn">haikei</span></span></p></div></body></tt>"`;

View File

@@ -0,0 +1,161 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`toAmllLyrics Conversion > computes Apple Music TTML with v2000 other agent correctly in duet alignment 1`] = `
[
"[ 0.36s] 右 [main] : Baby, you've gone far away",
"[ 5.83s] 右 [main] : You've gone far away",
"[ 10.35s] 右 [main] : Baby, you've gone far away",
"[ 15.81s] 右 [main] : You've gone far away",
"[ 20.40s] 左 [main] : 於是茶就這樣冷了",
"[ 25.36s] 左 [main] : 於是天就這樣亮了",
"[ 30.36s] 左 [main] : 於是你就這樣離開了",
"[ 35.38s] 左 [main] : 於是我終於醒了",
"[ 40.35s] 左 [main] : 於是你我從此遠了",
"[ 45.36s] 左 [main] : 於是路從此分岔了",
"[ 50.38s] 左 [main] : 你的臉從此就陌生了",
"[ 54.11s] 左 [main] : Whoa, whoa",
"[ 55.18s] 左 [main] : 雖然我有一點不捨",
"[ 60.37s] 左 [main] : 可是時間不可能停下",
"[ 63.21s] 左 [main] : 不能停下 不會停下",
"[ 65.68s] 左 [main] : 世界一直在變幻著",
"[ 68.20s] 左 [main] : 你也變了 我也變了",
"[ 70.70s] 左 [main] : 過去都已經過去了",
"[ 73.80s] 左 [main] : 既然回不去了 我還在煩惱什麼",
"[ 79.14s] 左 [main] : Oh, whoa",
"[ 80.37s] 左 [main] : 於是告訴自己 不要哭",
"[ 83.20s] 左 [main] : 我不要哭 我不能哭",
"[ 85.69s] 左 [main] : 往前方的路 走一步",
"[ 88.17s] 左 [main] : 再走一步 就會幸福",
"[ 90.69s] 左 [main] : 成長要學會獨處",
"[ 93.81s] 左 [main] : Oh, whoa",
"[ 95.28s] 左 [main] : 雖然有一點孤獨",
"[100.40s] 右 [main] : Baby, you've gone far away",
"[102.52s] 左 [main] : Therefore, I think I should follow, boy",
"[105.72s] 右 [main] : When you've gone far away",
"[107.50s] 左 [main] : Therefore, maybe I'm about to go, yeah, because",
"[110.37s] 右 [main] : Baby, you've gone far away",
"[112.57s] 左 [main] : I don't know, boy, I don't really know why, why",
"[115.68s] 右 [main] : That you've gone far away",
"[117.49s] 左 [main] : Therefore, therefore, I'm now so alone",
"[120.38s] 左 [main] : 於是花一個人種了",
"[125.38s] 左 [main] : 於是夢一個人做了",
"[130.36s] 左 [main] : 於是痛一個人扛下了",
"[133.82s] 左 [main] : Oh, whoa",
"[135.37s] 左 [main] : 快樂一個人笑著",
"[140.35s] 左 [main] : 可是時間不可能停下",
"[143.22s] 左 [main] : 不能停下 不會停下",
"[145.67s] 左 [main] : 世界一直在變幻著",
"[148.21s] 左 [main] : 你也變了 我也變了",
"[150.69s] 左 [main] : 過去都已經過去了",
"[153.76s] 左 [main] : 既然回不去了 我還在煩惱什麼",
"[159.14s] 左 [main] : Oh, whoa",
"[160.39s] 左 [main] : 於是告訴自己 不要哭",
"[163.21s] 左 [main] : 我不要哭 我不能哭",
"[165.70s] 左 [main] : 往前方的路 走一步",
"[168.18s] 左 [main] : 再走一步 就會幸福",
"[170.69s] 左 [main] : 成長要學會獨處",
"[173.82s] 左 [main] : Oh, whoa",
"[175.26s] 左 [main] : 雖然有一點孤獨",
"[180.38s] 右 [main] : Baby, you've gone far away",
"[183.17s] 左 [main] : Ooh, ooh-ooh",
"[185.38s] 右 [main] : Oh, when you've gone far away",
"[187.57s] 左 [main] : Baby, you've gone",
"[189.43s] 左 [main] : Oh, you've gone far away",
"[190.40s] 右 [main] : Baby, you've gone far away",
"[192.57s] 左 [main] : Oh-oh, oh-whoa",
"[195.38s] 右 [main] : Oh, when you've gone far away",
"[196.96s] 左 [main] : Baby, you've gone far away",
"[199.64s] 左 [main] : You've gone far away",
"[200.41s] 右 [main] : Baby, you've gone far away",
"[204.15s] 左 [main] : Oh-ooh",
"[205.38s] 右 [main] : Oh, that you've gone far away",
"[206.32s] 左 [main] : Oh-oh-oh-ooh, oh-oh-oh-ooh",
"[210.39s] 左 [main] : Baby, you've gone far away",
"[213.84s] 左 [main] : Oh, oh",
"[215.58s] 左 [main] : When you've gone far away",
]
`;
exports[`toAmllLyrics Conversion > computes left-right duet layout in Apple Music style with multiple singers correctly in duet alignment 1`] = `
[
"[ 1.15s] 左 [main] : We don't talk about Bruno, no, no, no",
"[ 5.84s] 左 [main] : We don't talk about Bruno, but",
"[ 10.52s] 左 [main] : It was my wedding day",
"[ 11.72s] 左 [bg] : It was our wedding day",
"[ 12.79s] 左 [main] : We were getting ready and there wasn't a cloud in the sky",
"[ 16.94s] 左 [bg] : No clouds allowed in the sky",
"[ 19.26s] 左 [main] : Bruno walks in with a mischievous grin",
"[ 23.12s] 左 [bg] : Thunder",
"[ 24.26s] 左 [main] : You telling this story or am I?",
"[ 26.61s] 左 [bg] : I'm sorry, mi vida, go on",
"[ 28.49s] 左 [main] : Bruno says, "It looks like rain"",
"[ 31.39s] 左 [bg] : Why did he tell us?",
"[ 32.98s] 左 [main] : In doing so, he floods my brain",
"[ 35.71s] 左 [bg] : Abuela, get the umbrellas",
"[ 37.89s] 左 [main] : Married in a hurricane",
"[ 40.66s] 左 [bg] : What a joyous day, but anyway",
"[ 43.13s] 左 [main] : We don't talk about Bruno, no, no, no",
"[ 47.80s] 左 [main] : We don't talk about Bruno",
"[ 51.33s] 右 [main] : Hey, grew to live in fear of Bruno stuttering or stumbling",
"[ 54.20s] 右 [main] : I can always hear him sort of muttering and mumbling",
"[ 56.53s] 右 [main] : I associate him with the sound of falling sand",
"[ 59.42s] 右 [bg] : Ch-ch-ch",
"[ 61.23s] 右 [main] : It's a heavy lift with a gift so humbling",
"[ 63.53s] 右 [main] : Always left Abuela and the family fumbling",
"[ 65.85s] 右 [main] : Grappling with prophecies they couldn't understand",
"[ 69.04s] 右 [main] : Do you understand?",
"[ 70.41s] 左 [main] : Seven-foot frame, rats along his back",
"[ 74.60s] 左 [main] : When he calls your name, it all fades to black",
"[ 79.25s] 左 [main] : Yeah, he sees your dreams and feasts on your screams",
"[ 83.34s] 左 [bg] : Hey",
"[ 85.06s] 左 [main] : We don't talk about Bruno, no, no, no",
"[ 89.71s] 左 [main] : We don't talk about Bruno",
"[ 94.08s] 右 [main] : He told me my fish would die, the next day, dead",
"[ 97.64s] 右 [bg] : No, no",
"[ 98.79s] 左 [main] : He told me I'd grow a gut and just like he said",
"[102.27s] 左 [bg] : No, no",
"[102.38s] 右 [main] : He said that all my hair would disappear, now look at my head",
"[106.99s] 右 [bg] : No, no",
"[108.15s] 左 [main] : Your fate is sealed when your prophecy is read",
"[112.50s] 左 [main] : He told me that the life of my dreams",
"[116.33s] 左 [main] : Would be promised, and someday be mine",
"[121.78s] 左 [main] : He told me that my power would grow",
"[125.54s] 左 [main] : Like the grapes that thrive on the vine",
"[129.04s] 左 [bg] : Óye, Mariano's on his way",
"[131.05s] 右 [main] : He told me that the man of my dreams",
"[134.85s] 右 [main] : Would be just out of reach",
"[137.81s] 右 [main] : Betrothed to another",
"[140.72s] 右 [main] : It's like I hear him now",
"[142.15s] 左 [main] : Hey sis, I want not a sound out of you",
"[145.38s] 右 [main] : It's like I can hear him now",
"[148.30s] 右 [main] : I can hear him now",
"[149.75s] 左 [main] : Um, Bruno",
"[152.32s] 左 [main] : Yeah, about that Bruno",
"[154.23s] 左 [main] : I really need to know about Bruno",
"[156.36s] 左 [main] : Gimme the truth and the whole truth, Bruno",
"[158.42s] 左 [bg] : Isabela, your boyfriend's here",
"[161.75s] 左 [main] : Time for dinner",
"[163.73s] 右 [main] : Seven-foot frame, rats along his back",
"[164.33s] 右 [bg] : It was my wedding day, it was our wedding day",
"[166.63s] 右 [main] : When he calls your name, it all fades to black",
"[166.63s] 右 [bg] : We were getting ready and there wasn't a cloud in the sky",
"[170.70s] 右 [main] : No clouds allowed in the sky",
"[172.48s] 右 [main] : Yeah, he sees your dreams and feasts on your screams",
"[173.07s] 右 [bg] : Bruno walks in with a mischievous grin",
"[176.81s] 右 [main] : You telling this story or am I?",
"[176.81s] 右 [bg] : Thunder",
"[180.35s] 左 [main] : Óye, Mariano's on his way",
"[182.37s] 右 [main] : Bruno says, "It looks like rain"",
"[182.46s] 右 [bg] : He told me that the man of my dreams would be just out of reach",
"[185.21s] 左 [main] : Why did he tell us?",
"[186.74s] 右 [main] : In doing so, he floods my brain",
"[188.57s] 右 [bg] : Betrothed to another, another",
"[191.10s] 右 [main] : Married in a hurricane",
"[191.10s] 右 [bg] : And I'm fine, and I'm fine, and I'm fine, I'm fine",
"[196.05s] 左 [main] : He's here",
"[196.94s] 左 [main] : Don't talk about Bruno",
"[198.97s] 左 [main] : Why did I talk about Bruno?",
"[201.60s] 左 [main] : Not a word about Bruno",
"[203.22s] 左 [main] : I never shoulda brought up Bruno",
]
`;

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,112 @@
<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml"
xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
xmlns:itunes="http://itunes.apple.com/lyric-ttml-extensions"
xmlns:amll="http://www.example.com/ns/amll"
xml:lang="ja"
itunes:timing="Word">
<head>
<metadata>
<ttm:title>Complex Test Song</ttm:title>
<ttm:agent type="person" xml:id="v1">
<ttm:name type="full">Vocalist A (Taro)</ttm:name>
</ttm:agent>
<ttm:agent type="person" xml:id="v2">
<ttm:name type="full">Vocalist B (Hanako)</ttm:name>
</ttm:agent>
<ttm:agent type="group" xml:id="v1000">
<ttm:name type="full">Chorus Group</ttm:name>
</ttm:agent>
<amll:meta key="musicName" value="複雑なテストソング" />
<amll:meta key="artists" value="Vocalist A (Taro)" />
<amll:meta key="artists" value="Vocalist B (Hanako)" />
<amll:meta key="album" value="AMLL Parser Test Suite" />
<amll:meta key="isrc" value="JPXX02500001" />
<amll:meta key="ncmMusicId" value="123456789" />
<amll:meta key="qqMusicId" value="987654321" />
<amll:meta key="spotifyId" value="abc123xyz" />
<amll:meta key="appleMusicId" value="999888777" />
<amll:meta key="ttmlAuthorGithub" value="10001" />
<amll:meta key="ttmlAuthorGithubLogin" value="TestUser" />
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
<songwriters>
<songwriter>作曲者1号</songwriter>
<songwriter>作曲者2号</songwriter>
</songwriters>
<translations>
<translation type="subtitle" xml:lang="en-US">
<text for="L1">This is the first line (Vocalist A)</text>
<text for="L2">This is the second line (Vocalist B)</text>
<text for="L3"> This is the chorus line <span ttm:role="x-bg">(With
background)</span>
</text>
</translation>
<translation type="subtitle" xml:lang="zh-Hans-CN">
<text for="L1">这是第一行歌词 (演唱者A)</text>
<text for="L2">这是第二行歌词 (演唱者B)</text>
<text for="L3"> 这是合唱部分 <span ttm:role="x-bg">(带背景音)</span>
</text>
</translation>
</translations>
<transliterations>
<transliteration xml:lang="ja-Latn">
<text for="L1">
<span begin="00:10.000" end="00:10.500">Ko</span>
<span begin="00:10.500" end="00:10.800">re </span>
<span begin="00:10.800" end="00:11.000">wa </span>
<span begin="00:11.200" end="00:11.800">tesuto</span>
</text>
<text for="L2">
<span begin="00:15.000" end="00:15.800">Futatsume </span>
<span begin="00:16.000" end="00:16.500">no </span>
<span begin="00:16.500" end="00:17.000">rain</span>
</text>
<text for="L3">
<span begin="00:20.000" end="00:21.500">Kōrasu </span>
<span begin="00:21.500" end="00:22.000">desu</span>
<span ttm:role="x-bg">
<span begin="00:22.500" end="00:23.800">(haikei)</span>
</span>
</text>
</transliteration>
</transliterations>
</iTunesMetadata>
</metadata>
</head>
<body dur="00:30.000">
<div begin="00:08.000" end="00:18.000" itunes:song-part="Verse">
<p begin="00:10.000" end="00:12.000" itunes:key="L1" ttm:agent="v1">
<span begin="00:10.000" end="00:10.500" amll:obscene="true">これ</span>
<span begin="00:10.500" end="00:10.800">は </span>
<span begin="00:11.200" end="00:11.800" amll:empty-beat="5">テスト</span>
</p>
<p begin="00:15.000" end="00:17.000" itunes:key="L2" ttm:agent="v2">
<span begin="00:15.000" end="00:15.800">二つ目 </span>
<span begin="00:16.000" end="00:16.500">の </span>
<span begin="00:16.500" end="00:17.000">ライン</span>
</p>
</div>
<div begin="00:19.000" end="00:30.000" itunes:song-part="Chorus">
<p begin="00:20.000" end="00:25.000" itunes:key="L3" ttm:agent="v1000">
<span begin="00:20.000" end="00:21.500">コーラス </span>
<span begin="00:21.500" end="00:22.000">です</span>
<span ttm:role="x-bg" begin="00:22.500" end="00:23.800" ttm:agent="v1">
<span begin="00:22.500" end="00:23.800">(背景)</span>
<span ttm:role="x-translation" xml:lang="en">Background</span>
<span ttm:role="x-roman" xml:lang="ja-Latn">haikei</span>
</span>
</p>
</div>
</body>
</tt>

View File

@@ -0,0 +1,32 @@
<tt xmlns="http://www.w3.org/ns/ttml"
xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
xmlns:tts="http://www.w3.org/ns/ttml#styling"
xmlns:itunes="http://music.apple.com/lyric-ttml-internal"
itunes:timing="Word"
xml:lang="ja">
<head>
<metadata>
<ttm:agent type="person" xml:id="v1" />
</metadata>
</head>
<body dur="28.000">
<div begin="27.000" end="28.000">
<p begin="27.000" end="28.000" itunes:key="L1" ttm:agent="v1">
<span begin="27.000" end="27.500">これは</span>
<span tts:ruby="container">
<span tts:ruby="base">所</span>
<span tts:ruby="textContainer">
<span tts:ruby="text" begin="27.690" end="27.820">しょ</span>
</span>
</span>
<span tts:ruby="container">
<span tts:ruby="base">詮</span>
<span tts:ruby="textContainer">
<span tts:ruby="text" begin="27.820" end="27.880">せ</span>
<span tts:ruby="text" begin="27.880" end="27.950">ん</span>
</span>
</span>
</p>
</div>
</body>
</tt>

View File

@@ -0,0 +1,748 @@
import { readFileSync } from "node:fs";
import { join } from "node:path";
import { DOMImplementation, DOMParser, XMLSerializer } from "@xmldom/xmldom";
import { beforeAll, describe, expect, it } from "vitest";
import type { AmllLyricLine, AmllMetadata, TTMLResult } from "../src/index";
import { TTMLGenerator, TTMLParser, toTTMLResult } from "../src/index";
// Raw text of the TTML fixture, loaded once at module evaluation from the
// `fixtures` directory that sits next to this test file.
const XML = readFileSync(
  join(import.meta.dirname, "fixtures", "complex-test-song.ttml"),
  { encoding: "utf-8" },
);
/**
 * Round-trip integration suite.
 *
 * The fixture XML is parsed, fed to the generator, and the generated XML is
 * parsed again; the two parse results are then compared field by field.
 * Two further tests build small in-memory results to check where
 * translations are emitted (sidecar vs. inline) when a line carries a
 * background vocal.
 */
describe("TTML Generator Integration", () => {
  let parser: TTMLParser;
  let generator: TTMLGenerator;
  let originalResult: TTMLResult;
  let generatedXML: string;
  let parsedGeneratedResult: TTMLResult;

  // Parse -> generate -> parse once; every round-trip test reads these results.
  beforeAll(() => {
    parser = new TTMLParser({ domParser: new DOMParser() });
    generator = new TTMLGenerator({
      domImplementation: new DOMImplementation(),
      xmlSerializer: new XMLSerializer(),
    });
    originalResult = parser.parse(XML);
    generatedXML = generator.generate(originalResult);
    parsedGeneratedResult = parser.parse(generatedXML);
  });

  it("generates an XML string", () => {
    expect(generatedXML).toBeDefined();
    expect(typeof generatedXML).toBe("string");
    expect(generatedXML.length).toBeGreaterThan(0);
    expect(generatedXML).toContain("<tt");
    expect(generatedXML).toContain("</tt>");
  });

  // NOTE: the test name is part of the snapshot key; do not rename it without
  // updating the snapshot file.
  it("matches the XML snapshot", () => {
    expect(generatedXML).toMatchSnapshot();
  });

  it("preserves metadata after round-trip generation", () => {
    expect(parsedGeneratedResult.metadata.language).toBe(
      originalResult.metadata.language,
    );
    expect(parsedGeneratedResult.metadata.timingMode).toBe(
      originalResult.metadata.timingMode,
    );
    expect(parsedGeneratedResult.metadata.title).toEqual(
      originalResult.metadata.title,
    );
    expect(parsedGeneratedResult.metadata.artist).toEqual(
      originalResult.metadata.artist,
    );
    expect(parsedGeneratedResult.metadata.album).toEqual(
      originalResult.metadata.album,
    );
    expect(parsedGeneratedResult.metadata.isrc).toEqual(
      originalResult.metadata.isrc,
    );
    expect(parsedGeneratedResult.metadata.platformIds).toEqual(
      originalResult.metadata.platformIds,
    );
    expect(parsedGeneratedResult.metadata.authorIds).toEqual(
      originalResult.metadata.authorIds,
    );
    expect(parsedGeneratedResult.metadata.authorNames).toEqual(
      originalResult.metadata.authorNames,
    );
    expect(parsedGeneratedResult.metadata.songwriters).toEqual(
      originalResult.metadata.songwriters,
    );
    expect(parsedGeneratedResult.metadata.agents).toEqual(
      originalResult.metadata.agents,
    );
  });

  it("preserves line count after round-trip generation", () => {
    expect(parsedGeneratedResult.lines.length).toBe(
      originalResult.lines.length,
    );
  });

  it("preserves line content after round-trip generation", () => {
    for (let i = 0; i < originalResult.lines.length; i++) {
      const originalLine = originalResult.lines[i];
      const generatedLine = parsedGeneratedResult.lines[i];

      // Fail with a readable message instead of an opaque TypeError when the
      // regenerated document has fewer lines than the original.
      if (!generatedLine) {
        throw new Error(
          `Line at index ${i} is missing from the regenerated result`,
        );
      }

      expect(generatedLine.id).toBe(originalLine.id);
      expect(generatedLine.startTime).toBe(originalLine.startTime);
      expect(generatedLine.endTime).toBe(originalLine.endTime);
      expect(generatedLine.agentId).toBe(originalLine.agentId);
      expect(generatedLine.songPart).toBe(originalLine.songPart);
      expect(generatedLine.text).toBe(originalLine.text);

      // Word-level timing: lengths are compared first, so the index-based
      // comparison below only runs over matching positions.
      expect(generatedLine.words?.length).toBe(originalLine.words?.length);
      if (originalLine.words && generatedLine.words) {
        for (let j = 0; j < originalLine.words.length; j++) {
          expect(generatedLine.words[j].text).toBe(originalLine.words[j].text);
          expect(generatedLine.words[j].startTime).toBe(
            originalLine.words[j].startTime,
          );
          expect(generatedLine.words[j].endTime).toBe(
            originalLine.words[j].endTime,
          );
        }
      }

      expect(generatedLine.translations?.length).toBe(
        originalLine.translations?.length,
      );
      if (originalLine.translations && generatedLine.translations) {
        for (let j = 0; j < originalLine.translations.length; j++) {
          expect(generatedLine.translations[j].language).toBe(
            originalLine.translations[j].language,
          );
          expect(generatedLine.translations[j].text).toBe(
            originalLine.translations[j].text,
          );
        }
      }

      expect(generatedLine.romanizations?.length).toBe(
        originalLine.romanizations?.length,
      );
      if (originalLine.romanizations && generatedLine.romanizations) {
        for (let j = 0; j < originalLine.romanizations.length; j++) {
          expect(generatedLine.romanizations[j].language).toBe(
            originalLine.romanizations[j].language,
          );
          expect(generatedLine.romanizations[j].text).toBe(
            originalLine.romanizations[j].text,
          );
        }
      }
    }
  });

  it("generates separated sidecar nodes for mismatched languages between main and background", () => {
    // Main line is translated to "en" only, its background vocal to "es" only.
    const mockResult: TTMLResult = {
      metadata: { timingMode: "Line" },
      lines: [
        {
          id: "L1",
          startTime: 0,
          endTime: 1000,
          text: "Main",
          translations: [{ language: "en", text: "Main English" }],
          backgroundVocal: {
            startTime: 0,
            endTime: 1000,
            text: "(Bg)",
            translations: [{ language: "es", text: "Bg Spanish" }],
          },
        },
      ],
    };
    // Dedicated instance (named so it does not shadow the suite-level
    // `generator`) with sidecar output switched on.
    const sidecarGenerator = new TTMLGenerator({
      domImplementation: new DOMImplementation(),
      xmlSerializer: new XMLSerializer(),
      useSidecar: true,
    });
    const xml = sidecarGenerator.generate(mockResult);
    const doc = new DOMParser().parseFromString(xml, "application/xml");
    const translations = Array.from(doc.getElementsByTagName("translation"));
    const enTrans = translations.find(
      (el) => el.getAttribute("xml:lang") === "en",
    );
    const esTrans = translations.find(
      (el) => el.getAttribute("xml:lang") === "es",
    );

    expect(enTrans).toBeDefined();
    expect(esTrans).toBeDefined();

    // The "en" block holds only the main translation and no nested <span>.
    expect(enTrans?.textContent).toContain("Main English");
    expect(enTrans?.getElementsByTagName("span").length).toBe(0);

    // The "es" block holds only the background translation, wrapped in a
    // single x-bg <span>.
    expect(esTrans?.textContent).toContain("Bg Spanish");
    expect(esTrans?.textContent).not.toContain("Main English");
    const esSpans = esTrans?.getElementsByTagName("span");
    expect(esSpans?.length).toBe(1);
    expect(esSpans?.[0].getAttribute("ttm:role")).toBe("x-bg");
  });

  it("ensures inline translations and background vocals do not deeply nest each other", () => {
    const mockResult: TTMLResult = {
      metadata: { timingMode: "Line" },
      lines: [
        {
          id: "L1",
          startTime: 0,
          endTime: 1000,
          text: "Main Lyric",
          translations: [{ language: "en", text: "Main Trans" }],
          backgroundVocal: {
            startTime: 0,
            endTime: 1000,
            text: "(Bg Lyric)",
            translations: [{ language: "en", text: "Bg Trans" }],
          },
        },
      ],
    };
    // Dedicated instance (named so it does not shadow the suite-level
    // `generator`) with sidecar output switched off, i.e. inline spans.
    const inlineGenerator = new TTMLGenerator({
      domImplementation: new DOMImplementation(),
      xmlSerializer: new XMLSerializer(),
      useSidecar: false,
    });
    const xmlStr = inlineGenerator.generate(mockResult);
    const doc = new DOMParser().parseFromString(xmlStr, "application/xml");
    const pNode = doc.getElementsByTagName("p")[0];

    // Only direct <span> children of <p> (nodeType 1 = element node).
    // NOTE(review): the double assertion presumably bridges xmldom's node
    // types and the DOM lib `Element` type — confirm before tightening it.
    const childSpans = Array.from(pNode.childNodes).filter(
      (n) => n.nodeType === 1 && n.nodeName.toLowerCase() === "span",
    ) as unknown as Element[];
    const mainTransSpan = childSpans.find(
      (span) => span.getAttribute("ttm:role") === "x-translation",
    );
    const bgSpan = childSpans.find(
      (span) => span.getAttribute("ttm:role") === "x-bg",
    );

    expect(mainTransSpan).toBeDefined();
    expect(mainTransSpan?.textContent).toBe("Main Trans");
    expect(bgSpan).toBeDefined();
    expect(bgSpan?.textContent).toContain("Bg Lyric");

    // Narrowing guard for the type checker; the message makes a failure here
    // self-explanatory.
    if (!bgSpan) {
      throw new Error(
        'Expected <p> to have a direct child <span ttm:role="x-bg">',
      );
    }

    // The background span must contain exactly one nested span: its own
    // translation.
    const nestedTransSpans = Array.from(bgSpan.getElementsByTagName("span"));
    expect(nestedTransSpans).toHaveLength(1);
    expect(nestedTransSpans[0].getAttribute("ttm:role")).toBe("x-translation");
    expect(nestedTransSpans[0].textContent).toBe("Bg Trans");
  });
});
/**
 * Covers `toTTMLResult`: AMLL lyric lines and AMLL metadata entries are
 * converted to a `TTMLResult`, serialized with the generator, and parsed
 * back with the parser.
 */
describe("TTML Generator - toTTMLResult", () => {
  let generator: TTMLGenerator;
  let parser: TTMLParser;

  beforeAll(() => {
    generator = new TTMLGenerator({
      domImplementation: new DOMImplementation(),
      xmlSerializer: new XMLSerializer(),
    });
    parser = new TTMLParser({ domParser: new DOMParser() });
  });

  // NOTE: the test name is part of the snapshot key; keep it unchanged.
  it("generates TTMLResult from AMLL data and serialize it to XML", () => {
    const metadataEntries: AmllMetadata[] = [
      ["musicName", ["Test Song"]],
      ["artists", ["Artist A", "Artist B"]],
    ];

    // A regular line followed by a background (isBG) line.
    const mainLine: AmllLyricLine = {
      startTime: 1000,
      endTime: 3000,
      isBG: false,
      isDuet: false,
      translatedLyric: "你好",
      romanLyric: "ni hao",
      words: [
        { startTime: 1000, endTime: 2000, word: "你", romanWord: "ni" },
        { startTime: 2000, endTime: 3000, word: "好", romanWord: "hao" },
      ],
    };
    const backgroundLine: AmllLyricLine = {
      startTime: 3000,
      endTime: 5000,
      isBG: true,
      isDuet: false,
      translatedLyric: "世界",
      romanLyric: "shi jie",
      words: [
        { startTime: 3000, endTime: 4000, word: "世", romanWord: "shi" },
        { startTime: 4000, endTime: 5000, word: "界", romanWord: "jie" },
      ],
    };

    const converted = toTTMLResult(
      [mainLine, backgroundLine],
      metadataEntries,
      {
        translationLanguage: "en",
        romanizationLanguage: "zh-Latn",
      },
    );

    // Metadata entries end up as title / artist on the result.
    const { metadata, lines } = converted;
    expect(metadata.title).toEqual(["Test Song"]);
    expect(metadata.artist).toEqual(["Artist A", "Artist B"]);

    // The two AMLL lines collapse into one line carrying a background vocal.
    expect(lines).toHaveLength(1);
    expect(lines[0].backgroundVocal).toBeDefined();

    const serialized = generator.generate(converted);
    for (const fragment of ["<tt", "Test Song", "Artist A"]) {
      expect(serialized).toContain(fragment);
    }
    expect(serialized).toMatchSnapshot();

    // The serialized document parses back to the same visible content.
    const reparsed = parser.parse(serialized);
    expect(reparsed.metadata.title).toEqual(["Test Song"]);
    expect(reparsed.lines).toHaveLength(1);
    const [onlyLine] = reparsed.lines;
    expect(onlyLine.text).toBe("你好");
    expect(onlyLine.backgroundVocal?.text).toBe("世界");
  });
});
/**
 * Checks which `itunes:key` values the generator writes depending on how
 * many of the input lines already carry an `id`.
 */
describe("TTML Generator - Line ID Generation", () => {
  let generator: TTMLGenerator;

  beforeAll(() => {
    const domImplementation = new DOMImplementation();
    const xmlSerializer = new XMLSerializer();
    generator = new TTMLGenerator({ domImplementation, xmlSerializer });
  });

  /** Wraps partial line objects in a result that declares a single agent `v1`. */
  function resultWithLines(
    partialLines: Partial<TTMLResult["lines"][0]>[],
  ): TTMLResult {
    return {
      metadata: { agents: { v1: { id: "v1" } } },
      lines: partialLines as TTMLResult["lines"],
    };
  }

  /** Builds the serialized `itunes:key` attribute text for a given id. */
  const keyAttribute = (id: string): string => `itunes:key="${id}"`;

  it("auto-generates line IDs from L1 when all IDs are missing", () => {
    const output = generator.generate(
      resultWithLines([
        { startTime: 0, endTime: 1000, text: "Line 1" },
        { startTime: 1000, endTime: 2000, text: "Line 2" },
      ]),
    );

    for (const id of ["L1", "L2"]) {
      expect(output).toContain(keyAttribute(id));
    }
  });

  it("regenerates all line IDs when only some IDs are provided", () => {
    const output = generator.generate(
      resultWithLines([
        { id: "Custom1", startTime: 0, endTime: 1000, text: "Line 1" },
        { startTime: 1000, endTime: 2000, text: "Line 2" },
        { id: "Custom3", startTime: 2000, endTime: 3000, text: "Line 3" },
      ]),
    );

    // The partially supplied custom ids must not appear as attribute values.
    for (const discarded of ['"Custom1"', '"Custom3"']) {
      expect(output).not.toContain(discarded);
    }
    // Every line is numbered sequentially instead.
    for (const id of ["L1", "L2", "L3"]) {
      expect(output).toContain(keyAttribute(id));
    }
  });

  it("keeps existing line IDs when all valid IDs are provided", () => {
    const output = generator.generate(
      resultWithLines([
        { id: "Custom1", startTime: 0, endTime: 1000, text: "Line 1" },
        { id: "Custom2", startTime: 1000, endTime: 2000, text: "Line 2" },
      ]),
    );

    for (const id of ["Custom1", "Custom2"]) {
      expect(output).toContain(keyAttribute(id));
    }
    expect(output).not.toContain(keyAttribute("L1"));
  });
});
/**
 * Checks which `<ttm:agent>` declarations and `ttm:agent` attributes the
 * generator writes, depending on whether `metadata.agents` and per-line
 * `agentId` values are supplied.
 */
describe("TTML Generator - Agent Inference and Completion", () => {
  let generator: TTMLGenerator;

  beforeAll(() => {
    const domImplementation = new DOMImplementation();
    const xmlSerializer = new XMLSerializer();
    generator = new TTMLGenerator({ domImplementation, xmlSerializer });
  });

  it("infers and generate default v1 when meta.agents and line agentId are missing", () => {
    const input: TTMLResult = {
      metadata: {},
      lines: [
        { startTime: 0, endTime: 1000, text: "Line 1" },
        { startTime: 1000, endTime: 2000, text: "Line 2" },
      ],
    };

    const output = generator.generate(input);

    expect(output).toContain('<ttm:agent type="person" xml:id="v1"');
    // Both lines reference the default agent.
    const agentReferences = output.match(/ttm:agent="v1"/g) ?? [];
    expect(agentReferences).toHaveLength(2);
  });

  it("infers unique agents from line agentIds when meta.agents is missing", () => {
    const input: TTMLResult = {
      metadata: {},
      lines: [
        { agentId: "v1", startTime: 0, endTime: 1000, text: "Line 1" },
        { agentId: "v2", startTime: 1000, endTime: 2000, text: "Line 2" },
        { agentId: "v1", startTime: 2000, endTime: 3000, text: "Line 3" },
      ],
    };

    const output = generator.generate(input);

    // v1 is used by two lines but must be declared exactly once.
    const v1Declarations =
      output.match(/<ttm:agent type="person" xml:id="v1"/g) ?? [];
    const v2Declarations =
      output.match(/<ttm:agent type="person" xml:id="v2"/g) ?? [];
    expect(v1Declarations).toHaveLength(1);
    expect(v2Declarations).toHaveLength(1);
  });

  it("uses provided meta.agents without auto-inference", () => {
    const input: TTMLResult = {
      metadata: {
        agents: {
          v3: { id: "v3", name: "Custom Singer", type: "person" },
        },
      },
      lines: [
        { agentId: "v1", startTime: 0, endTime: 1000, text: "Line 1" },
        { agentId: "v2", startTime: 1000, endTime: 2000, text: "Line 2" },
      ],
    };

    const output = generator.generate(input);

    // The supplied agent is declared ...
    expect(output).toContain('xml:id="v3"');
    expect(output).toContain("Custom Singer");

    // ... and no declaration is added for the ids used by the lines.
    const undeclared = [
      '<ttm:agent type="person" xml:id="v1"',
      '<ttm:agent type="person" xml:id="v2"',
    ];
    for (const declaration of undeclared) {
      expect(output).not.toContain(declaration);
    }

    // The lines still keep their own agent references.
    const expectedParagraphs = [
      '<p begin="0.000" end="1.000" itunes:key="L1" ttm:agent="v1">',
      '<p begin="1.000" end="2.000" itunes:key="L2" ttm:agent="v2">',
    ];
    for (const paragraph of expectedParagraphs) {
      expect(output).toContain(paragraph);
    }
  });

  it("infers v1000 as a group agent", () => {
    const input: TTMLResult = {
      metadata: {},
      lines: [
        {
          agentId: "v1",
          startTime: 0,
          endTime: 1000,
          text: "Line 1",
        },
        {
          agentId: "v1000",
          startTime: 1000,
          endTime: 2000,
          text: "Chorus Line",
        },
      ],
    };

    const output = generator.generate(input);

    const expectedFragments = [
      '<ttm:agent type="person" xml:id="v1"',
      '<ttm:agent type="group" xml:id="v1000"',
      '<p begin="0.000" end="1.000" itunes:key="L1" ttm:agent="v1">',
      '<p begin="1.000" end="2.000" itunes:key="L2" ttm:agent="v1000">',
    ];
    for (const fragment of expectedFragments) {
      expect(output).toContain(fragment);
    }
  });
});
/**
 * Checks that words carrying `ruby` annotations are serialized with the
 * `tts:ruby` attribute values asserted below and survive a
 * generate -> parse round trip.
 */
describe("TTML Generator - Ruby Generation", () => {
  let generator: TTMLGenerator;
  let parser: TTMLParser;

  beforeAll(() => {
    generator = new TTMLGenerator({
      domImplementation: new DOMImplementation(),
      xmlSerializer: new XMLSerializer(),
    });
    parser = new TTMLParser({ domParser: new DOMParser() });
  });

  it("generates ruby XML with tts namespace and four-level nesting for round-trip", () => {
    // One plain word followed by two words with one and two ruby segments.
    const rubyResult: TTMLResult = {
      metadata: {
        title: ["Ruby Generation Test"],
      },
      lines: [
        {
          id: "L1",
          startTime: 27000,
          endTime: 28000,
          text: "これは所詮",
          words: [
            { text: "これは", startTime: 27000, endTime: 27500 },
            {
              text: "所",
              startTime: 27690,
              endTime: 27820,
              ruby: [{ text: "しょ", startTime: 27690, endTime: 27820 }],
            },
            {
              text: "詮",
              startTime: 27820,
              endTime: 27950,
              ruby: [
                { text: "せ", startTime: 27820, endTime: 27880 },
                { text: "ん", startTime: 27880, endTime: 27950 },
              ],
            },
          ],
        },
      ],
    };

    const output = generator.generate(rubyResult);

    // Namespace declaration, every ruby role, and the plain word's span.
    const expectedFragments = [
      'xmlns:tts="http://www.w3.org/ns/ttml#styling"',
      'tts:ruby="container"',
      'tts:ruby="base"',
      'tts:ruby="textContainer"',
      'tts:ruby="text"',
      'begin="27.000" end="27.500">これは</span>',
    ];
    for (const fragment of expectedFragments) {
      expect(output).toContain(fragment);
    }

    // Parse the generated XML back and inspect the single line.
    const [roundTrippedLine] = parser.parse(output).lines;
    expect(roundTrippedLine.text).toBe("これは所詮");
    expect(roundTrippedLine.words).toBeDefined();
    expect(roundTrippedLine.words).toHaveLength(3);

    const [, singleRubyWord, doubleRubyWord] = roundTrippedLine.words ?? [];

    expect(singleRubyWord?.text).toBe("所");
    expect(singleRubyWord?.ruby).toHaveLength(1);
    expect(singleRubyWord?.ruby?.[0]).toMatchObject({
      text: "しょ",
      startTime: 27690,
      endTime: 27820,
    });

    expect(doubleRubyWord?.text).toBe("詮");
    expect(doubleRubyWord?.ruby).toHaveLength(2);
    expect(doubleRubyWord?.ruby?.[0]).toMatchObject({
      text: "せ",
      startTime: 27820,
      endTime: 27880,
    });
    expect(doubleRubyWord?.ruby?.[1]).toMatchObject({
      text: "ん",
      startTime: 27880,
      endTime: 27950,
    });
  });
});
/**
 * Covers the `obscene` flag: serialization as `amll:obscene`, the
 * generate -> parse round-trip, and conversion from the AMLL line structure.
 */
describe("TTML Generator - Obscene words", () => {
  let generator: TTMLGenerator;
  let parser: TTMLParser;

  beforeAll(() => {
    const domImplementation = new DOMImplementation();
    const xmlSerializer = new XMLSerializer();
    generator = new TTMLGenerator({ domImplementation, xmlSerializer });
    parser = new TTMLParser({ domParser: new DOMParser() });
  });

  it("injects amll:obscene in generated XML and supports round-trip", () => {
    // Flagged plain word, unflagged word, flagged word with a ruby annotation.
    const input: TTMLResult = {
      metadata: {},
      lines: [
        {
          id: "L1",
          startTime: 0,
          endTime: 3000,
          text: "bad word rubyBad",
          words: [
            { text: "bad", startTime: 0, endTime: 1000, obscene: true },
            { text: "word", startTime: 1000, endTime: 2000, endsWithSpace: true },
            {
              text: "rubyBad",
              startTime: 2000,
              endTime: 3000,
              obscene: true,
              ruby: [{ text: "rb", startTime: 2000, endTime: 3000 }],
            },
          ],
        },
      ],
    };

    const serialized = generator.generate(input);
    expect(serialized).toContain('amll:obscene="true">bad</span>');
    expect(serialized).not.toContain('amll:obscene="true">word</span>');
    expect(serialized).toContain('tts:ruby="container" amll:obscene="true"');

    const reparsedWords = parser.parse(serialized).lines[0].words;
    expect(reparsedWords).toBeDefined();
    expect(reparsedWords).toHaveLength(3);
    expect(reparsedWords?.[0].obscene).toBe(true);
    expect(reparsedWords?.[1].obscene).toBeUndefined();
    expect(reparsedWords?.[2].obscene).toBe(true);
  });

  it("restores obscene from AMLL fallback structure", () => {
    // The first AMLL word carries a trailing space and the obscene flag.
    const sourceLines: AmllLyricLine[] = [
      {
        startTime: 0,
        endTime: 1000,
        isBG: false,
        isDuet: false,
        translatedLyric: "",
        romanLyric: "",
        words: [
          { startTime: 0, endTime: 500, word: "bad ", romanWord: "", obscene: true },
          { startTime: 500, endTime: 1000, word: "word", romanWord: "" },
        ],
      },
    ];

    const converted = toTTMLResult(sourceLines, []);
    const syllables = converted.lines[0].words;
    expect(syllables).toBeDefined();
    expect(syllables).toHaveLength(2);

    // Trailing space moves into `endsWithSpace`; the flag is preserved.
    expect(syllables?.[0].text).toBe("bad");
    expect(syllables?.[0].endsWithSpace).toBe(true);
    expect(syllables?.[0].obscene).toBe(true);

    expect(syllables?.[1].text).toBe("word");
    expect(syllables?.[1].endsWithSpace).toBe(false);
    expect(syllables?.[1].obscene).toBeUndefined();
  });
});
/**
 * Covers the `emptyBeat` value: serialization as `amll:empty-beat`, the
 * generate -> parse round-trip, and conversion from the AMLL line structure.
 */
describe("TTML Generator - Empty Beat", () => {
  let generator: TTMLGenerator;
  let parser: TTMLParser;

  beforeAll(() => {
    const domImplementation = new DOMImplementation();
    const xmlSerializer = new XMLSerializer();
    generator = new TTMLGenerator({ domImplementation, xmlSerializer });
    parser = new TTMLParser({ domParser: new DOMParser() });
  });

  it("injects amll:empty-beat in generated XML and support round-trip", () => {
    // Only the first syllable carries an emptyBeat value.
    const input: TTMLResult = {
      metadata: {},
      lines: [
        {
          id: "L1",
          startTime: 0,
          endTime: 2000,
          text: "wait word",
          words: [
            { text: "wait", startTime: 0, endTime: 1000, emptyBeat: 4 },
            { text: "word", startTime: 1000, endTime: 2000 },
          ],
        },
      ],
    };

    const serialized = generator.generate(input);
    expect(serialized).toContain('amll:empty-beat="4">wait</span>');
    // The attribute must appear exactly once in the whole document.
    const attributeHits = serialized.match(/amll:empty-beat/g);
    expect(attributeHits?.length).toBe(1);

    const reparsedWords = parser.parse(serialized).lines[0].words;
    expect(reparsedWords).toBeDefined();
    expect(reparsedWords).toHaveLength(2);
    expect(reparsedWords?.[0].emptyBeat).toBe(4);
    expect(reparsedWords?.[1].emptyBeat).toBeUndefined();
  });

  it("restores emptyBeat from AMLL fallback structure", () => {
    const sourceLines: AmllLyricLine[] = [
      {
        startTime: 0,
        endTime: 1000,
        isBG: false,
        isDuet: false,
        translatedLyric: "",
        romanLyric: "",
        words: [
          { startTime: 0, endTime: 500, word: "wait ", romanWord: "", emptyBeat: 8 },
          { startTime: 500, endTime: 1000, word: "word", romanWord: "" },
        ],
      },
    ];

    const converted = toTTMLResult(sourceLines, []);
    const syllables = converted.lines[0].words;
    expect(syllables).toBeDefined();
    expect(syllables).toHaveLength(2);

    expect(syllables?.[0].text).toBe("wait");
    expect(syllables?.[0].emptyBeat).toBe(8);

    expect(syllables?.[1].text).toBe("word");
    expect(syllables?.[1].emptyBeat).toBeUndefined();
  });
});

View File

@@ -0,0 +1,883 @@
import { readFileSync } from "node:fs";
import { join } from "node:path";
import { DOMImplementation, DOMParser, XMLSerializer } from "@xmldom/xmldom";
import { beforeAll, describe, expect, it } from "vitest";
import type { AmllLyricLine, SubLyricContent, TTMLResult } from "../src/index";
import {
TTMLGenerator,
TTMLParser,
toAmllLyrics,
toTTMLResult,
} from "../src/index";
// Fixture documents are read once at module load and shared by the suites below.

/** Text of the `complex-test-song.ttml` fixture. */
const XML = readFileSync(
  join(import.meta.dirname, "fixtures", "complex-test-song.ttml"),
  { encoding: "utf-8" },
);

/** Text of the `ruby-test-song.ttml` fixture. */
const RUBY_XML = readFileSync(
  join(import.meta.dirname, "fixtures", "ruby-test-song.ttml"),
  { encoding: "utf-8" },
);
/**
 * End-to-end parser checks against the `complex-test-song.ttml` fixture:
 * metadata, per-line content, translations, romanizations, background vocals
 * and a parse -> generate -> parse round-trip.
 */
describe("TTML Integration Test", () => {
  let parser: TTMLParser;
  let result: TTMLResult;

  beforeAll(() => {
    parser = new TTMLParser({ domParser: new DOMParser() });
    result = parser.parse(XML);
  });

  /** Returns the parsed line with the given id, throwing when it is absent. */
  function getLine(id: string) {
    for (const candidate of result.lines) {
      if (candidate.id === id) return candidate;
    }
    throw new Error(`找不到 ID 为 ${id} 的歌词行`);
  }

  /** Returns the translation of `item` in language `lang`, throwing when absent. */
  function getTranslation(
    item: { translations?: SubLyricContent[] },
    lang: string,
  ) {
    const match = item.translations?.find((entry) => entry.language === lang);
    if (match) return match;
    throw new Error(`未找到语言为 ${lang} 的翻译`);
  }

  /** Returns the romanization of `item` in language `lang`, throwing when absent. */
  function getRomanization(
    item: { romanizations?: SubLyricContent[] },
    lang: string,
  ) {
    const match = item.romanizations?.find((entry) => entry.language === lang);
    if (match) return match;
    throw new Error(`未找到语言为 ${lang} 的音译`);
  }

  /** True when a sub-lyric entry carries at least one word-level item. */
  const hasWords = (entry: SubLyricContent) => (entry.words?.length ?? 0) > 0;

  it("parses global language and timing mode", () => {
    const meta = result.metadata;
    expect(meta.language).toBe("ja");
    expect(meta.timingMode).toBe("Word");
    expect(meta.title).toHaveLength(2);
    expect(meta.title).toEqual(["Complex Test Song", "複雑なテストソング"]);
  });

  it("parses platform IDs", () => {
    const ids = result.metadata.platformIds;
    expect(ids?.ncmMusicId).toContain("123456789");
    expect(ids?.qqMusicId).toContain("987654321");
    expect(ids?.spotifyId).toContain("abc123xyz");
    expect(ids?.appleMusicId).toContain("999888777");
  });

  it("parses artists list", () => {
    const artists = result.metadata.artist;
    expect(artists).toHaveLength(2);
    expect(artists).toContain("Vocalist A (Taro)");
    expect(artists).toContain("Vocalist B (Hanako)");
  });

  it("builds an agent map", () => {
    const agents = result.metadata.agents;
    expect(agents?.v1?.name).toBe("Vocalist A (Taro)");
    expect(agents?.v1000?.name).toBe("Chorus Group");
  });

  it("parses songwriters list", () => {
    const songwriters = result.metadata.songwriters;
    expect(songwriters).toBeInstanceOf(Array);
    expect(songwriters).toHaveLength(2);
    expect(songwriters).toContain("作曲者1号");
    expect(songwriters).toContain("作曲者2号");
  });

  it("parses ISRC", () => {
    const isrc = result.metadata.isrc;
    expect(isrc).toBeInstanceOf(Array);
    expect(isrc).toContain("JPXX02500001");
  });

  it("parses verse and agent in L1", () => {
    const firstLine = getLine("L1");
    expect(firstLine.songPart).toBe("Verse");
    expect(firstLine.agentId).toBe("v1");
  });

  it("merges translations from Head in L1", () => {
    const firstLine = getLine("L1");
    const english = getTranslation(firstLine, "en-US");
    const chinese = getTranslation(firstLine, "zh-Hans-CN");
    expect(english.text).toBe("This is the first line (Vocalist A)");
    expect(chinese.text).toBe("这是第一行歌词 (演唱者A)");
  });

  it("merges word-level romanization from Head", () => {
    const romanization = getRomanization(getLine("L1"), "ja-Latn");
    expect(romanization.words).toBeInstanceOf(Array);
    expect(romanization.words).toMatchObject([
      { text: "Ko", startTime: 10000, endTime: 10500, endsWithSpace: false },
      { text: "re", startTime: 10500, endTime: 10800, endsWithSpace: true },
      { text: "wa", startTime: 10800, endTime: 11000, endsWithSpace: true },
      { text: "tesuto", startTime: 11200, endTime: 11800, endsWithSpace: false },
    ]);
  });

  it("handles explicit whitespace spans in L1", () => {
    expect(getLine("L1").words).toMatchObject([
      { text: "これ" },
      { text: "は", endsWithSpace: true },
      { text: "テスト" },
    ]);
  });

  it("handles complex background vocal nesting in L3", () => {
    const chorusLine = getLine("L3");
    expect(chorusLine.songPart).toBe("Chorus");
    expect(chorusLine.agentId).toBe("v1000");
    expect(chorusLine.text).toContain("コーラス です");
    expect(chorusLine.backgroundVocal).toBeDefined();

    const background = chorusLine.backgroundVocal;
    if (!background) throw new Error("背景人声数组中未找到数据");
    expect(background.text).toBe("背景");

    const english = getTranslation(background, "en");
    expect(english.text).toBe("Background");

    const romanization = getRomanization(background, "ja-Latn");
    expect(romanization.text).toBe("haikei");
  });

  it("keeps both inline Body translation (en) and Head translation (en-US) in L3", () => {
    const background = getLine("L3").backgroundVocal;
    if (!background) throw new Error("背景人声数组中未找到数据");

    const inlineEnglish = getTranslation(background, "en");
    expect(inlineEnglish.text).toBe("Background");

    const headEnglish = getTranslation(background, "en-US");
    expect(headEnglish.text).toBe("With background");
  });

  it("parses all lyric lines", () => {
    expect(result.lines).toBeInstanceOf(Array);
    expect(result.lines).toHaveLength(3);

    const ids = result.lines.map((line) => line.id);
    for (const expectedId of ["L1", "L2", "L3"]) {
      expect(ids).toContain(expectedId);
    }
  });

  it("parses the second line in L2", () => {
    const secondLine = getLine("L2");
    expect(secondLine.songPart).toBe("Verse");
    expect(secondLine.agentId).toBe("v2");
    for (const piece of ["二つ目", "の", "ライン"]) {
      expect(secondLine.text).toContain(piece);
    }
  });

  it("parses word-level timings in L2", () => {
    expect(getLine("L2").words).toMatchObject([
      { text: "二つ目", startTime: 15000, endTime: 15800, endsWithSpace: true },
      { text: "の", startTime: 16000, endTime: 16500, endsWithSpace: true },
      { text: "ライン", startTime: 16500, endTime: 17000 },
    ]);
  });

  it("validates time ranges for all lines", () => {
    // [line id, expected start, expected end] in milliseconds.
    const expectedRanges: Array<[string, number, number]> = [
      ["L1", 10000, 12000],
      ["L2", 15000, 17000],
      ["L3", 20000, 25000],
    ];
    for (const [id, start, end] of expectedRanges) {
      const line = getLine(id);
      expect(line.startTime).toBe(start);
      expect(line.endTime).toBe(end);
    }
  });

  it("validates word-level timing accuracy in L1", () => {
    expect(getLine("L1").words).toMatchObject([
      { startTime: 10000, endTime: 10500 },
      { startTime: 10500, endTime: 10800 },
      { startTime: 11200, endTime: 11800 },
    ]);
  });

  it("parses album metadata", () => {
    const album = result.metadata.album;
    expect(album).toBeInstanceOf(Array);
    expect(album).toHaveLength(1);
    expect(album?.[0]).toBe("AMLL Parser Test Suite");
  });

  it("parses author metadata", () => {
    const { authorIds, authorNames } = result.metadata;

    expect(authorIds).toBeInstanceOf(Array);
    expect(authorIds).toHaveLength(1);
    expect(authorIds?.[0]).toBe("10001");

    expect(authorNames).toBeInstanceOf(Array);
    expect(authorNames).toHaveLength(1);
    expect(authorNames?.[0]).toBe("TestUser");
  });

  it("merges translations and romanization in L2", () => {
    const secondLine = getLine("L2");
    const english = getTranslation(secondLine, "en-US");
    const chinese = getTranslation(secondLine, "zh-Hans-CN");
    expect(english.text).toBe("This is the second line (Vocalist B)");
    expect(chinese.text).toBe("这是第二行歌词 (演唱者B)");

    const romanization = getRomanization(secondLine, "ja-Latn");
    expect(romanization.words).toBeInstanceOf(Array);
    expect(romanization.words).toHaveLength(3);
  });

  it("parses word-level romanization timings in L2", () => {
    const romanization = getRomanization(getLine("L2"), "ja-Latn");
    expect(romanization.words).toMatchObject([
      { text: "Futatsume", startTime: 15000, endTime: 15800, endsWithSpace: true },
      { text: "no", startTime: 16000, endTime: 16500, endsWithSpace: true },
      { text: "rain", startTime: 16500, endTime: 17000 },
    ]);
  });

  it("parses word-level timings for main lyrics in L3", () => {
    expect(getLine("L3").words).toMatchObject([
      { text: "コーラス", startTime: 20000, endTime: 21500, endsWithSpace: true },
      { text: "です", startTime: 21500, endTime: 22000 },
    ]);
  });

  it("parses background vocal timing and words in L3", () => {
    const background = getLine("L3").backgroundVocal;
    if (!background) throw new Error("未找到背景人声");

    expect(background.startTime).toBe(22500);
    expect(background.endTime).toBe(23800);
    expect(background.words).toMatchObject([
      { text: "背景", startTime: 22500, endTime: 23800 },
    ]);
  });

  it("parses word-level timings for background romanization in L3", () => {
    const background = getLine("L3").backgroundVocal;
    if (!background) throw new Error("未找到背景人声");

    // Pick the ja-Latn entry that actually carries word-level data.
    const timedRomanization = background.romanizations?.find(
      (entry) => entry.language === "ja-Latn" && hasWords(entry),
    );
    if (!timedRomanization) {
      throw new Error("未找到包含字级别数据的 ja-Latn 音译");
    }
    expect(timedRomanization.words).toMatchObject([
      { text: "haikei", startTime: 22500, endTime: 23800 },
    ]);
  });

  it("keeps both inline Body romanization and Head sidecar word-level romanization in L3", () => {
    const background = getLine("L3").backgroundVocal;
    if (!background) throw new Error("未找到背景人声");

    const japaneseRomanizations =
      background.romanizations?.filter(
        (entry) => entry.language === "ja-Latn",
      ) ?? [];
    expect(japaneseRomanizations.length).toBeGreaterThanOrEqual(2);

    // Entry without word-level data.
    const inlineEntry = japaneseRomanizations.find((entry) => !hasWords(entry));
    expect(inlineEntry?.text).toBe("haikei");

    // Entry with word-level data.
    const sidecarEntry = japaneseRomanizations.find((entry) => hasWords(entry));
    expect(sidecarEntry?.words).toMatchObject([
      { text: "haikei", startTime: 22500, endTime: 23800 },
    ]);
  });

  it("parses background role markers in translations in L3", () => {
    const chorusLine = getLine("L3");

    const english = getTranslation(chorusLine, "en-US");
    expect(english.text).toContain("This is the chorus line");

    const chinese = getTranslation(chorusLine, "zh-Hans-CN");
    expect(chinese.text).toContain("这是合唱部分");
  });

  it("flattens background vocal data in L3 translations", () => {
    const chorusLine = getLine("L3");

    // The main-line translation must not nest a backgroundVocal of its own.
    const mainTranslation = getTranslation(chorusLine, "en-US");
    expect("backgroundVocal" in mainTranslation).toBe(false);

    const background = chorusLine.backgroundVocal;
    expect(background).toBeDefined();
    if (!background) throw new Error();

    const backgroundTranslation = getTranslation(background, "en-US");
    expect(backgroundTranslation).toBeDefined();
    expect(backgroundTranslation.text).toBe("With background");
  });

  it("composes full text correctly", () => {
    const expectedTexts: Array<[string, string]> = [
      ["L1", "これは テスト"],
      ["L2", "二つ目 の ライン"],
      ["L3", "コーラス です"],
    ];
    for (const [id, text] of expectedTexts) {
      expect(getLine(id).text).toBe(text);
    }
  });

  it("maps all vocalists correctly", () => {
    const agents = result.metadata.agents;
    expect(agents).toBeDefined();
    expect(Object.keys(agents ?? {})).toHaveLength(3);
    expect(agents?.v1?.name).toBe("Vocalist A (Taro)");
    expect(agents?.v2?.name).toBe("Vocalist B (Hanako)");
    expect(agents?.v1000?.name).toBe("Chorus Group");
  });

  it("parses merged romanized text", () => {
    const romanization = getRomanization(getLine("L1"), "ja-Latn");
    expect(romanization.text).toBe("Kore wa tesuto");
  });

  it("parses merged translation text", () => {
    const firstLine = getLine("L1");
    expect(getTranslation(firstLine, "en-US").text).toBe(
      "This is the first line (Vocalist A)",
    );
    expect(getTranslation(firstLine, "zh-Hans-CN").text).toBe(
      "这是第一行歌词 (演唱者A)",
    );
  });

  it("parses obscene marker on regular syllables (amll:obscene) in L1", () => {
    const syllables = getLine("L1").words;
    expect(syllables).toBeDefined();

    expect(syllables?.[0].text).toBe("これ");
    expect(syllables?.[0].obscene).toBe(true);

    expect(syllables?.[1].text).toBe("は");
    expect(syllables?.[1].obscene).toBeUndefined();
  });

  it("parses empty-beat marker on regular syllables (amll:empty-beat) in L1", () => {
    const syllables = getLine("L1").words;
    expect(syllables).toBeDefined();

    expect(syllables?.[2].text).toBe("テスト");
    expect(syllables?.[2].emptyBeat).toBe(5);

    expect(syllables?.[1].text).toBe("は");
    expect(syllables?.[1].emptyBeat).toBeUndefined();
  });

  it("ensures all timings are valid numbers", () => {
    for (const line of result.lines) {
      expect(typeof line.startTime).toBe("number");
      expect(typeof line.endTime).toBe("number");
      expect(line.startTime).toBeGreaterThanOrEqual(0);
      expect(line.endTime).toBeGreaterThan(line.startTime);

      // Words may have zero duration, hence ">=" rather than ">".
      for (const word of line.words ?? []) {
        expect(typeof word.startTime).toBe("number");
        expect(typeof word.endTime).toBe("number");
        expect(word.startTime).toBeGreaterThanOrEqual(0);
        expect(word.endTime).toBeGreaterThanOrEqual(word.startTime);
      }

      const background = line.backgroundVocal;
      if (!background) continue;
      expect(typeof background.startTime).toBe("number");
      expect(typeof background.endTime).toBe("number");
      expect(background.startTime).toBeGreaterThanOrEqual(0);
      expect(background.endTime).toBeGreaterThan(background.startTime);
    }
  });

  it("ensures all text fields are valid strings", () => {
    for (const line of result.lines) {
      expect(typeof line.text).toBe("string");
      expect(line.text.length).toBeGreaterThan(0);
      expect(typeof line.id).toBe("string");
      expect(line.id?.length).toBeGreaterThan(0);

      for (const word of line.words ?? []) {
        expect(typeof word.text).toBe("string");
        expect(word.text.length).toBeGreaterThan(0);
      }
    }
  });

  it("preserve the data structure in Parse -> Generate -> Parse round-trip", () => {
    const firstPass = parser.parse(XML);

    const domImplementation = new DOMImplementation();
    const xmlSerializer = new XMLSerializer();
    const generator = new TTMLGenerator({
      domImplementation,
      xmlSerializer,
      useSidecar: false,
    });
    const regenerated = generator.generate(firstPass);

    const secondParser = new TTMLParser({ domParser: new DOMParser() });
    const secondPass = secondParser.parse(regenerated);

    expect(secondPass).toEqual(firstPass);
  });

  it("handles translation containing ONLY background vocals", () => {
    // The x-translation span wraps nothing but an x-bg span.
    const xml = `
<tt xmlns="http://www.w3.org/ns/ttml"
xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
<body>
<div>
<p begin="0s" end="1s" itunes:key="L1">
Main Lyric
<span ttm:role="x-bg">(Bg Lyric)</span>
<span ttm:role="x-translation" xml:lang="en">
<span ttm:role="x-bg">Only Bg Translation</span>
</span>
</p>
</div>
</body>
</tt>
`;
    const standaloneParser = new TTMLParser({ domParser: new DOMParser() });
    const firstLine = standaloneParser.parse(xml).lines[0];

    expect(firstLine).toMatchObject({
      translations: undefined,
      backgroundVocal: {
        translations: [{ text: "Only Bg Translation", language: "en" }],
      },
    });
  });
});
/**
 * Checks the conversion of a parsed TTMLResult into the flat AMLL line list:
 * ordering, duet/background flags, romanization alignment, per-word markers
 * and ruby annotations.
 */
describe("toAmllLyrics Conversion", () => {
  let parser: TTMLParser;
  let result: TTMLResult;
  let amllLines: AmllLyricLine[];

  beforeAll(() => {
    parser = new TTMLParser({ domParser: new DOMParser() });
    result = parser.parse(XML);
    amllLines = toAmllLyrics(result).lines;
  });

  /**
   * Renders each line as a one-line summary (start time in seconds, side,
   * main/bg marker, joined text) for snapshot comparison.
   */
  function toLayoutSnapshot(lines: AmllLyricLine[]) {
    return lines.map((line) => {
      const seconds = line.startTime / 1000;
      const time = seconds.toFixed(2).padStart(6, " ");
      const position = line.isDuet ? "右" : "左";
      const typeMark = line.isBG ? "[bg]" : "[main]";
      const joined = line.words.map((w) => w.word).join("");
      const text = joined.trim();
      return `[${time}s] ${position} ${typeMark} : ${text}`;
    });
  }

  it("converts to a flattened array", () => {
    expect(amllLines).toBeInstanceOf(Array);
    expect(amllLines).toHaveLength(4);
  });

  it("is sorted correctly", () => {
    // Every line must start no later than its successor.
    for (let next = 1; next < amllLines.length; next++) {
      expect(amllLines[next - 1].startTime).toBeLessThanOrEqual(
        amllLines[next].startTime,
      );
    }
  });

  it("preserves word alignment for L1", () => {
    const firstLine = amllLines[0];
    expect(firstLine.words).toMatchObject([
      { romanWord: "Ko" },
      { romanWord: "re" },
      { romanWord: "tesuto" },
    ]);
  });

  it("does not align romanization to nearby punctuation with different end time", () => {
    // The punctuation span sits between two romanized syllables in time.
    const xml = `
<tt xmlns="http://www.w3.org/ns/ttml"
xmlns:itunes="http://music.apple.com/lyric-ttml-internal"
xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
<head>
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
<transliterations>
<transliteration xml:lang="ja-Latn">
<text for="L26">
<span begin="2:00.250" end="2:00.690" xmlns="http://www.w3.org/ns/ttml">ko</span>
<span begin="2:00.695" end="2:00.870" xmlns="http://www.w3.org/ns/ttml">u</span>
<span begin="2:01.070" end="2:01.470" xmlns="http://www.w3.org/ns/ttml">ki</span>
</text>
</transliteration>
</transliterations>
</iTunesMetadata>
</head>
<body>
<div>
<p begin="1:57.890" end="2:03.860" itunes:key="L26" ttm:agent="v1">
<span begin="2:00.250" end="2:00.690">処</span>
<span begin="2:00.690" end="2:00.695">、</span>
<span begin="2:00.695" end="2:00.870">浮</span>
<span begin="2:01.070" end="2:01.470">き</span>
</p>
</div>
</body>
</tt>
`;
    const standaloneParser = new TTMLParser({ domParser: new DOMParser() });
    const converted = toAmllLyrics(standaloneParser.parse(xml)).lines;

    expect(converted[0].words).toMatchObject([
      { word: "処", romanWord: "ko" },
      { word: "、", romanWord: "" },
      { word: "浮", romanWord: "u" },
      { word: "き", romanWord: "ki" },
    ]);
  });

  it("handles duet flags", () => {
    const expectedFlags = [false, true, false];
    expectedFlags.forEach((flag, index) => {
      expect(amllLines[index].isDuet).toBe(flag);
    });
  });

  it("sets the isBG flag", () => {
    const backgroundLine = amllLines[3];
    expect(backgroundLine.isBG).toBe(true);
    expect(backgroundLine.translatedLyric).toBe("Background");
    expect(backgroundLine.romanLyric).toBe("haikei");
  });

  it("passes through obscene to AmllLyricWord", () => {
    const { words } = amllLines[0];

    expect(words[0].word).toBe("これ");
    expect(words[0].obscene).toBe(true);

    expect(words[1].word).toBe("は ");
    expect(words[1].obscene).toBeUndefined();
  });

  it("passes through emptyBeat to AmllLyricWord", () => {
    const { words } = amllLines[0];

    expect(words[2].word).toBe("テスト");
    expect(words[2].emptyBeat).toBe(5);

    expect(words[1].word).toBe("は ");
    expect(words[1].emptyBeat).toBeUndefined();
  });

  it.each([
    [
      "left-right duet layout in Apple Music style with multiple singers",
      "apple-music-duet.ttml",
    ],
    ["Apple Music TTML with v2000 other agent", "apple-music-other-duet.ttml"],
  ])("computes %s correctly in duet alignment", (_, fixture) => {
    const fixturePath = join(import.meta.dirname, "fixtures", fixture);
    const fixtureXml = readFileSync(fixturePath, "utf-8");
    const converted = toAmllLyrics(parser.parse(fixtureXml)).lines;
    expect(toLayoutSnapshot(converted)).toMatchSnapshot();
  });

  it("converts Syllable.ruby to AmllLyricWord.ruby", () => {
    const rubyInput: TTMLResult = {
      metadata: {},
      lines: [
        {
          startTime: 0,
          endTime: 1000,
          text: "所詮",
          words: [
            {
              text: "所",
              startTime: 0,
              endTime: 500,
              ruby: [{ text: "しょ", startTime: 0, endTime: 500 }],
            },
            {
              text: "詮",
              startTime: 500,
              endTime: 1000,
              ruby: [
                { text: "せ", startTime: 500, endTime: 750 },
                { text: "ん", startTime: 750, endTime: 1000 },
              ],
            },
          ],
        },
      ],
    };

    const [firstWord, secondWord] = toAmllLyrics(rubyInput).lines[0].words;

    // Ruby entries use `word` on the AMLL side instead of `text`.
    expect(firstWord.ruby).toBeDefined();
    expect(firstWord.ruby).toMatchObject([
      { word: "しょ", startTime: 0, endTime: 500 },
    ]);

    expect(secondWord.ruby).toBeDefined();
    expect(secondWord.ruby).toMatchObject([
      { word: "せ", startTime: 500, endTime: 750 },
      { word: "ん", startTime: 750, endTime: 1000 },
    ]);
  });
});
/**
 * Parser checks against the `ruby-test-song.ttml` fixture: line text,
 * syllable extraction and the ruby annotations attached to each syllable.
 */
describe("TTML Ruby Integration Test", () => {
  let parser: TTMLParser;
  let result: TTMLResult;

  beforeAll(() => {
    parser = new TTMLParser({ domParser: new DOMParser() });
    result = parser.parse(RUBY_XML);
  });

  /** Looks up line "L1" in the parsed fixture (undefined when missing). */
  const findFirstLine = () => result.lines.find((line) => line.id === "L1");

  it("parses full line text including ruby base text", () => {
    const firstLine = findFirstLine();
    expect(firstLine).toBeDefined();
    expect(firstLine?.text).toBe("これは所詮");
  });

  it("extracts ruby containers as standalone syllables with inferred timings", () => {
    const syllables = findFirstLine()?.words;
    expect(syllables).toBeDefined();
    expect(syllables).toHaveLength(3);

    expect(syllables?.[0].text).toBe("これは");
    expect(syllables?.[0].startTime).toBe(27000);

    expect(syllables?.[1].text).toBe("所");
    expect(syllables?.[1].startTime).toBe(27690);
    expect(syllables?.[1].endTime).toBe(27820);

    expect(syllables?.[2].text).toBe("詮");
    expect(syllables?.[2].startTime).toBe(27820);
    expect(syllables?.[2].endTime).toBe(27950);
  });

  it("extracts ruby annotation arrays (RubyTags)", () => {
    const syllables = findFirstLine()?.words;

    const singleAnnotation = syllables?.[1].ruby;
    expect(singleAnnotation).toBeDefined();
    expect(singleAnnotation).toHaveLength(1);
    expect(singleAnnotation?.[0]).toMatchObject({
      text: "しょ",
      startTime: 27690,
      endTime: 27820,
    });

    const doubleAnnotation = syllables?.[2].ruby;
    expect(doubleAnnotation).toBeDefined();
    expect(doubleAnnotation).toHaveLength(2);
    expect(doubleAnnotation?.[0]).toMatchObject({
      text: "せ",
      startTime: 27820,
      endTime: 27880,
    });
    expect(doubleAnnotation?.[1]).toMatchObject({
      text: "ん",
      startTime: 27880,
      endTime: 27950,
    });
  });

  it("excludes ruby from regular syllables", () => {
    const syllables = findFirstLine()?.words;
    expect(syllables?.[0].ruby).toBeUndefined();
  });
});
/** Checks the conversion of AMLL lines back into a TTMLResult. */
describe("toTTMLResult Conversion", () => {
  it("converts AmllLyricWord.ruby to Syllable.ruby", () => {
    // Two words: one with a single ruby entry, one with two.
    const sourceLines: AmllLyricLine[] = [
      {
        startTime: 0,
        endTime: 1000,
        isBG: false,
        isDuet: false,
        translatedLyric: "",
        romanLyric: "",
        words: [
          {
            word: "所",
            startTime: 0,
            endTime: 500,
            ruby: [{ word: "しょ", startTime: 0, endTime: 500 }],
          },
          {
            word: "詮",
            startTime: 500,
            endTime: 1000,
            ruby: [
              { word: "せ", startTime: 500, endTime: 750 },
              { word: "ん", startTime: 750, endTime: 1000 },
            ],
          },
        ],
      },
    ];

    const converted = toTTMLResult(sourceLines, []);
    const syllables = converted.lines[0].words;
    expect(syllables).toBeDefined();

    // Ruby entries use `text` on the TTML side instead of `word`.
    expect(syllables?.[0].ruby).toBeDefined();
    expect(syllables?.[0].ruby).toMatchObject([
      { text: "しょ", startTime: 0, endTime: 500 },
    ]);

    expect(syllables?.[1].ruby).toBeDefined();
    expect(syllables?.[1].ruby).toMatchObject([
      { text: "せ", startTime: 500, endTime: 750 },
      { text: "ん", startTime: 750, endTime: 1000 },
    ]);
  });
});
/**
 * Checks the `blockIndex` assigned to parsed lines of the
 * `apple-music-duet.ttml` fixture and its stability across a
 * generate -> parse round-trip.
 */
describe("TTML BlockIndex Boundary Tests", () => {
  let parser: TTMLParser;
  let result: TTMLResult;

  beforeAll(() => {
    parser = new TTMLParser({ domParser: new DOMParser() });
    const fixturePath = join(
      import.meta.dirname,
      "fixtures",
      "apple-music-duet.ttml",
    );
    result = parser.parse(readFileSync(fixturePath, "utf-8"));
  });

  it("assigns an incremental blockIndex to parsed lines", () => {
    // Highest index seen so far; indices must never decrease.
    let floor = 0;
    for (const { blockIndex } of result.lines) {
      expect(blockIndex).toBeDefined();
      expect(blockIndex).toBeTypeOf("number");
      expect(blockIndex).toBeGreaterThanOrEqual(floor);
      if (blockIndex !== undefined) {
        floor = blockIndex;
      }
    }
  });

  it("preserves continuous block boundaries in Generator round-trip", () => {
    const domImplementation = new DOMImplementation();
    const xmlSerializer = new XMLSerializer();
    const generator = new TTMLGenerator({
      domImplementation,
      xmlSerializer,
      useSidecar: true,
    });
    const regenerated = generator.generate(result);

    const secondParser = new TTMLParser({ domParser: new DOMParser() });
    const secondPass = secondParser.parse(regenerated);

    const blockIndexOf = (line: { blockIndex?: number }) => line.blockIndex;
    const distinctBefore = new Set(result.lines.map(blockIndexOf));
    const distinctAfter = new Set(secondPass.lines.map(blockIndexOf));

    // Same number of distinct blocks, and the same per-line sequence.
    expect(distinctAfter.size).toBe(distinctBefore.size);
    expect(secondPass.lines.map(blockIndexOf)).toEqual(
      result.lines.map(blockIndexOf),
    );
  });

  it("distinguishes adjacent containers with the same songPart using exact blockIndex assertions", () => {
    const getLine = (id: string) => {
      const found = result.lines.find((line) => line.id === id);
      if (found === undefined) throw new Error(`找不到 ID 为 ${id} 的歌词行`);
      return found;
    };

    // [line id, expected blockIndex]; every listed line is a "Verse".
    const expectedBlocks: Array<[string, number]> = [
      ["L18", 4],
      ["L19", 5],
      ["L27", 7],
      ["L28", 8],
      ["L31", 8],
      ["L32", 9],
      ["L38", 9],
      ["L39", 10],
      ["L43", 10],
      ["L44", 11],
      ["L48", 11],
      ["L49", 12],
    ];

    for (const [id, blockIndex] of expectedBlocks) {
      const line = getLine(id);
      expect(line.songPart).toBe("Verse");
      expect(line.blockIndex).toBe(blockIndex);
    }
  });
});