commit 02e68a4c764537f911f5f17a1b82cdb9c101903e from: Oliver Lowe date: Mon Jul 28 11:28:55 2025 UTC m3u8: parse segment title from EXTINF Resolves: https://github.com/untangledco/streaming/issues/41 commit - a42aeb2b574ec7a0b514a5f5bc08485de1c54bde commit + 02e68a4c764537f911f5f17a1b82cdb9c101903e blob - 0e22fc020003e7247af625a953ad3f112aa4c8f8 blob + bced687c45bbfae7a935c71e155313e42365ccd0 --- m3u8/lex.go +++ m3u8/lex.go @@ -256,6 +256,7 @@ func lexAttrValue(l *lexer) stateFn { } func lexNumber(l *lexer) stateFn { +Loop: for { switch r := l.peek(); r { case 'x', '@': @@ -274,11 +275,27 @@ func lexNumber(l *lexer) stateFn { return lexRawString(l) default: l.emit(itemNumber) - return lexAttrs(l) + break Loop } } + + // Hack to lex segment titles. A title can be any UTF-8 text until a newline! + // Titles are an exception since they don't follow any of the + // other rules for text in HLS tags. Since titles can only be + // placed after the segment duration we jam our lexing workaround here. + // The lexer is too clever/complicated and I'm not smart enough to handle anything smarter. + // TODO(otl): good case for using a more basic line-by-line parser + if strings.HasPrefix(l.input, tagSegmentDuration) { + if l.peek() == ',' { + l.next() + l.emit(itemComma) + return lexSegmentTitle(l) + } + } + return lexAttrs(l) } + func lexQString(l *lexer) stateFn { for { r := l.next() @@ -301,3 +318,14 @@ func lexRawString(l *lexer) stateFn { l.emit(itemString) return lexAttrs(l) } + +func lexSegmentTitle(l *lexer) stateFn { + for { + if l.peek() == '\n' { + break + } + l.next() + } + l.emit(itemString) + return lexStart(l) +} blob - 42e7b755a242e367ee4becedb63bba89ecd28c46 blob + 1b880fac89073ac5d06fe7a1787457f38dec870e --- m3u8/m3u8.go +++ m3u8/m3u8.go @@ -44,6 +44,10 @@ type Segment struct { // Duration of this specific segment from the EXTINF tag. Duration time.Duration + // Title is an optional human-readable name of the segment + // from the EXTINF tag. + Title string + // Indicates this segment holds a subset of the segment point to by URI. // Range is the length of the subsegment from the EXT-X-BYTERANGE tag. Range ByteRange blob - da1832535bcf1ffd0759decdc5da937d5e849971 blob + de46b716975920fe124b3fd9d63d978052071800 --- m3u8/parse_test.go +++ m3u8/parse_test.go @@ -125,7 +125,7 @@ func TestVariant(t *testing.T) { func TestParseDuration(t *testing.T) { want := 9967000 * time.Microsecond it := item{typ: itemNumber, val: "9.967"} - dur, err := parseSegmentDuration(it) + dur, err := parseSegmentDuration(it.val) if err != nil { t.Fatal(err) } blob - 0a40a92ff5f85cbd5b98a03af439c521889bd4ee blob + cc0f04e96633932538c776a3676665f8d35363f7 --- m3u8/segment.go +++ m3u8/segment.go @@ -58,11 +58,26 @@ func parseSegment(items chan item, leading item) (*Seg switch it.val { case tagSegmentDuration: it = <-segItems - dur, err := parseSegmentDuration(it) + if it.typ != itemAttrName && it.typ != itemNumber { + return nil, fmt.Errorf("parse segment duration: unexpected %s: want attribute name or number", it) + } + dur, err := parseSegmentDuration(it.val) if err != nil { return nil, fmt.Errorf("parse segment duration: %w", err) } seg.Duration = dur + + // check for the optional segment title + it = <-segItems + if it.typ == itemNewline { + continue + } else if it.typ != itemComma { + return nil, fmt.Errorf("expected comma after segment duration, got %s", it) + } + it = <-segItems + seg.Title = it.val + + case tagByteRange: it = <-segItems r, err := parseByteRange(it.val) @@ -98,25 +113,23 @@ func parseSegment(items chan item, leading item) (*Seg return nil, fmt.Errorf("no url") } -func parseSegmentDuration(it item) (time.Duration, error) { - if it.typ != itemAttrName && it.typ != itemNumber { - return 0, fmt.Errorf("got %s: want attribute name or number", it) - } +func parseSegmentDuration(s string) (time.Duration, error) { // Some numbers can be converted straight to ints, e.g.: // 10 // 10.000 // Others need to be converted from floating point, e.g: // 9.967 // Try the easiest paths first. - if !strings.Contains(it.val, ".") { - i, err := strconv.Atoi(it.val) + if !strings.Contains(s, ".") { + i, err := strconv.Atoi(s) if err != nil { return 0, err } return time.Duration(i) * time.Second, nil } + // 10.000 - before, after, _ := strings.Cut(it.val, ".") + before, after, _ := strings.Cut(s, ".") var allZeroes = true for r := range after { if r != '0' { @@ -130,7 +143,7 @@ func parseSegmentDuration(it item) (time.Duration, err } return time.Duration(i) * time.Second, nil } - seconds, err := strconv.ParseFloat(it.val, 32) + seconds, err := strconv.ParseFloat(s, 32) if err != nil { return 0, err } @@ -285,7 +298,11 @@ func (seg *Segment) MarshalText() ([]byte, error) { } us := seg.Duration / time.Microsecond // we do .03f for the same precision as test-streams.mux.dev. - tags = append(tags, fmt.Sprintf("%s:%.03f", tagSegmentDuration, float32(us)/1e6)) + durTag := fmt.Sprintf("%s:%.03f", tagSegmentDuration, float32(us)/1e6) + if seg.Title != "" { + durTag += ","+seg.Title + } + tags = append(tags, durTag) tags = append(tags, seg.URI) return []byte(strings.Join(tags, "\n")), nil } blob - 06f9851ab3e4f2273de6dc4d044dc7b4c2a95a2b blob + 1c8ee76631a6fbd6327c4203fadd742f50ae9660 --- m3u8/segment_test.go +++ m3u8/segment_test.go @@ -42,6 +42,15 @@ func TestMarshalSegments(t *testing.T) { }, "#EXT-X-BYTERANGE:69@420\n#EXTINF:2.000\nvid.ts", }, + { + "title", + Segment{ + Duration: 2 * time.Second, + URI: "1.fmp4", + Title: "first", + }, + "#EXTINF:2.000,first\n1.fmp4", + }, } for _, tt := range cases { t.Run(tt.name, func(t *testing.T) { @@ -123,3 +132,29 @@ func TestParseSegment(t *testing.T) { t.Errorf("decode encrypted segment: got %v, want %v", plist.Segments[0], encrypted) } } + +func TestSegmentTitles(t *testing.T) { + f, err := os.Open("testdata/segment_titles.m3u8") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + plist, err := Decode(f) + if err != nil { + t.Fatalf("decode %s: %v", f.Name(), err) + } + + want := Segment{ + Duration: 6 * time.Second, + Title: "second", + URI: "002.fmp4", + } + + if want.Title != plist.Segments[1].Title { + t.Errorf("second segment title = %s, want %s", plist.Segments[1].Title, want.Title) + } + if plist.Segments[0].Title != "" || plist.Segments[2].Title != "" { + t.Errorf("unexpected non-empty titles in parsed segments") + } +} blob - /dev/null blob + 5e5a161cf3a9b609942cd8e55b79ce50cb537085 (mode 644) --- /dev/null +++ m3u8/testdata/segment_titles.m3u8 @@ -0,0 +1,7 @@ +#EXTM3U +#EXTINF:6.000 +001.fmp4 +#EXTINF:6.000,second +002.fmp4 +#EXTINF:6.000, +003.fmp4