Browse Source

[utils] Support ttaf1 namespace in TTML

It's found in bbc.co.uk. See #6038
totalwebcasting
Yen Chi Hsuan 10 years ago
parent
commit
4e33577173
1 changed files with 7 additions and 4 deletions
  1. +7
    -4
      youtube_dl/utils.py

+ 7
- 4
youtube_dl/utils.py View File

@ -1841,7 +1841,10 @@ def srt_subtitles_timecode(seconds):
def dfxp2srt(dfxp_data): def dfxp2srt(dfxp_data):
_x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
_x = functools.partial(xpath_with_ns, ns_map={
'ttml': 'http://www.w3.org/ns/ttml',
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
})
def parse_node(node): def parse_node(node):
str_or_empty = functools.partial(str_or_none, default='') str_or_empty = functools.partial(str_or_none, default='')
@ -1849,9 +1852,9 @@ def dfxp2srt(dfxp_data):
out = str_or_empty(node.text) out = str_or_empty(node.text)
for child in node: for child in node:
if child.tag in (_x('ttml:br'), 'br'):
if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
out += '\n' + str_or_empty(child.tail) out += '\n' + str_or_empty(child.tail)
elif child.tag in (_x('ttml:span'), 'span'):
elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
out += str_or_empty(parse_node(child)) out += str_or_empty(parse_node(child))
else: else:
out += str_or_empty(xml.etree.ElementTree.tostring(child)) out += str_or_empty(xml.etree.ElementTree.tostring(child))
@ -1860,7 +1863,7 @@ def dfxp2srt(dfxp_data):
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
out = [] out = []
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
if not paras: if not paras:
raise ValueError('Invalid dfxp/TTML subtitle') raise ValueError('Invalid dfxp/TTML subtitle')


Loading…
Cancel
Save