|
|
@ -1551,42 +1551,52 @@ class InfoExtractor(object): |
|
|
|
|
|
|
|
def extract_multisegment_info(element, ms_parent_info): |
|
|
|
ms_info = ms_parent_info.copy() |
|
|
|
|
|
|
|
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some |
|
|
|
# common attributes and elements. We will only extract relevant |
|
|
|
# for us. |
|
|
|
def extract_common(source): |
|
|
|
segment_timeline = source.find(_add_ns('SegmentTimeline')) |
|
|
|
if segment_timeline is not None: |
|
|
|
s_e = segment_timeline.findall(_add_ns('S')) |
|
|
|
if s_e: |
|
|
|
ms_info['total_number'] = 0 |
|
|
|
ms_info['s'] = [] |
|
|
|
for s in s_e: |
|
|
|
r = int(s.get('r', 0)) |
|
|
|
ms_info['total_number'] += 1 + r |
|
|
|
ms_info['s'].append({ |
|
|
|
't': int(s.get('t', 0)), |
|
|
|
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) |
|
|
|
'd': int(s.attrib['d']), |
|
|
|
'r': r, |
|
|
|
}) |
|
|
|
start_number = source.get('startNumber') |
|
|
|
if start_number: |
|
|
|
ms_info['start_number'] = int(start_number) |
|
|
|
timescale = source.get('timescale') |
|
|
|
if timescale: |
|
|
|
ms_info['timescale'] = int(timescale) |
|
|
|
segment_duration = source.get('duration') |
|
|
|
if segment_duration: |
|
|
|
ms_info['segment_duration'] = int(segment_duration) |
|
|
|
|
|
|
|
def extract_Initialization(source): |
|
|
|
initialization = source.find(_add_ns('Initialization')) |
|
|
|
if initialization is not None: |
|
|
|
ms_info['initialization_url'] = initialization.attrib['sourceURL'] |
|
|
|
|
|
|
|
segment_list = element.find(_add_ns('SegmentList')) |
|
|
|
if segment_list is not None: |
|
|
|
extract_common(segment_list) |
|
|
|
extract_Initialization(segment_list) |
|
|
|
segment_urls_e = segment_list.findall(_add_ns('SegmentURL')) |
|
|
|
if segment_urls_e: |
|
|
|
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e] |
|
|
|
initialization = segment_list.find(_add_ns('Initialization')) |
|
|
|
if initialization is not None: |
|
|
|
ms_info['initialization_url'] = initialization.attrib['sourceURL'] |
|
|
|
else: |
|
|
|
segment_template = element.find(_add_ns('SegmentTemplate')) |
|
|
|
if segment_template is not None: |
|
|
|
start_number = segment_template.get('startNumber') |
|
|
|
if start_number: |
|
|
|
ms_info['start_number'] = int(start_number) |
|
|
|
segment_timeline = segment_template.find(_add_ns('SegmentTimeline')) |
|
|
|
if segment_timeline is not None: |
|
|
|
s_e = segment_timeline.findall(_add_ns('S')) |
|
|
|
if s_e: |
|
|
|
ms_info['total_number'] = 0 |
|
|
|
ms_info['s'] = [] |
|
|
|
for s in s_e: |
|
|
|
r = int(s.get('r', 0)) |
|
|
|
ms_info['total_number'] += 1 + r |
|
|
|
ms_info['s'].append({ |
|
|
|
't': int(s.get('t', 0)), |
|
|
|
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) |
|
|
|
'd': int(s.attrib['d']), |
|
|
|
'r': r, |
|
|
|
}) |
|
|
|
else: |
|
|
|
timescale = segment_template.get('timescale') |
|
|
|
if timescale: |
|
|
|
ms_info['timescale'] = int(timescale) |
|
|
|
segment_duration = segment_template.get('duration') |
|
|
|
if segment_duration: |
|
|
|
ms_info['segment_duration'] = int(segment_duration) |
|
|
|
extract_common(segment_template) |
|
|
|
media_template = segment_template.get('media') |
|
|
|
if media_template: |
|
|
|
ms_info['media_template'] = media_template |
|
|
@ -1594,11 +1604,14 @@ class InfoExtractor(object): |
|
|
|
if initialization: |
|
|
|
ms_info['initialization_url'] = initialization |
|
|
|
else: |
|
|
|
initialization = segment_template.find(_add_ns('Initialization')) |
|
|
|
if initialization is not None: |
|
|
|
ms_info['initialization_url'] = initialization.attrib['sourceURL'] |
|
|
|
extract_Initialization(segment_template) |
|
|
|
return ms_info |
|
|
|
|
|
|
|
def combine_url(base_url, target_url): |
|
|
|
if re.match(r'^https?://', target_url): |
|
|
|
return target_url |
|
|
|
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) |
|
|
|
|
|
|
|
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) |
|
|
|
formats = [] |
|
|
|
for period in mpd_doc.findall(_add_ns('Period')): |
|
|
@ -1655,9 +1668,7 @@ class InfoExtractor(object): |
|
|
|
} |
|
|
|
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) |
|
|
|
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: |
|
|
|
if 'total_number' not in representation_ms_info and 'segment_duration': |
|
|
|
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale']) |
|
|
|
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) |
|
|
|
|
|
|
|
media_template = representation_ms_info['media_template'] |
|
|
|
media_template = media_template.replace('$RepresentationID$', representation_id) |
|
|
|
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template) |
|
|
@ -1666,7 +1677,11 @@ class InfoExtractor(object): |
|
|
|
|
|
|
|
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ |
|
|
|
# can't be used at the same time |
|
|
|
if '%(Number' in media_template: |
|
|
|
if '%(Number' in media_template and 's' not in representation_ms_info: |
|
|
|
segment_duration = None |
|
|
|
if 'total_number' not in representation_ms_info and 'segment_duration': |
|
|
|
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) |
|
|
|
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) |
|
|
|
representation_ms_info['segment_urls'] = [ |
|
|
|
media_template % { |
|
|
|
'Number': segment_number, |
|
|
@ -1675,28 +1690,65 @@ class InfoExtractor(object): |
|
|
|
for segment_number in range( |
|
|
|
representation_ms_info['start_number'], |
|
|
|
representation_ms_info['total_number'] + representation_ms_info['start_number'])] |
|
|
|
representation_ms_info['fragments'] = [{ |
|
|
|
'url': media_template % { |
|
|
|
'Number': segment_number, |
|
|
|
'Bandwidth': representation_attrib.get('bandwidth'), |
|
|
|
}, |
|
|
|
'duration': segment_duration, |
|
|
|
} for segment_number in range( |
|
|
|
representation_ms_info['start_number'], |
|
|
|
representation_ms_info['total_number'] + representation_ms_info['start_number'])] |
|
|
|
else: |
|
|
|
# $Number*$ or $Time$ in media template with S list available |
|
|
|
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg |
|
|
|
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 |
|
|
|
representation_ms_info['segment_urls'] = [] |
|
|
|
representation_ms_info['fragments'] = [] |
|
|
|
segment_time = 0 |
|
|
|
segment_d = None |
|
|
|
segment_number = representation_ms_info['start_number'] |
|
|
|
|
|
|
|
def add_segment_url(): |
|
|
|
representation_ms_info['segment_urls'].append( |
|
|
|
media_template % { |
|
|
|
'Time': segment_time, |
|
|
|
'Bandwidth': representation_attrib.get('bandwidth'), |
|
|
|
} |
|
|
|
) |
|
|
|
segment_url = media_template % { |
|
|
|
'Time': segment_time, |
|
|
|
'Bandwidth': representation_attrib.get('bandwidth'), |
|
|
|
'Number': segment_number, |
|
|
|
} |
|
|
|
representation_ms_info['segment_urls'].append(segment_url) |
|
|
|
representation_ms_info['fragments'].append({ |
|
|
|
'url': segment_url, |
|
|
|
'duration': float_or_none(segment_d, representation_ms_info['timescale']), |
|
|
|
}) |
|
|
|
|
|
|
|
for num, s in enumerate(representation_ms_info['s']): |
|
|
|
segment_time = s.get('t') or segment_time |
|
|
|
segment_d = s['d'] |
|
|
|
add_segment_url() |
|
|
|
segment_number += 1 |
|
|
|
for r in range(s.get('r', 0)): |
|
|
|
segment_time += s['d'] |
|
|
|
segment_time += segment_d |
|
|
|
add_segment_url() |
|
|
|
segment_time += s['d'] |
|
|
|
segment_number += 1 |
|
|
|
segment_time += segment_d |
|
|
|
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info: |
|
|
|
# No media template |
|
|
|
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI |
|
|
|
# or any YouTube dashsegments video |
|
|
|
fragments = [] |
|
|
|
s_num = 0 |
|
|
|
for segment_url in representation_ms_info['segment_urls']: |
|
|
|
s = representation_ms_info['s'][s_num] |
|
|
|
for r in range(s.get('r', 0) + 1): |
|
|
|
fragments.append({ |
|
|
|
'url': segment_url, |
|
|
|
'duration': float_or_none(s['d'], representation_ms_info['timescale']), |
|
|
|
}) |
|
|
|
representation_ms_info['fragments'] = fragments |
|
|
|
if 'segment_urls' in representation_ms_info: |
|
|
|
f.update({ |
|
|
|
'segment_urls': representation_ms_info['segment_urls'], |
|
|
|
'fragments': [], |
|
|
|
'protocol': 'http_dash_segments', |
|
|
|
}) |
|
|
|
if 'initialization_url' in representation_ms_info: |
|
|
@ -1706,6 +1758,10 @@ class InfoExtractor(object): |
|
|
|
}) |
|
|
|
if not f.get('url'): |
|
|
|
f['url'] = initialization_url |
|
|
|
f['fragments'].append({'url': initialization_url}) |
|
|
|
f['fragments'].extend(representation_ms_info['fragments']) |
|
|
|
for fragment in f['fragments']: |
|
|
|
fragment['url'] = combine_url(base_url, fragment['url']) |
|
|
|
try: |
|
|
|
existing_format = next( |
|
|
|
fo for fo in formats |
|
|
|