|
|
@ -8,10 +8,10 @@ from ..utils import ( |
|
|
|
ExtractorError, |
|
|
|
find_xpath_attr, |
|
|
|
unified_strdate, |
|
|
|
determine_ext, |
|
|
|
get_element_by_id, |
|
|
|
get_element_by_attribute, |
|
|
|
int_or_none, |
|
|
|
qualities, |
|
|
|
) |
|
|
|
|
|
|
|
# There are different sources of video in arte.tv, the extraction process |
|
|
@ -102,79 +102,54 @@ class ArteTVPlus7IE(InfoExtractor): |
|
|
|
'upload_date': unified_strdate(upload_date_str), |
|
|
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), |
|
|
|
} |
|
|
|
qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ']) |
|
|
|
|
|
|
|
all_formats = [] |
|
|
|
formats = [] |
|
|
|
for format_id, format_dict in player_info['VSR'].items(): |
|
|
|
fmt = dict(format_dict) |
|
|
|
fmt['format_id'] = format_id |
|
|
|
all_formats.append(fmt) |
|
|
|
# Some formats use the m3u8 protocol |
|
|
|
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) |
|
|
|
def _match_lang(f): |
|
|
|
if f.get('versionCode') is None: |
|
|
|
return True |
|
|
|
# Return true if that format is in the language of the url |
|
|
|
if lang == 'fr': |
|
|
|
l = 'F' |
|
|
|
elif lang == 'de': |
|
|
|
l = 'A' |
|
|
|
else: |
|
|
|
l = lang |
|
|
|
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] |
|
|
|
return any(re.match(r, f['versionCode']) for r in regexes) |
|
|
|
# Some formats may not be in the same language as the url |
|
|
|
# TODO: Might want not to drop videos that does not match requested language |
|
|
|
# but to process those formats with lower precedence |
|
|
|
formats = filter(_match_lang, all_formats) |
|
|
|
formats = list(formats) # in python3 filter returns an iterator |
|
|
|
if not formats: |
|
|
|
# Some videos are only available in the 'Originalversion' |
|
|
|
# they aren't tagged as being in French or German |
|
|
|
# Sometimes there are neither videos of requested lang code |
|
|
|
# nor original version videos available |
|
|
|
# For such cases we just take all_formats as is |
|
|
|
formats = all_formats |
|
|
|
if not formats: |
|
|
|
raise ExtractorError('The formats list is empty') |
|
|
|
|
|
|
|
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: |
|
|
|
def sort_key(f): |
|
|
|
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) |
|
|
|
else: |
|
|
|
def sort_key(f): |
|
|
|
versionCode = f.get('versionCode') |
|
|
|
if versionCode is None: |
|
|
|
versionCode = '' |
|
|
|
return ( |
|
|
|
# Sort first by quality |
|
|
|
int(f.get('height', -1)), |
|
|
|
int(f.get('bitrate', -1)), |
|
|
|
# The original version with subtitles has lower relevance |
|
|
|
re.match(r'VO-ST(F|A)', versionCode) is None, |
|
|
|
# The version with sourds/mal subtitles has also lower relevance |
|
|
|
re.match(r'VO?(F|A)-STM\1', versionCode) is None, |
|
|
|
# Prefer http downloads over m3u8 |
|
|
|
0 if f['url'].endswith('m3u8') else 1, |
|
|
|
) |
|
|
|
formats = sorted(formats, key=sort_key) |
|
|
|
def _format(format_info): |
|
|
|
info = { |
|
|
|
'format_id': format_info['format_id'], |
|
|
|
'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')), |
|
|
|
'width': int_or_none(format_info.get('width')), |
|
|
|
'height': int_or_none(format_info.get('height')), |
|
|
|
'tbr': int_or_none(format_info.get('bitrate')), |
|
|
|
f = dict(format_dict) |
|
|
|
versionCode = f.get('versionCode') |
|
|
|
|
|
|
|
langcode = { |
|
|
|
'fr': 'F', |
|
|
|
'de': 'A', |
|
|
|
}.get(lang, lang) |
|
|
|
lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode] |
|
|
|
lang_pref = ( |
|
|
|
None if versionCode is None else ( |
|
|
|
10 if any(re.match(r, versionCode) for r in lang_rexs) |
|
|
|
else -10)) |
|
|
|
source_pref = 0 |
|
|
|
if versionCode is not None: |
|
|
|
# The original version with subtitles has lower relevance |
|
|
|
if re.match(r'VO-ST(F|A)', versionCode): |
|
|
|
source_pref -= 10 |
|
|
|
# The version with sourds/mal subtitles has also lower relevance |
|
|
|
elif re.match(r'VO?(F|A)-STM\1', versionCode): |
|
|
|
source_pref -= 9 |
|
|
|
format = { |
|
|
|
'format_id': format_id, |
|
|
|
'preference': -10 if f.get('videoFormat') == 'M3U8' else None, |
|
|
|
'language_preference': lang_pref, |
|
|
|
'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')), |
|
|
|
'width': int_or_none(f.get('width')), |
|
|
|
'height': int_or_none(f.get('height')), |
|
|
|
'tbr': int_or_none(f.get('bitrate')), |
|
|
|
'quality': qfunc(f['quality']), |
|
|
|
'source_preference': source_pref, |
|
|
|
} |
|
|
|
if format_info['mediaType'] == 'rtmp': |
|
|
|
info['url'] = format_info['streamer'] |
|
|
|
info['play_path'] = 'mp4:' + format_info['url'] |
|
|
|
info['ext'] = 'flv' |
|
|
|
|
|
|
|
if f.get('mediaType') == 'rtmp': |
|
|
|
format['url'] = f['streamer'] |
|
|
|
format['play_path'] = 'mp4:' + f['url'] |
|
|
|
format['ext'] = 'flv' |
|
|
|
else: |
|
|
|
info['url'] = format_info['url'] |
|
|
|
info['ext'] = determine_ext(info['url']) |
|
|
|
return info |
|
|
|
info_dict['formats'] = [_format(f) for f in formats] |
|
|
|
format['url'] = f['url'] |
|
|
|
|
|
|
|
formats.append(format) |
|
|
|
|
|
|
|
self._sort_formats(formats) |
|
|
|
|
|
|
|
info_dict['formats'] = formats |
|
|
|
return info_dict |
|
|
|
|
|
|
|
|
|
|
|