Browse Source

[ard] Make more robust against missing thumbnails

I cannot reproduce this error myself; it was reported by the Travis build.
totalwebcasting
Philipp Hagemeister 10 years ago
parent
commit
bf0ff93277
2 changed files with 15 additions and 2 deletions
  1. +4
    -2
      youtube_dl/extractor/ard.py
  2. +11
    -0
      youtube_dl/utils.py

+ 4
- 2
youtube_dl/extractor/ard.py View File

@ -13,6 +13,7 @@ from ..utils import (
int_or_none,
parse_duration,
unified_strdate,
xpath_text,
)
@ -157,8 +158,9 @@ class ARDIE(InfoExtractor):
player_url = mobj.group('mainurl') + '~playerXml.xml'
doc = self._download_xml(player_url, display_id)
video_node = doc.find('./video')
upload_date = unified_strdate(video_node.find('./broadcastDate').text)
thumbnail = video_node.find('.//teaserImage//variant/url').text
upload_date = unified_strdate(xpath_text(
video_node, './broadcastDate'))
thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
formats = []
for a in video_node.findall('.//asset'):


+ 11
- 0
youtube_dl/utils.py View File

@ -304,6 +304,17 @@ def xpath_with_ns(path, ns_map):
return '/'.join(replaced)
def xpath_text(node, xpath, name=None, fatal=False):
    """Return the text of the first element under *node* matching *xpath*.

    node  -- object exposing ``find(xpath)`` (e.g. an ElementTree element)
    xpath -- path expression passed verbatim to ``node.find``
    name  -- label used in the error message; defaults to *xpath*
    fatal -- when true, a missing element raises ExtractorError instead
             of yielding None

    The ``text`` attribute of the match is returned unchanged, so an
    element that exists but has no text still yields None.
    """
    element = node.find(xpath)
    if element is not None:
        return element.text

    # Nothing matched: either stay silent or report which element is missing.
    if not fatal:
        return None
    label = name if name is not None else xpath
    raise ExtractorError('Could not find XML element %s' % label)
# Replace the `locatestarttagend` pattern on the compat_html_parser module with
# the compiled regex below (presumably the pattern the HTML parser uses to find
# the end of a start tag -- confirm against the stdlib parser source).
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class BaseHTMLParser(compat_html_parser.HTMLParser):
def __init(self):


Loading…
Cancel
Save