Browse Source

[brightcove] Handle non well-formed XMLs (#5421)

totalwebcasting
Sergey M․ 10 years ago
parent
commit
94c1255782
1 changed file with 5 additions and 2 deletions
  1. +5
    -2
      youtube_dl/extractor/brightcove.py

+ 5
- 2
youtube_dl/extractor/brightcove.py View File

@@ -117,7 +117,10 @@ class BrightcoveIE(InfoExtractor):
         object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
         object_str = fix_xml_ampersands(object_str)
 
-        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        try:
+            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        except xml.etree.ElementTree.ParseError:
+            return
 
         fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
         if fv_el is not None:
@@ -185,7 +188,7 @@ class BrightcoveIE(InfoExtractor):
                     [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
                 ).+?>\s*</object>''',
             webpage)
-        return [cls._build_brighcove_url(m) for m in matches]
+        return filter(None, [cls._build_brighcove_url(m) for m in matches])
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})


Loading…
Cancel
Save