Browse Source

[extractor/common] Add expected_type in json ld routines

totalwebcasting
Sergey M․ 9 years ago
parent
commit
95b31e266b
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
1 changed file with 7 additions and 3 deletions
  1. +7
    -3
      youtube_dl/extractor/common.py

+ 7
- 3
youtube_dl/extractor/common.py View File

@@ -805,15 +805,17 @@ class InfoExtractor(object):
return self._html_search_meta('twitter:player', html, return self._html_search_meta('twitter:player', html,
'twitter card player') 'twitter card player')
def _search_json_ld(self, html, video_id, **kwargs):
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
json_ld = self._search_regex( json_ld = self._search_regex(
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>', r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
html, 'JSON-LD', group='json_ld', **kwargs) html, 'JSON-LD', group='json_ld', **kwargs)
if not json_ld: if not json_ld:
return {} return {}
return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
return self._json_ld(
json_ld, video_id, fatal=kwargs.get('fatal', True),
expected_type=expected_type)
def _json_ld(self, json_ld, video_id, fatal=True):
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
if isinstance(json_ld, compat_str): if isinstance(json_ld, compat_str):
json_ld = self._parse_json(json_ld, video_id, fatal=fatal) json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
if not json_ld: if not json_ld:
@@ -821,6 +823,8 @@ class InfoExtractor(object):
info = {} info = {}
if json_ld.get('@context') == 'http://schema.org': if json_ld.get('@context') == 'http://schema.org':
item_type = json_ld.get('@type') item_type = json_ld.get('@type')
if expected_type is not None and expected_type != item_type:
return info
if item_type == 'TVEpisode': if item_type == 'TVEpisode':
info.update({ info.update({
'episode': unescapeHTML(json_ld.get('name')), 'episode': unescapeHTML(json_ld.get('name')),


Loading…
Cancel
Save