Browse Source

[youtube:playlist] Remove the link with index 0

It's not the first video of the playlist, it appears in the 'Play all' button (see the test course for an example)
totalwebcasting
Jaime Marquínez Ferrándiz 11 years ago
parent
commit
6e47b51eef
1 changed files with 5 additions and 3 deletions
  1. +5
    -3
      youtube_dl/extractor/youtube.py

+ 5
- 3
youtube_dl/extractor/youtube.py View File

@ -1528,7 +1528,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
)""" )"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s' _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"' _MORE_PAGES_INDICATOR = r'data-link-type="next"'
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
IE_NAME = u'youtube:playlist' IE_NAME = u'youtube:playlist'
@classmethod @classmethod
@ -1562,8 +1562,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
for page_num in itertools.count(1): for page_num in itertools.count(1):
url = self._TEMPLATE_URL % (playlist_id, page_num) url = self._TEMPLATE_URL % (playlist_id, page_num)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
# The ids are duplicated
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
matches = re.finditer(self._VIDEO_RE, page)
# We remove the duplicates and the link with index 0
# (it's not the first video of the playlist)
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
ids.extend(new_ids) ids.extend(new_ids)
if re.search(self._MORE_PAGES_INDICATOR, page) is None: if re.search(self._MORE_PAGES_INDICATOR, page) is None:


Loading…
Cancel
Save