|
@ -1370,10 +1370,18 @@ class YoutubeChannelIE(InfoExtractor): |
|
|
|
|
|
|
|
|
def extract_videos_from_page(self, page): |
|
|
def extract_videos_from_page(self, page): |
|
|
ids_in_page = [] |
|
|
ids_in_page = [] |
|
|
for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page): |
|
|
|
|
|
if mobj.group(1) not in ids_in_page: |
|
|
|
|
|
ids_in_page.append(mobj.group(1)) |
|
|
|
|
|
return ids_in_page |
|
|
|
|
|
|
|
|
titles_in_page = [] |
|
|
|
|
|
for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page): |
|
|
|
|
|
video_id = mobj.group('id') |
|
|
|
|
|
video_title = unescapeHTML(mobj.group('title')) |
|
|
|
|
|
try: |
|
|
|
|
|
idx = ids_in_page.index(video_id) |
|
|
|
|
|
if video_title and not titles_in_page[idx]: |
|
|
|
|
|
titles_in_page[idx] = video_title |
|
|
|
|
|
except ValueError: |
|
|
|
|
|
ids_in_page.append(video_id) |
|
|
|
|
|
titles_in_page.append(video_title) |
|
|
|
|
|
return zip(ids_in_page, titles_in_page) |
|
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
def _real_extract(self, url): |
|
|
channel_id = self._match_id(url) |
|
|
channel_id = self._match_id(url) |
|
@ -1390,10 +1398,12 @@ class YoutubeChannelIE(InfoExtractor): |
|
|
if autogenerated: |
|
|
if autogenerated: |
|
|
# The videos are contained in a single page |
|
|
# The videos are contained in a single page |
|
|
# the ajax pages can't be used, they are empty |
|
|
# the ajax pages can't be used, they are empty |
|
|
video_ids = self.extract_videos_from_page(channel_page) |
|
|
|
|
|
|
|
|
videos = self.extract_videos_from_page(channel_page) |
|
|
entries = [ |
|
|
entries = [ |
|
|
self.url_result(video_id, 'Youtube', video_id=video_id) |
|
|
|
|
|
for video_id in video_ids] |
|
|
|
|
|
|
|
|
self.url_result( |
|
|
|
|
|
video_id, 'Youtube', video_id=video_id, |
|
|
|
|
|
video_title=video_title) |
|
|
|
|
|
for video_id, video_title in videos] |
|
|
return self.playlist_result(entries, channel_id) |
|
|
return self.playlist_result(entries, channel_id) |
|
|
|
|
|
|
|
|
def _entries(): |
|
|
def _entries(): |
|
@ -1401,9 +1411,10 @@ class YoutubeChannelIE(InfoExtractor): |
|
|
for pagenum in itertools.count(1): |
|
|
for pagenum in itertools.count(1): |
|
|
|
|
|
|
|
|
ids_in_page = self.extract_videos_from_page(content_html) |
|
|
ids_in_page = self.extract_videos_from_page(content_html) |
|
|
for video_id in ids_in_page: |
|
|
|
|
|
|
|
|
for video_id, video_title in ids_in_page: |
|
|
yield self.url_result( |
|
|
yield self.url_result( |
|
|
video_id, 'Youtube', video_id=video_id) |
|
|
|
|
|
|
|
|
video_id, 'Youtube', video_id=video_id, |
|
|
|
|
|
video_title=video_title) |
|
|
|
|
|
|
|
|
mobj = re.search( |
|
|
mobj = re.search( |
|
|
r'data-uix-load-more-href="/?(?P<more>[^"]+)"', |
|
|
r'data-uix-load-more-href="/?(?P<more>[^"]+)"', |
|
|