|
|
@ -8,6 +8,7 @@ from ..utils import ( |
|
|
|
int_or_none, |
|
|
|
limit_length, |
|
|
|
lowercase_escape, |
|
|
|
try_get, |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
@ -19,10 +20,16 @@ class InstagramIE(InfoExtractor): |
|
|
|
'info_dict': { |
|
|
|
'id': 'aye83DjauH', |
|
|
|
'ext': 'mp4', |
|
|
|
'uploader_id': 'naomipq', |
|
|
|
'title': 'Video by naomipq', |
|
|
|
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', |
|
|
|
} |
|
|
|
'thumbnail': 're:^https?://.*\.jpg', |
|
|
|
'timestamp': 1371748545, |
|
|
|
'upload_date': '20130620', |
|
|
|
'uploader_id': 'naomipq', |
|
|
|
'uploader': 'Naomi Leonor Phan-Quang', |
|
|
|
'like_count': int, |
|
|
|
'comment_count': int, |
|
|
|
}, |
|
|
|
}, { |
|
|
|
# missing description |
|
|
|
'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears', |
|
|
@ -31,6 +38,13 @@ class InstagramIE(InfoExtractor): |
|
|
|
'ext': 'mp4', |
|
|
|
'uploader_id': 'britneyspears', |
|
|
|
'title': 'Video by britneyspears', |
|
|
|
'thumbnail': 're:^https?://.*\.jpg', |
|
|
|
'timestamp': 1453760977, |
|
|
|
'upload_date': '20160125', |
|
|
|
'uploader_id': 'britneyspears', |
|
|
|
'uploader': 'Britney Spears', |
|
|
|
'like_count': int, |
|
|
|
'comment_count': int, |
|
|
|
}, |
|
|
|
'params': { |
|
|
|
'skip_download': True, |
|
|
@ -67,21 +81,57 @@ class InstagramIE(InfoExtractor): |
|
|
|
url = mobj.group('url') |
|
|
|
|
|
|
|
webpage = self._download_webpage(url, video_id) |
|
|
|
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', |
|
|
|
webpage, 'uploader id', fatal=False) |
|
|
|
desc = self._search_regex( |
|
|
|
r'"caption":"(.+?)"', webpage, 'description', default=None) |
|
|
|
if desc is not None: |
|
|
|
desc = lowercase_escape(desc) |
|
|
|
|
|
|
|
(video_url, description, thumbnail, timestamp, uploader, |
|
|
|
uploader_id, like_count, comment_count) = [None] * 8 |
|
|
|
|
|
|
|
shared_data = self._parse_json( |
|
|
|
self._search_regex( |
|
|
|
r'window\._sharedData\s*=\s*({.+?});', |
|
|
|
webpage, 'shared data', default='{}'), |
|
|
|
video_id, fatal=False) |
|
|
|
if shared_data: |
|
|
|
media = try_get( |
|
|
|
shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict) |
|
|
|
if media: |
|
|
|
video_url = media.get('video_url') |
|
|
|
description = media.get('caption') |
|
|
|
thumbnail = media.get('display_src') |
|
|
|
timestamp = int_or_none(media.get('date')) |
|
|
|
uploader = media.get('owner', {}).get('full_name') |
|
|
|
uploader_id = media.get('owner', {}).get('username') |
|
|
|
like_count = int_or_none(media.get('likes', {}).get('count')) |
|
|
|
comment_count = int_or_none(media.get('comments', {}).get('count')) |
|
|
|
|
|
|
|
if not video_url: |
|
|
|
video_url = self._og_search_video_url(webpage, secure=False) |
|
|
|
|
|
|
|
if not uploader_id: |
|
|
|
uploader_id = self._search_regex( |
|
|
|
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', |
|
|
|
webpage, 'uploader id', fatal=False) |
|
|
|
|
|
|
|
if not description: |
|
|
|
description = self._search_regex( |
|
|
|
r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None) |
|
|
|
if description is not None: |
|
|
|
description = lowercase_escape(description) |
|
|
|
|
|
|
|
if not thumbnail: |
|
|
|
thumbnail = self._og_search_thumbnail(webpage) |
|
|
|
|
|
|
|
return { |
|
|
|
'id': video_id, |
|
|
|
'url': self._og_search_video_url(webpage, secure=False), |
|
|
|
'url': video_url, |
|
|
|
'ext': 'mp4', |
|
|
|
'title': 'Video by %s' % uploader_id, |
|
|
|
'thumbnail': self._og_search_thumbnail(webpage), |
|
|
|
'description': description, |
|
|
|
'thumbnail': thumbnail, |
|
|
|
'timestamp': timestamp, |
|
|
|
'uploader_id': uploader_id, |
|
|
|
'description': desc, |
|
|
|
'uploader': uploader, |
|
|
|
'like_count': like_count, |
|
|
|
'comment_count': comment_count, |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|