Browse Source

[instagram] Add support for user profiles (Fixes #2606)

totalwebcasting
Philipp Hagemeister 11 years ago
parent
commit
ea38e55fff
6 changed files with 124 additions and 29 deletions
  1. +18
    -0
      test/helper.py
  2. +0
    -19
      test/test_download.py
  3. +28
    -2
      test/test_playlists.py
  4. +9
    -7
      youtube_dl/YoutubeDL.py
  5. +1
    -1
      youtube_dl/extractor/__init__.py
  6. +68
    -0
      youtube_dl/extractor/instagram.py

+ 18
- 0
test/helper.py View File

@ -110,3 +110,21 @@ def expect_info_dict(self, expected_dict, got_dict):
self.assertEqual(expected, got, self.assertEqual(expected, got,
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor', 'extractor_key']:
self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
# Are checkable fields missing from the test case definition?
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
for key, value in got_dict.items()
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
if missing_keys:
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
self.assertFalse(
missing_keys,
'Missing keys in test definition: %s' % (
', '.join(sorted(missing_keys))))

+ 0
- 19
test/test_download.py View File

@ -137,25 +137,6 @@ def generator(test_case):
info_dict = json.load(infof) info_dict = json.load(infof)
expect_info_dict(self, tc.get('info_dict', {}), info_dict) expect_info_dict(self, tc.get('info_dict', {}), info_dict)
# Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key])
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor', 'extractor_key']:
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
# Are checkable fields missing from the test case definition?
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
for key, value in info_dict.items()
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
missing_keys = set(test_info_dict.keys()) - set(tc.get('info_dict', {}).keys())
if missing_keys:
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
self.assertFalse(
missing_keys,
'Missing keys in test definition: %s' % (
','.join(sorted(missing_keys))))
finally: finally:
try_rm_tcs_files() try_rm_tcs_files()


+ 28
- 2
test/test_playlists.py View File

@ -9,8 +9,10 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from test.helper import (
expect_info_dict,
FakeYDL,
)
from youtube_dl.extractor import ( from youtube_dl.extractor import (
AcademicEarthCourseIE, AcademicEarthCourseIE,
@ -39,6 +41,7 @@ from youtube_dl.extractor import (
TEDIE, TEDIE,
ToypicsUserIE, ToypicsUserIE,
XTubeUserIE, XTubeUserIE,
InstagramUserIE,
) )
@ -287,5 +290,28 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], 'greenshowers') self.assertEqual(result['id'], 'greenshowers')
self.assertTrue(len(result['entries']) >= 155) self.assertTrue(len(result['entries']) >= 155)
def test_InstagramUser(self):
dl = FakeYDL()
ie = InstagramUserIE(dl)
result = ie.extract('http://instagram.com/porsche')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'porsche')
self.assertTrue(len(result['entries']) >= 2)
test_video = next(
e for e in result['entries']
if e['id'] == '614605558512799803_462752227')
dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
dl.process_video_result(test_video, download=False)
EXPECTED = {
'id': '614605558512799803_462752227',
'ext': 'mp4',
'title': '#Porsche Intelligent Performance.',
'thumbnail': 're:^https?://.*\.jpg',
'uploader': 'Porsche',
'uploader_id': 'porsche',
}
expect_info_dict(self, EXPECTED, test_video)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

+ 9
- 7
youtube_dl/YoutubeDL.py View File

@ -512,13 +512,7 @@ class YoutubeDL(object):
'_type': 'compat_list', '_type': 'compat_list',
'entries': ie_result, 'entries': ie_result,
} }
self.add_extra_info(ie_result,
{
'extractor': ie.IE_NAME,
'webpage_url': url,
'webpage_url_basename': url_basename(url),
'extractor_key': ie.ie_key(),
})
self.add_default_extra_info(ie_result, ie, url)
if process: if process:
return self.process_ie_result(ie_result, download, extra_info) return self.process_ie_result(ie_result, download, extra_info)
else: else:
@ -537,6 +531,14 @@ class YoutubeDL(object):
else: else:
self.report_error('no suitable InfoExtractor for URL %s' % url) self.report_error('no suitable InfoExtractor for URL %s' % url)
def add_default_extra_info(self, ie_result, ie, url):
self.add_extra_info(ie_result, {
'extractor': ie.IE_NAME,
'webpage_url': url,
'webpage_url_basename': url_basename(url),
'extractor_key': ie.ie_key(),
})
def process_ie_result(self, ie_result, download=True, extra_info={}): def process_ie_result(self, ie_result, download=True, extra_info={}):
""" """
Take the result of the ie(may be modified) and resolve all unresolved Take the result of the ie(may be modified) and resolve all unresolved


+ 1
- 1
youtube_dl/extractor/__init__.py View File

@ -112,7 +112,7 @@ from .imdb import (
) )
from .ina import InaIE from .ina import InaIE
from .infoq import InfoQIE from .infoq import InfoQIE
from .instagram import InstagramIE
from .instagram import InstagramIE, InstagramUserIE
from .internetvideoarchive import InternetVideoArchiveIE from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE from .iprima import IPrimaIE
from .ivi import ( from .ivi import (


+ 68
- 0
youtube_dl/extractor/instagram.py View File

@ -3,6 +3,9 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
int_or_none,
)
class InstagramIE(InfoExtractor): class InstagramIE(InfoExtractor):
@ -37,3 +40,68 @@ class InstagramIE(InfoExtractor):
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'description': desc, 'description': desc,
} }
class InstagramUserIE(InfoExtractor):
_VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uploader_id = mobj.group('username')
entries = []
page_count = 0
media_url = 'http://instagram.com/%s/media' % uploader_id
while True:
page = self._download_json(
media_url, uploader_id,
note='Downloading page %d ' % (page_count + 1),
)
page_count += 1
for it in page['items']:
if it.get('type') != 'video':
continue
like_count = int_or_none(it.get('likes', {}).get('count'))
user = it.get('user', {})
formats = [{
'format_id': k,
'height': v.get('height'),
'width': v.get('width'),
'url': v['url'],
} for k, v in it['videos'].items()]
self._sort_formats(formats)
thumbnails_el = it.get('images', {})
thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
title = it.get('caption', {}).get('text', it['id'])
entries.append({
'id': it['id'],
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'webpage_url': it.get('link'),
'uploader': user.get('full_name'),
'uploader_id': user.get('username'),
'like_count': like_count,
'upload_timestamp': int_or_none(it.get('created_time')),
})
if not page['items']:
break
max_id = page['items'][-1]['id']
media_url = (
'http://instagram.com/%s/media?max_id=%s' % (
uploader_id, max_id))
return {
'_type': 'playlist',
'entries': entries,
'id': uploader_id,
'title': uploader_id,
}

Loading…
Cancel
Save