You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

115 lines
4.1 KiB

8 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..compat import compat_str
  5. from ..utils import (
  6. int_or_none,
  7. float_or_none,
  8. try_get,
  9. unified_timestamp,
  10. )
  11. class FlipagramIE(InfoExtractor):
  12. _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
  13. _TEST = {
  14. 'url': 'https://flipagram.com/f/nyvTSJMKId',
  15. 'md5': '888dcf08b7ea671381f00fab74692755',
  16. 'info_dict': {
  17. 'id': 'nyvTSJMKId',
  18. 'ext': 'mp4',
  19. 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
  20. 'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
  21. 'duration': 35.571,
  22. 'timestamp': 1461244995,
  23. 'upload_date': '20160421',
  24. 'uploader': 'kitty juria',
  25. 'uploader_id': 'sjuria101',
  26. 'creator': 'kitty juria',
  27. 'view_count': int,
  28. 'like_count': int,
  29. 'repost_count': int,
  30. 'comment_count': int,
  31. 'comments': list,
  32. 'formats': 'mincount:2',
  33. },
  34. }
  35. def _real_extract(self, url):
  36. video_id = self._match_id(url)
  37. webpage = self._download_webpage(url, video_id)
  38. video_data = self._parse_json(
  39. self._search_regex(
  40. r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
  41. video_id)
  42. flipagram = video_data['flipagram']
  43. video = flipagram['video']
  44. json_ld = self._search_json_ld(webpage, video_id, default={})
  45. title = json_ld.get('title') or flipagram['captionText']
  46. description = json_ld.get('description') or flipagram.get('captionText')
  47. formats = [{
  48. 'url': video['url'],
  49. 'width': int_or_none(video.get('width')),
  50. 'height': int_or_none(video.get('height')),
  51. 'filesize': int_or_none(video_data.get('size')),
  52. }]
  53. preview_url = try_get(
  54. flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
  55. if preview_url:
  56. formats.append({
  57. 'url': preview_url,
  58. 'ext': 'm4a',
  59. 'vcodec': 'none',
  60. })
  61. self._sort_formats(formats)
  62. counts = flipagram.get('counts', {})
  63. user = flipagram.get('user', {})
  64. video_data = flipagram.get('video', {})
  65. thumbnails = [{
  66. 'url': self._proto_relative_url(cover['url']),
  67. 'width': int_or_none(cover.get('width')),
  68. 'height': int_or_none(cover.get('height')),
  69. 'filesize': int_or_none(cover.get('size')),
  70. } for cover in flipagram.get('covers', []) if cover.get('url')]
  71. # Note that this only retrieves comments that are initially loaded.
  72. # For videos with large amounts of comments, most won't be retrieved.
  73. comments = []
  74. for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
  75. text = comment.get('comment')
  76. if not text or not isinstance(text, list):
  77. continue
  78. comments.append({
  79. 'author': comment.get('user', {}).get('name'),
  80. 'author_id': comment.get('user', {}).get('username'),
  81. 'id': comment.get('id'),
  82. 'text': text[0],
  83. 'timestamp': unified_timestamp(comment.get('created')),
  84. })
  85. return {
  86. 'id': video_id,
  87. 'title': title,
  88. 'description': description,
  89. 'duration': float_or_none(flipagram.get('duration'), 1000),
  90. 'thumbnails': thumbnails,
  91. 'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
  92. 'uploader': user.get('name'),
  93. 'uploader_id': user.get('username'),
  94. 'creator': user.get('name'),
  95. 'view_count': int_or_none(counts.get('plays')),
  96. 'like_count': int_or_none(counts.get('likes')),
  97. 'repost_count': int_or_none(counts.get('reflips')),
  98. 'comment_count': int_or_none(counts.get('comments')),
  99. 'comments': comments,
  100. 'formats': formats,
  101. }