You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

117 lines
3.9 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. compat_str,
  6. ExtractorError,
  7. int_or_none,
  8. str_or_none,
  9. try_get,
  10. url_or_none,
  11. )
  12. class TikTokBaseIE(InfoExtractor):
  13. def _extract_aweme(self, data):
  14. video = data['video']
  15. description = str_or_none(try_get(data, lambda x: x['desc']))
  16. width = int_or_none(try_get(data, lambda x: video['width']))
  17. height = int_or_none(try_get(data, lambda x: video['height']))
  18. format_urls = set()
  19. formats = []
  20. for format_id in (
  21. 'play_addr_lowbr', 'play_addr', 'play_addr_h264',
  22. 'download_addr'):
  23. for format in try_get(
  24. video, lambda x: x[format_id]['url_list'], list) or []:
  25. format_url = url_or_none(format)
  26. if not format_url:
  27. continue
  28. if format_url in format_urls:
  29. continue
  30. format_urls.add(format_url)
  31. formats.append({
  32. 'url': format_url,
  33. 'ext': 'mp4',
  34. 'height': height,
  35. 'width': width,
  36. })
  37. self._sort_formats(formats)
  38. thumbnail = url_or_none(try_get(
  39. video, lambda x: x['cover']['url_list'][0], compat_str))
  40. uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
  41. timestamp = int_or_none(data.get('create_time'))
  42. comment_count = int_or_none(data.get('comment_count')) or int_or_none(
  43. try_get(data, lambda x: x['statistics']['comment_count']))
  44. repost_count = int_or_none(try_get(
  45. data, lambda x: x['statistics']['share_count']))
  46. aweme_id = data['aweme_id']
  47. return {
  48. 'id': aweme_id,
  49. 'title': uploader or aweme_id,
  50. 'description': description,
  51. 'thumbnail': thumbnail,
  52. 'uploader': uploader,
  53. 'timestamp': timestamp,
  54. 'comment_count': comment_count,
  55. 'repost_count': repost_count,
  56. 'formats': formats,
  57. }
  58. class TikTokIE(TikTokBaseIE):
  59. _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)'
  60. _TEST = {
  61. 'url': 'https://m.tiktok.com/v/6606727368545406213.html',
  62. 'md5': 'd584b572e92fcd48888051f238022420',
  63. 'info_dict': {
  64. 'id': '6606727368545406213',
  65. 'ext': 'mp4',
  66. 'title': 'Zureeal',
  67. 'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
  68. 'thumbnail': r're:^https?://.*~noop.image',
  69. 'uploader': 'Zureeal',
  70. 'timestamp': 1538248586,
  71. 'upload_date': '20180929',
  72. 'comment_count': int,
  73. 'repost_count': int,
  74. }
  75. }
  76. def _real_extract(self, url):
  77. video_id = self._match_id(url)
  78. webpage = self._download_webpage(url, video_id)
  79. data = self._parse_json(self._search_regex(
  80. r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
  81. return self._extract_aweme(data)
  82. class TikTokUserIE(TikTokBaseIE):
  83. _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)'
  84. _TEST = {
  85. 'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
  86. 'info_dict': {
  87. 'id': '188294915489964032',
  88. },
  89. 'playlist_mincount': 24,
  90. }
  91. def _real_extract(self, url):
  92. user_id = self._match_id(url)
  93. data = self._download_json(
  94. 'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
  95. query={'_signature': '_'})
  96. entries = []
  97. for aweme in data['aweme_list']:
  98. try:
  99. entry = self._extract_aweme(aweme)
  100. except ExtractorError:
  101. continue
  102. entry['extractor_key'] = TikTokIE.ie_key()
  103. entries.append(entry)
  104. return self.playlist_result(entries, user_id)