You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

138 lines
4.7 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. compat_str,
  6. ExtractorError,
  7. int_or_none,
  8. str_or_none,
  9. try_get,
  10. url_or_none,
  11. )
  12. class TikTokBaseIE(InfoExtractor):
  13. def _extract_aweme(self, data):
  14. video = data['video']
  15. description = str_or_none(try_get(data, lambda x: x['desc']))
  16. width = int_or_none(try_get(data, lambda x: video['width']))
  17. height = int_or_none(try_get(data, lambda x: video['height']))
  18. format_urls = set()
  19. formats = []
  20. for format_id in (
  21. 'play_addr_lowbr', 'play_addr', 'play_addr_h264',
  22. 'download_addr'):
  23. for format in try_get(
  24. video, lambda x: x[format_id]['url_list'], list) or []:
  25. format_url = url_or_none(format)
  26. if not format_url:
  27. continue
  28. if format_url in format_urls:
  29. continue
  30. format_urls.add(format_url)
  31. formats.append({
  32. 'url': format_url,
  33. 'ext': 'mp4',
  34. 'height': height,
  35. 'width': width,
  36. })
  37. self._sort_formats(formats)
  38. thumbnail = url_or_none(try_get(
  39. video, lambda x: x['cover']['url_list'][0], compat_str))
  40. uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
  41. timestamp = int_or_none(data.get('create_time'))
  42. comment_count = int_or_none(data.get('comment_count')) or int_or_none(
  43. try_get(data, lambda x: x['statistics']['comment_count']))
  44. repost_count = int_or_none(try_get(
  45. data, lambda x: x['statistics']['share_count']))
  46. aweme_id = data['aweme_id']
  47. return {
  48. 'id': aweme_id,
  49. 'title': uploader or aweme_id,
  50. 'description': description,
  51. 'thumbnail': thumbnail,
  52. 'uploader': uploader,
  53. 'timestamp': timestamp,
  54. 'comment_count': comment_count,
  55. 'repost_count': repost_count,
  56. 'formats': formats,
  57. }
  58. class TikTokIE(TikTokBaseIE):
  59. _VALID_URL = r'''(?x)
  60. https?://
  61. (?:
  62. (?:m\.)?tiktok\.com/v|
  63. (?:www\.)?tiktok\.com/share/video
  64. )
  65. /(?P<id>\d+)
  66. '''
  67. _TESTS = [{
  68. 'url': 'https://m.tiktok.com/v/6606727368545406213.html',
  69. 'md5': 'd584b572e92fcd48888051f238022420',
  70. 'info_dict': {
  71. 'id': '6606727368545406213',
  72. 'ext': 'mp4',
  73. 'title': 'Zureeal',
  74. 'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
  75. 'thumbnail': r're:^https?://.*~noop.image',
  76. 'uploader': 'Zureeal',
  77. 'timestamp': 1538248586,
  78. 'upload_date': '20180929',
  79. 'comment_count': int,
  80. 'repost_count': int,
  81. }
  82. }, {
  83. 'url': 'https://www.tiktok.com/share/video/6606727368545406213',
  84. 'only_matching': True,
  85. }]
  86. def _real_extract(self, url):
  87. video_id = self._match_id(url)
  88. webpage = self._download_webpage(
  89. 'https://m.tiktok.com/v/%s.html' % video_id, video_id)
  90. data = self._parse_json(self._search_regex(
  91. r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
  92. return self._extract_aweme(data)
  93. class TikTokUserIE(TikTokBaseIE):
  94. _VALID_URL = r'''(?x)
  95. https?://
  96. (?:
  97. (?:m\.)?tiktok\.com/h5/share/usr|
  98. (?:www\.)?tiktok\.com/share/user
  99. )
  100. /(?P<id>\d+)
  101. '''
  102. _TESTS = [{
  103. 'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
  104. 'info_dict': {
  105. 'id': '188294915489964032',
  106. },
  107. 'playlist_mincount': 24,
  108. }, {
  109. 'url': 'https://www.tiktok.com/share/user/188294915489964032',
  110. 'only_matching': True,
  111. }]
  112. def _real_extract(self, url):
  113. user_id = self._match_id(url)
  114. data = self._download_json(
  115. 'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
  116. query={'_signature': '_'})
  117. entries = []
  118. for aweme in data['aweme_list']:
  119. try:
  120. entry = self._extract_aweme(aweme)
  121. except ExtractorError:
  122. continue
  123. entry['extractor_key'] = TikTokIE.ie_key()
  124. entries.append(entry)
  125. return self.playlist_result(entries, user_id)