You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

50 lines
1.7 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. str_to_int,
  6. ExtractorError
  7. )
  8. class AppleConnectIE(InfoExtractor):
  9. _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
  10. _TEST = {
  11. 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
  12. 'md5': 'e7c38568a01ea45402570e6029206723',
  13. 'info_dict': {
  14. 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
  15. 'ext': 'm4v',
  16. 'title': 'Energy',
  17. 'uploader': 'Drake',
  18. 'thumbnail': r're:^https?://.*\.jpg$',
  19. 'upload_date': '20150710',
  20. 'timestamp': 1436545535,
  21. },
  22. }
  23. def _real_extract(self, url):
  24. video_id = self._match_id(url)
  25. webpage = self._download_webpage(url, video_id)
  26. try:
  27. video_json = self._html_search_regex(
  28. r'class="auc-video-data">(\{.*?\})', webpage, 'json')
  29. except ExtractorError:
  30. raise ExtractorError('This post doesn\'t contain a video', expected=True)
  31. video_data = self._parse_json(video_json, video_id)
  32. timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
  33. like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
  34. return {
  35. 'id': video_id,
  36. 'url': video_data['sslSrc'],
  37. 'title': video_data['title'],
  38. 'description': video_data['description'],
  39. 'uploader': video_data['artistName'],
  40. 'thumbnail': video_data['artworkUrl'],
  41. 'timestamp': timestamp,
  42. 'like_count': like_count,
  43. }