You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

111 lines
3.6 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. from .common import InfoExtractor
  6. from ..compat import (
  7. compat_b64decode,
  8. compat_str,
  9. compat_urlparse,
  10. )
  11. from ..utils import (
  12. extract_attributes,
  13. ExtractorError,
  14. get_elements_by_class,
  15. urlencode_postdata,
  16. )
  class EinthusanIE(InfoExtractor):
      """Extractor for movie watch pages on einthusan.tv, einthusan.com and einthusan.ca."""

      # 'host' is reused to build the AJAX endpoint on the same domain as the
      # page; 'id' is the movie identifier taken from the URL path.
      _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
      _TESTS = [{
          'url': 'https://einthusan.tv/movie/watch/9097/',
          'md5': 'ff0f7f2065031b8a2cf13a933731c035',
          'info_dict': {
              'id': '9097',
              'ext': 'mp4',
              'title': 'Ae Dil Hai Mushkil',
              'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
              'thumbnail': r're:^https?://.*\.jpg$',
          }
      }, {
          'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
          'only_matching': True,
      }, {
          'url': 'https://einthusan.com/movie/watch/9097/',
          'only_matching': True,
      }, {
          'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
          'only_matching': True,
      }]

      # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
      def _decrypt(self, encrypted_data, video_id):
          """Decode an obfuscated base64 string into a parsed JSON object.

          The input is rearranged before decoding: the first ten characters
          are kept, the final character is moved to index 10, the characters
          at indices 10 and 11 are discarded, and the remainder (up to but
          excluding the final character) follows. The result is base64-decoded,
          decoded as UTF-8 and parsed as JSON.

          video_id is passed through to _parse_json.
          """
          return self._parse_json(compat_b64decode((
              encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
          )).decode('utf-8'), video_id)

      def _real_extract(self, url):
          """Build the info dict for a movie watch page.

          Downloads the watch page, sends the player's "pingables" attribute
          to the site's AJAX endpoint, decodes the returned link data and
          collects the HLS and/or MP4 formats it contains.

          Raises ExtractorError (expected) when the endpoint answers with a
          '/ratelimited/' redirect string instead of link data.
          """
          mobj = re.match(self._VALID_URL, url)
          host = mobj.group('host')
          video_id = mobj.group('id')

          webpage = self._download_webpage(url, video_id)

          title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')

          # The opening <section id="UIVideoPlayer"> tag carries the
          # data-ejpingables attribute required by the AJAX request below.
          player_params = extract_attributes(self._search_regex(
              r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))

          # The page ID is sent back in the 'gorilla.csrf.Token' form field.
          page_id = self._html_search_regex(
              '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
          video_data = self._download_json(
              'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
              data=urlencode_postdata({
                  'xEvent': 'UIVideoPlayer.PingOutcome',
                  'xJson': json.dumps({
                      'EJOutcomes': player_params['data-ejpingables'],
                      'NativeHLS': False
                  }),
                  'arcVersion': 3,
                  'appVersion': 59,
                  'gorilla.csrf.Token': page_id,
              }))['Data']

          # When rate limited, 'Data' is a redirect path string rather than
          # the object holding the encoded links.
          if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
              raise ExtractorError(
                  'Download rate reached. Please try again later.', expected=True)

          ej_links = self._decrypt(video_data['EJLinks'], video_id)

          formats = []

          m3u8_url = ej_links.get('HLSLink')
          if m3u8_url:
              formats.extend(self._extract_m3u8_formats(
                  m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))

          mp4_url = ej_links.get('MP4Link')
          if mp4_url:
              formats.append({
                  'url': mp4_url,
              })

          self._sort_formats(formats)

          # The synopsis is optional metadata: a page without a 'synopsis'
          # element must not abort an otherwise successful extraction with an
          # IndexError, so fall back to None when nothing matches.
          synopses = get_elements_by_class('synopsis', webpage)
          description = synopses[0] if synopses else None

          thumbnail = self._html_search_regex(
              r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
              webpage, 'thumbnail url', fatal=False, group='url')
          if thumbnail is not None:
              # The matched src may be relative; resolve it against the page URL.
              thumbnail = compat_urlparse.urljoin(url, thumbnail)

          return {
              'id': video_id,
              'title': title,
              'formats': formats,
              'thumbnail': thumbnail,
              'description': description,
          }