You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

255 lines
8.9 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..compat import (
  5. compat_HTTPError,
  6. compat_str,
  7. )
  8. from ..utils import (
  9. ExtractorError,
  10. int_or_none,
  11. float_or_none,
  12. parse_resolution,
  13. str_or_none,
  14. try_get,
  15. unified_timestamp,
  16. url_or_none,
  17. urljoin,
  18. )
  class PuhuTVIE(InfoExtractor):
      """Extractor for single puhutv.com watch pages (URLs ending in ``-izle``)."""

      _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
      IE_NAME = 'puhutv'
      _TESTS = [{
          # film
          'url': 'https://puhutv.com/sut-kardesler-izle',
          'md5': 'a347470371d56e1585d1b2c8dab01c96',
          'info_dict': {
              'id': '5085',
              'display_id': 'sut-kardesler',
              'ext': 'mp4',
              'title': 'Süt Kardeşler',
              'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
              'thumbnail': r're:^https?://.*\.jpg$',
              'duration': 4832.44,
              'creator': 'Arzu Film',
              'timestamp': 1561062602,
              'upload_date': '20190620',
              'release_year': 1976,
              'view_count': int,
              'tags': list,
          },
      }, {
          # episode, geo restricted, bypassable with --geo-verification-proxy
          'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
          'only_matching': True,
      }, {
          # 4k, with subtitles
          'url': 'https://puhutv.com/dip-1-bolum-izle',
          'only_matching': True,
      }]

      # Maps the subtitle language labels used by the API to language codes;
      # labels not listed here are used as subtitle keys unchanged.
      _SUBTITLE_LANGS = {
          'English': 'en',
          'Deutsch': 'de',
          'عربى': 'ar',
      }

      def _real_extract(self, url):
          """Build the info dict for one watch page.

          Downloads the slug metadata and the asset's video list, then
          assembles formats, thumbnails, tags and subtitles from them.

          Raises whatever ``raise_geo_restricted`` raises when the video list
          request fails with HTTP 403; any other ``ExtractorError`` from that
          request is re-raised unchanged.
          """
          display_id = self._match_id(url)

          info = self._download_json(
              urljoin(url, '/api/slug/%s-izle' % display_id),
              display_id)['data']

          video_id = compat_str(info['id'])
          # The 'title' entry is used as the show-level metadata dict: its
          # name, description, producer, genres and release year are read below.
          show = info.get('title') or {}
          title = info.get('name') or show['name']
          if info.get('display_name'):
              title = '%s %s' % (title, info['display_name'])

          try:
              videos = self._download_json(
                  'https://puhutv.com/api/assets/%s/videos' % video_id,
                  display_id, 'Downloading video JSON',
                  headers=self.geo_verification_headers())
          except ExtractorError as e:
              # A 403 on this endpoint is reported as a geo restriction.
              if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                  self.raise_geo_restricted()
              raise

          # Media URLs already handled, so duplicate entries are skipped.
          seen_urls = set()
          formats = []

          def add_http_from_hls(m3u8_f):
              """Append an HTTP variant of *m3u8_f* when its URL can be rewritten.

              The HLS URL is turned into a direct MP4 URL by swapping '/hls/'
              for '/mp4/' and '/chunklist.m3u8' for '.mp4'. Nothing is added
              when neither substitution changes the URL.
              """
              http_url = m3u8_f['url'].replace(
                  '/hls/', '/mp4/').replace('/chunklist.m3u8', '.mp4')
              if http_url != m3u8_f['url']:
                  f = m3u8_f.copy()
                  f.update({
                      'format_id': f['format_id'].replace('hls', 'http'),
                      'protocol': 'http',
                      'url': http_url,
                  })
                  formats.append(f)

          for video in videos['data']['videos']:
              media_url = url_or_none(video.get('url'))
              if not media_url or media_url in seen_urls:
                  continue
              seen_urls.add(media_url)

              playlist = video.get('is_playlist')
              is_master_playlist = (
                  (video.get('stream_type') == 'hls' and playlist is True)
                  or 'playlist.m3u8' in media_url)
              if is_master_playlist:
                  # Master playlist: expand it into its individual variants.
                  m3u8_formats = self._extract_m3u8_formats(
                      media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                      m3u8_id='hls', fatal=False)
                  for m3u8_f in m3u8_formats:
                      formats.append(m3u8_f)
                      add_http_from_hls(m3u8_f)
                  continue

              quality = int_or_none(video.get('quality'))
              f = {
                  'url': media_url,
                  'ext': 'mp4',
                  'height': quality,
              }
              video_format = video.get('video_format')
              # Only entries explicitly flagged as non-playlist count as a
              # single HLS rendition here.
              is_hls = (
                  video_format == 'hls'
                  or '/hls/' in media_url
                  or '/chunklist.m3u8' in media_url) and playlist is False
              if is_hls:
                  format_id = 'hls'
                  f['protocol'] = 'm3u8_native'
              elif video_format == 'mp4':
                  format_id = 'http'
              else:
                  # Neither a recognised HLS rendition nor MP4: skip it.
                  continue
              if quality:
                  format_id += '-%sp' % quality
              f['format_id'] = format_id
              formats.append(f)
              if is_hls:
                  add_http_from_hls(f)
          self._sort_formats(formats)

          creator = try_get(
              show, lambda x: x['producer']['name'], compat_str)

          content = info.get('content') or {}

          images = try_get(
              content, lambda x: x['images']['wide'], dict) or {}
          thumbnails = []
          for image_id, image_url in images.items():
              if not isinstance(image_url, compat_str):
                  continue
              # Values that are neither absolute nor protocol-relative get an
              # https scheme prepended.
              if not image_url.startswith(('http', '//')):
                  image_url = 'https://%s' % image_url
              # The image key is handed to parse_resolution, whose result
              # seeds the thumbnail entry before id and url are filled in.
              t = parse_resolution(image_id)
              t.update({
                  'id': image_id,
                  'url': image_url,
              })
              thumbnails.append(t)

          tags = []
          for genre in show.get('genres') or []:
              if not isinstance(genre, dict):
                  continue
              genre_name = genre.get('name')
              if genre_name and isinstance(genre_name, compat_str):
                  tags.append(genre_name)

          subtitles = {}
          for subtitle in content.get('subtitles') or []:
              if not isinstance(subtitle, dict):
                  continue
              lang = subtitle.get('language')
              sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
              if not lang or not isinstance(lang, compat_str) or not sub_url:
                  continue
              subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
                  'url': sub_url,
              }]

          return {
              'id': video_id,
              'display_id': display_id,
              'title': title,
              'description': info.get('description') or show.get('description'),
              'season_id': str_or_none(info.get('season_id')),
              'season_number': int_or_none(info.get('season_number')),
              'episode_number': int_or_none(info.get('episode_number')),
              'release_year': int_or_none(show.get('released_at')),
              'timestamp': unified_timestamp(info.get('created_at')),
              'creator': creator,
              'view_count': int_or_none(content.get('watch_count')),
              # duration_in_ms is scaled by 1000 to yield seconds.
              'duration': float_or_none(content.get('duration_in_ms'), 1000),
              'tags': tags,
              'subtitles': subtitles,
              'thumbnails': thumbnails,
              'formats': formats,
          }
  class PuhuTVSerieIE(InfoExtractor):
      """Extractor for puhutv.com detail pages (URLs ending in ``-detay``)."""

      _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
      IE_NAME = 'puhutv:serie'
      _TESTS = [{
          'url': 'https://puhutv.com/deniz-yildizi-detay',
          'info_dict': {
              'title': 'Deniz Yıldızı',
              'id': 'deniz-yildizi',
          },
          'playlist_mincount': 205,
      }, {
          # a film detail page, which uses the same URL scheme as series pages
          'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
          'only_matching': True,
      }]

      def _extract_entries(self, seasons):
          """Yield a ``url_result`` for every episode of every season.

          Each season with an id is fetched page by page (40 episodes per
          request) until the response no longer reports ``hasMore`` as True.
          Seasons without an id and episodes without a ``slugPath`` are
          skipped, as are entries that are not dicts.
          """
          for season in seasons:
              if not isinstance(season, dict):
                  continue
              season_id = season.get('id')
              if not season_id:
                  continue
              page = 1
              has_more = True
              while has_more is True:
                  # Kept in its own name so the loop variable is not clobbered.
                  season_page = self._download_json(
                      'https://galadriel.puhutv.com/seasons/%s' % season_id,
                      season_id, 'Downloading page %s' % page, query={
                          'page': page,
                          'per': 40,
                      })
                  episodes = season_page.get('episodes')
                  if isinstance(episodes, list):
                      for ep in episodes:
                          if not isinstance(ep, dict):
                              continue
                          slug_path = str_or_none(ep.get('slugPath'))
                          if not slug_path:
                              continue
                          video_id = str_or_none(int_or_none(ep.get('id')))
                          yield self.url_result(
                              'https://puhutv.com/%s' % slug_path,
                              ie=PuhuTVIE.ie_key(), video_id=video_id,
                              video_title=ep.get('name') or ep.get('eventLabel'))
                  page += 1
                  has_more = season_page.get('hasMore')

      def _real_extract(self, url):
          """Return a playlist for a series, or a single result for a film.

          When the detail API response lists seasons, the episodes are
          returned as a playlist. Otherwise the page is treated as a film and
          the result points at the matching ``-izle`` watch page.
          """
          playlist_id = self._match_id(url)

          info = self._download_json(
              urljoin(url, '/api/slug/%s-detay' % playlist_id),
              playlist_id)['data']

          seasons = info.get('seasons')
          if seasons:
              return self.playlist_result(
                  self._extract_entries(seasons), playlist_id, info.get('name'))

          # Films use the same detail URL scheme as series; fall back to the
          # slug of the first asset when the response has no top-level slug.
          video_id = info.get('slug') or info['assets'][0]['slug']
          return self.url_result(
              'https://puhutv.com/%s-izle' % video_id,
              PuhuTVIE.ie_key(), video_id)