You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

200 lines
7.7 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. clean_html,
  7. determine_ext,
  8. int_or_none,
  9. js_to_json,
  10. parse_duration,
  11. )
  12. class ViewLiftBaseIE(InfoExtractor):
  13. _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
  14. class ViewLiftEmbedIE(ViewLiftBaseIE):
  15. _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX
  16. _TESTS = [{
  17. 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
  18. 'md5': '2924e9215c6eff7a55ed35b72276bd93',
  19. 'info_dict': {
  20. 'id': '74849a00-85a9-11e1-9660-123139220831',
  21. 'ext': 'mp4',
  22. 'title': '#whilewewatch',
  23. }
  24. }, {
  25. # invalid labels, 360p is better that 480p
  26. 'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
  27. 'md5': '882fca19b9eb27ef865efeeaed376a48',
  28. 'info_dict': {
  29. 'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
  30. 'ext': 'mp4',
  31. 'title': 'Life in Limbo',
  32. }
  33. }, {
  34. 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
  35. 'only_matching': True,
  36. }]
  37. @staticmethod
  38. def _extract_url(webpage):
  39. mobj = re.search(
  40. r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
  41. webpage)
  42. if mobj:
  43. return mobj.group('url')
  44. def _real_extract(self, url):
  45. video_id = self._match_id(url)
  46. webpage = self._download_webpage(url, video_id)
  47. if '>This film is not playable in your area.<' in webpage:
  48. raise ExtractorError(
  49. 'Film %s is not playable in your area.' % video_id, expected=True)
  50. formats = []
  51. has_bitrate = False
  52. for source in self._parse_json(js_to_json(self._search_regex(
  53. r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
  54. file_ = source.get('file')
  55. if not file_:
  56. continue
  57. type_ = source.get('type')
  58. ext = determine_ext(file_)
  59. format_id = source.get('label') or ext
  60. if all(v == 'm3u8' or v == 'hls' for v in (type_, ext)):
  61. formats.extend(self._extract_m3u8_formats(
  62. file_, video_id, 'mp4', m3u8_id='hls'))
  63. else:
  64. bitrate = int_or_none(self._search_regex(
  65. [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
  66. file_, 'bitrate', default=None))
  67. if not has_bitrate and bitrate:
  68. has_bitrate = True
  69. height = int_or_none(self._search_regex(
  70. r'^(\d+)[pP]$', format_id, 'height', default=None))
  71. formats.append({
  72. 'url': file_,
  73. 'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
  74. 'tbr': bitrate,
  75. 'height': height,
  76. })
  77. field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
  78. self._sort_formats(formats, field_preference)
  79. title = self._search_regex(
  80. [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
  81. webpage, 'title')
  82. return {
  83. 'id': video_id,
  84. 'title': title,
  85. 'formats': formats,
  86. }
  87. class ViewLiftIE(ViewLiftBaseIE):
  88. _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
  89. _TESTS = [{
  90. 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
  91. 'md5': '19844f897b35af219773fd63bdec2942',
  92. 'info_dict': {
  93. 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017',
  94. 'display_id': 'lost_for_life',
  95. 'ext': 'mp4',
  96. 'title': 'Lost for Life',
  97. 'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82',
  98. 'thumbnail': r're:^https?://.*\.jpg',
  99. 'duration': 4489,
  100. 'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals']
  101. }
  102. }, {
  103. 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
  104. 'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
  105. 'info_dict': {
  106. 'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
  107. 'display_id': 'the_world_cut_project/india',
  108. 'ext': 'mp4',
  109. 'title': 'India',
  110. 'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
  111. 'thumbnail': r're:^https?://.*\.jpg',
  112. 'duration': 979,
  113. 'categories': ['Documentary', 'Sports', 'Politics']
  114. }
  115. }, {
  116. # Film is not playable in your area.
  117. 'url': 'http://www.snagfilms.com/films/title/inside_mecca',
  118. 'only_matching': True,
  119. }, {
  120. # Film is not available.
  121. 'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
  122. 'only_matching': True,
  123. }, {
  124. 'url': 'http://www.winnersview.com/videos/the-good-son',
  125. 'only_matching': True,
  126. }, {
  127. 'url': 'http://www.kesari.tv/news/video/1461919076414',
  128. 'only_matching': True,
  129. }, {
  130. # Was once Kaltura embed
  131. 'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
  132. 'only_matching': True,
  133. }]
  134. def _real_extract(self, url):
  135. domain, display_id = re.match(self._VALID_URL, url).groups()
  136. webpage = self._download_webpage(url, display_id)
  137. if ">Sorry, the Film you're looking for is not available.<" in webpage:
  138. raise ExtractorError(
  139. 'Film %s is not available.' % display_id, expected=True)
  140. film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
  141. snag = self._parse_json(
  142. self._search_regex(
  143. r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
  144. display_id)
  145. for item in snag:
  146. if item.get('data', {}).get('film', {}).get('id') == film_id:
  147. data = item['data']['film']
  148. title = data['title']
  149. description = clean_html(data.get('synopsis'))
  150. thumbnail = data.get('image')
  151. duration = int_or_none(data.get('duration') or data.get('runtime'))
  152. categories = [
  153. category['title'] for category in data.get('categories', [])
  154. if category.get('title')]
  155. break
  156. else:
  157. title = self._search_regex(
  158. r'itemprop="title">([^<]+)<', webpage, 'title')
  159. description = self._html_search_regex(
  160. r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
  161. webpage, 'description', default=None) or self._og_search_description(webpage)
  162. thumbnail = self._og_search_thumbnail(webpage)
  163. duration = parse_duration(self._search_regex(
  164. r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
  165. webpage, 'duration', fatal=False))
  166. categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
  167. return {
  168. '_type': 'url_transparent',
  169. 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
  170. 'id': film_id,
  171. 'display_id': display_id,
  172. 'title': title,
  173. 'description': description,
  174. 'thumbnail': thumbnail,
  175. 'duration': duration,
  176. 'categories': categories,
  177. 'ie_key': 'ViewLiftEmbed',
  178. }