You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

181 lines
6.8 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. clean_html,
  7. determine_ext,
  8. int_or_none,
  9. js_to_json,
  10. parse_duration,
  11. )
  12. class SnagFilmsEmbedIE(InfoExtractor):
  13. _VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})'
  14. _TESTS = [{
  15. 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
  16. 'md5': '2924e9215c6eff7a55ed35b72276bd93',
  17. 'info_dict': {
  18. 'id': '74849a00-85a9-11e1-9660-123139220831',
  19. 'ext': 'mp4',
  20. 'title': '#whilewewatch',
  21. }
  22. }, {
  23. # invalid labels, 360p is better that 480p
  24. 'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
  25. 'md5': '882fca19b9eb27ef865efeeaed376a48',
  26. 'info_dict': {
  27. 'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
  28. 'ext': 'mp4',
  29. 'title': 'Life in Limbo',
  30. }
  31. }, {
  32. 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
  33. 'only_matching': True,
  34. }]
  35. @staticmethod
  36. def _extract_url(webpage):
  37. mobj = re.search(
  38. r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1',
  39. webpage)
  40. if mobj:
  41. return mobj.group('url')
  42. def _real_extract(self, url):
  43. video_id = self._match_id(url)
  44. webpage = self._download_webpage(url, video_id)
  45. if '>This film is not playable in your area.<' in webpage:
  46. raise ExtractorError(
  47. 'Film %s is not playable in your area.' % video_id, expected=True)
  48. formats = []
  49. for source in self._parse_json(js_to_json(self._search_regex(
  50. r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
  51. file_ = source.get('file')
  52. if not file_:
  53. continue
  54. type_ = source.get('type')
  55. ext = determine_ext(file_)
  56. format_id = source.get('label') or ext
  57. if all(v == 'm3u8' for v in (type_, ext)):
  58. formats.extend(self._extract_m3u8_formats(
  59. file_, video_id, 'mp4', m3u8_id='hls'))
  60. else:
  61. bitrate = int_or_none(self._search_regex(
  62. [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
  63. file_, 'bitrate', default=None))
  64. height = int_or_none(self._search_regex(
  65. r'^(\d+)[pP]$', format_id, 'height', default=None))
  66. formats.append({
  67. 'url': file_,
  68. 'format_id': format_id,
  69. 'tbr': bitrate,
  70. 'height': height,
  71. })
  72. self._sort_formats(formats)
  73. title = self._search_regex(
  74. [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
  75. webpage, 'title')
  76. return {
  77. 'id': video_id,
  78. 'title': title,
  79. 'formats': formats,
  80. }
  81. class SnagFilmsIE(InfoExtractor):
  82. _VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)'
  83. _TESTS = [{
  84. 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
  85. 'md5': '19844f897b35af219773fd63bdec2942',
  86. 'info_dict': {
  87. 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017',
  88. 'display_id': 'lost_for_life',
  89. 'ext': 'mp4',
  90. 'title': 'Lost for Life',
  91. 'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82',
  92. 'thumbnail': 're:^https?://.*\.jpg',
  93. 'duration': 4489,
  94. 'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals']
  95. }
  96. }, {
  97. 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
  98. 'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
  99. 'info_dict': {
  100. 'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
  101. 'display_id': 'the_world_cut_project/india',
  102. 'ext': 'mp4',
  103. 'title': 'India',
  104. 'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
  105. 'thumbnail': 're:^https?://.*\.jpg',
  106. 'duration': 979,
  107. 'categories': ['Documentary', 'Sports', 'Politics']
  108. }
  109. }, {
  110. # Film is not playable in your area.
  111. 'url': 'http://www.snagfilms.com/films/title/inside_mecca',
  112. 'only_matching': True,
  113. }, {
  114. # Film is not available.
  115. 'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
  116. 'only_matching': True,
  117. }]
  118. def _real_extract(self, url):
  119. display_id = self._match_id(url)
  120. webpage = self._download_webpage(url, display_id)
  121. if ">Sorry, the Film you're looking for is not available.<" in webpage:
  122. raise ExtractorError(
  123. 'Film %s is not available.' % display_id, expected=True)
  124. film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
  125. snag = self._parse_json(
  126. self._search_regex(
  127. 'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
  128. display_id)
  129. for item in snag:
  130. if item.get('data', {}).get('film', {}).get('id') == film_id:
  131. data = item['data']['film']
  132. title = data['title']
  133. description = clean_html(data.get('synopsis'))
  134. thumbnail = data.get('image')
  135. duration = int_or_none(data.get('duration') or data.get('runtime'))
  136. categories = [
  137. category['title'] for category in data.get('categories', [])
  138. if category.get('title')]
  139. break
  140. else:
  141. title = self._search_regex(
  142. r'itemprop="title">([^<]+)<', webpage, 'title')
  143. description = self._html_search_regex(
  144. r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
  145. webpage, 'description', default=None) or self._og_search_description(webpage)
  146. thumbnail = self._og_search_thumbnail(webpage)
  147. duration = parse_duration(self._search_regex(
  148. r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
  149. webpage, 'duration', fatal=False))
  150. categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
  151. return {
  152. '_type': 'url_transparent',
  153. 'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id,
  154. 'id': film_id,
  155. 'display_id': display_id,
  156. 'title': title,
  157. 'description': description,
  158. 'thumbnail': thumbnail,
  159. 'duration': duration,
  160. 'categories': categories,
  161. }