You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

98 lines
3.7 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_urllib_parse,
  7. compat_urlparse,
  8. )
  9. from ..utils import (
  10. ExtractorError,
  11. )
  12. class NaverIE(InfoExtractor):
  13. _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
  14. _TESTS = [{
  15. 'url': 'http://tvcast.naver.com/v/81652',
  16. 'info_dict': {
  17. 'id': '81652',
  18. 'ext': 'mp4',
  19. 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
  20. 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
  21. 'upload_date': '20130903',
  22. },
  23. }, {
  24. 'url': 'http://tvcast.naver.com/v/395837',
  25. 'md5': '638ed4c12012c458fefcddfd01f173cd',
  26. 'info_dict': {
  27. 'id': '395837',
  28. 'ext': 'mp4',
  29. 'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
  30. 'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7',
  31. 'upload_date': '20150519',
  32. },
  33. 'skip': 'Georestricted',
  34. }]
  35. def _real_extract(self, url):
  36. video_id = self._match_id(url)
  37. webpage = self._download_webpage(url, video_id)
  38. m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
  39. webpage)
  40. if m_id is None:
  41. error = self._html_search_regex(
  42. r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
  43. webpage, 'error', default=None)
  44. if error:
  45. raise ExtractorError(error, expected=True)
  46. raise ExtractorError('couldn\'t extract vid and key')
  47. vid = m_id.group(1)
  48. key = m_id.group(2)
  49. query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key, })
  50. query_urls = compat_urllib_parse.urlencode({
  51. 'masterVid': vid,
  52. 'protocol': 'p2p',
  53. 'inKey': key,
  54. })
  55. info = self._download_xml(
  56. 'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
  57. video_id, 'Downloading video info')
  58. urls = self._download_xml(
  59. 'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
  60. video_id, 'Downloading video formats info')
  61. formats = []
  62. for format_el in urls.findall('EncodingOptions/EncodingOption'):
  63. domain = format_el.find('Domain').text
  64. uri = format_el.find('uri').text
  65. f = {
  66. 'url': compat_urlparse.urljoin(domain, uri),
  67. 'ext': 'mp4',
  68. 'width': int(format_el.find('width').text),
  69. 'height': int(format_el.find('height').text),
  70. }
  71. if domain.startswith('rtmp'):
  72. # urlparse does not support custom schemes
  73. # https://bugs.python.org/issue18828
  74. f.update({
  75. 'url': domain + uri,
  76. 'ext': 'flv',
  77. 'rtmp_protocol': '1', # rtmpt
  78. })
  79. formats.append(f)
  80. self._sort_formats(formats)
  81. return {
  82. 'id': video_id,
  83. 'title': info.find('Subject').text,
  84. 'formats': formats,
  85. 'description': self._og_search_description(webpage),
  86. 'thumbnail': self._og_search_thumbnail(webpage),
  87. 'upload_date': info.find('WriteDate').text.replace('.', ''),
  88. 'view_count': int(info.find('PlayCount').text),
  89. }