You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

68 lines
2.5 KiB

  1. # coding=utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. int_or_none,
  6. unified_strdate,
  7. )
  8. class JpopsukiIE(InfoExtractor):
  9. IE_NAME = 'jpopsuki.tv'
  10. _VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/(?:category/)?video/[^/]+/(?P<id>\S+)'
  11. _TEST = {
  12. 'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
  13. 'md5': '88018c0c1a9b1387940e90ec9e7e198e',
  14. 'info_dict': {
  15. 'id': '00be659d23b0b40508169cdee4545771',
  16. 'ext': 'mp4',
  17. 'title': 'ayumi hamasaki - evolution',
  18. 'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
  19. 'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
  20. 'uploader': 'plama_chan',
  21. 'uploader_id': '404',
  22. 'upload_date': '20121101'
  23. }
  24. }
  25. def _real_extract(self, url):
  26. video_id = self._match_id(url)
  27. webpage = self._download_webpage(url, video_id)
  28. video_url = 'http://www.jpopsuki.tv' + self._html_search_regex(
  29. r'<source src="(.*?)" type', webpage, 'video url')
  30. video_title = self._og_search_title(webpage)
  31. description = self._og_search_description(webpage)
  32. thumbnail = self._og_search_thumbnail(webpage)
  33. uploader = self._html_search_regex(
  34. r'<li>from: <a href="/user/view/user/(.*?)/uid/',
  35. webpage, 'video uploader', fatal=False)
  36. uploader_id = self._html_search_regex(
  37. r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
  38. webpage, 'video uploader_id', fatal=False)
  39. upload_date = unified_strdate(self._html_search_regex(
  40. r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
  41. fatal=False))
  42. view_count_str = self._html_search_regex(
  43. r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
  44. fatal=False)
  45. comment_count_str = self._html_search_regex(
  46. r'<h2>([0-9]+?) comments</h2>', webpage, 'video comment_count',
  47. fatal=False)
  48. return {
  49. 'id': video_id,
  50. 'url': video_url,
  51. 'title': video_title,
  52. 'description': description,
  53. 'thumbnail': thumbnail,
  54. 'uploader': uploader,
  55. 'uploader_id': uploader_id,
  56. 'upload_date': upload_date,
  57. 'view_count': int_or_none(view_count_str),
  58. 'comment_count': int_or_none(comment_count_str),
  59. }