|
@ -1,6 +1,7 @@ |
|
|
# encoding: utf-8 |
|
|
# encoding: utf-8 |
|
|
from __future__ import unicode_literals |
|
|
from __future__ import unicode_literals |
|
|
|
|
|
|
|
|
|
|
|
import collections |
|
|
import re |
|
|
import re |
|
|
import json |
|
|
import json |
|
|
import sys |
|
|
import sys |
|
@ -16,7 +17,6 @@ from ..utils import ( |
|
|
get_element_by_class, |
|
|
get_element_by_class, |
|
|
int_or_none, |
|
|
int_or_none, |
|
|
orderedSet, |
|
|
orderedSet, |
|
|
parse_duration, |
|
|
|
|
|
remove_start, |
|
|
remove_start, |
|
|
str_to_int, |
|
|
str_to_int, |
|
|
unescapeHTML, |
|
|
unescapeHTML, |
|
@ -447,6 +447,9 @@ class VKWallPostIE(VKBaseIE): |
|
|
'skip_download': True, |
|
|
'skip_download': True, |
|
|
}, |
|
|
}, |
|
|
}], |
|
|
}], |
|
|
|
|
|
'params': { |
|
|
|
|
|
'usenetrc': True, |
|
|
|
|
|
}, |
|
|
'skip': 'Requires vk account credentials', |
|
|
'skip': 'Requires vk account credentials', |
|
|
}, { |
|
|
}, { |
|
|
# single YouTube embed, no leading - |
|
|
# single YouTube embed, no leading - |
|
@ -456,6 +459,9 @@ class VKWallPostIE(VKBaseIE): |
|
|
'title': 'Sergey Gorbunov - Wall post 85155021_6319', |
|
|
'title': 'Sergey Gorbunov - Wall post 85155021_6319', |
|
|
}, |
|
|
}, |
|
|
'playlist_count': 1, |
|
|
'playlist_count': 1, |
|
|
|
|
|
'params': { |
|
|
|
|
|
'usenetrc': True, |
|
|
|
|
|
}, |
|
|
'skip': 'Requires vk account credentials', |
|
|
'skip': 'Requires vk account credentials', |
|
|
}, { |
|
|
}, { |
|
|
# wall page URL |
|
|
# wall page URL |
|
@ -483,37 +489,41 @@ class VKWallPostIE(VKBaseIE): |
|
|
raise ExtractorError('VK said: %s' % error, expected=True) |
|
|
raise ExtractorError('VK said: %s' % error, expected=True) |
|
|
|
|
|
|
|
|
description = clean_html(get_element_by_class('wall_post_text', webpage)) |
|
|
description = clean_html(get_element_by_class('wall_post_text', webpage)) |
|
|
uploader = clean_html(get_element_by_class( |
|
|
|
|
|
'fw_post_author', webpage)) or self._og_search_description(webpage) |
|
|
|
|
|
|
|
|
uploader = clean_html(get_element_by_class('author', webpage)) |
|
|
thumbnail = self._og_search_thumbnail(webpage) |
|
|
thumbnail = self._og_search_thumbnail(webpage) |
|
|
|
|
|
|
|
|
entries = [] |
|
|
entries = [] |
|
|
|
|
|
|
|
|
for audio in re.finditer(r'''(?sx) |
|
|
|
|
|
<input[^>]+ |
|
|
|
|
|
id=(?P<q1>["\'])audio_info(?P<id>\d+_\d+).*?(?P=q1)[^>]+ |
|
|
|
|
|
value=(?P<q2>["\'])(?P<url>http.+?)(?P=q2) |
|
|
|
|
|
.+? |
|
|
|
|
|
</table>''', webpage): |
|
|
|
|
|
audio_html = audio.group(0) |
|
|
|
|
|
audio_id = audio.group('id') |
|
|
|
|
|
duration = parse_duration(get_element_by_class('duration', audio_html)) |
|
|
|
|
|
track = self._html_search_regex( |
|
|
|
|
|
r'<span[^>]+id=["\']title%s[^>]*>([^<]+)' % audio_id, |
|
|
|
|
|
audio_html, 'title', default=None) |
|
|
|
|
|
artist = self._html_search_regex( |
|
|
|
|
|
r'>([^<]+)</a></b>\s*&ndash', audio_html, |
|
|
|
|
|
'artist', default=None) |
|
|
|
|
|
entries.append({ |
|
|
|
|
|
'id': audio_id, |
|
|
|
|
|
'url': audio.group('url'), |
|
|
|
|
|
'title': '%s - %s' % (artist, track) if artist and track else audio_id, |
|
|
|
|
|
'thumbnail': thumbnail, |
|
|
|
|
|
'duration': duration, |
|
|
|
|
|
'uploader': uploader, |
|
|
|
|
|
'artist': artist, |
|
|
|
|
|
'track': track, |
|
|
|
|
|
}) |
|
|
|
|
|
|
|
|
audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage) |
|
|
|
|
|
if audio_ids: |
|
|
|
|
|
al_audio = self._download_webpage( |
|
|
|
|
|
'https://vk.com/al_audio.php', post_id, |
|
|
|
|
|
note='Downloading audio info', fatal=False, |
|
|
|
|
|
data=urlencode_postdata({ |
|
|
|
|
|
'act': 'reload_audio', |
|
|
|
|
|
'al': '1', |
|
|
|
|
|
'ids': ','.join(audio_ids) |
|
|
|
|
|
})) |
|
|
|
|
|
if al_audio: |
|
|
|
|
|
Audio = collections.namedtuple( |
|
|
|
|
|
'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration']) |
|
|
|
|
|
audios = self._parse_json( |
|
|
|
|
|
self._search_regex( |
|
|
|
|
|
r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'), |
|
|
|
|
|
post_id, fatal=False, transform_source=unescapeHTML) |
|
|
|
|
|
if isinstance(audios, list): |
|
|
|
|
|
for audio in audios: |
|
|
|
|
|
a = Audio._make(audio[:6]) |
|
|
|
|
|
entries.append({ |
|
|
|
|
|
'id': '%s_%s' % (a.user_id, a.id), |
|
|
|
|
|
'url': a.url, |
|
|
|
|
|
'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id, |
|
|
|
|
|
'thumbnail': thumbnail, |
|
|
|
|
|
'duration': a.duration, |
|
|
|
|
|
'uploader': uploader, |
|
|
|
|
|
'artist': a.artist, |
|
|
|
|
|
'track': a.track, |
|
|
|
|
|
}) |
|
|
|
|
|
|
|
|
for video in re.finditer( |
|
|
for video in re.finditer( |
|
|
r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage): |
|
|
r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage): |
|
|