Skip to content

Commit 181c88c

Browse files
authored
[DRAFT] Feature/search (pytube#1030)
* Added search functionality. * Added repr method to YouTube to make it more useful. * Added some docstrings + comments for clarity.
1 parent 257e6d2 commit 181c88c

File tree

7 files changed

+277
-2
lines changed

7 files changed

+277
-2
lines changed

docs/api.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,13 @@ CaptionQuery Object
5555
:members:
5656
:inherited-members:
5757

58+
Search Object
59+
-------------
60+
61+
.. autoclass:: pytube.contrib.search.Search
62+
:members:
63+
:inherited-members:
64+
5865
Extract
5966
-------
6067

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ of pytube.
5959
user/captions
6060
user/playlist
6161
user/channel
62+
user/search
6263
user/cli
6364
user/exceptions
6465

docs/user/search.rst

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
.. _search:
2+
3+
Using the search feature
4+
========================
5+
6+
Pytube includes functionality to search YouTube and return results almost
7+
identical to those you would find using the search bar on YouTube's website.
8+
The integration into pytube means that we can directly provide you with
9+
YouTube objects that can be inspected and downloaded, instead of needing to do
10+
additional processing.
11+
12+
Using the Search object is really easy::
13+
14+
>>> from pytube import Search
15+
>>> s = Search('YouTube Rewind')
16+
>>> len(s.results)
17+
17
18+
>>> s.results
19+
[\
20+
<pytube.__main__.YouTube object: videoId=YbJOTdZBX1g>, \
21+
<pytube.__main__.YouTube object: videoId=PKtnafFtfEo>, \
22+
...\
23+
]
24+
>>>
25+
26+
Due to the potential for an endless stream of results, and in order to prevent
27+
a user from accidentally entering an infinite loop of requesting additional
28+
results, the ``.results`` attribute will only ever request the first set of
29+
search results. Additional results can be explicitly requested by using the
30+
``.get_next_results()`` method, which will append any additional results to
31+
the ``.results`` attribute::
32+
33+
>>> s.get_next_results()
34+
>>> len(s.results)
35+
34
36+
>>>
37+
38+
Additional functionality
39+
========================
40+
41+
In addition to the basic search functionality which returns YouTube objects,
42+
searches also have associated autocomplete suggestions. These can be accessed
43+
as follows::
44+
45+
>>> s.completion_suggestions
46+
[\
47+
'can this video get 1 million dislikes', \
48+
'youtube rewind 2020 musical', \
49+
...\
50+
]

pytube/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@
1616
from pytube.__main__ import YouTube
1717
from pytube.contrib.playlist import Playlist
1818
from pytube.contrib.channel import Channel
19+
from pytube.contrib.search import Search

pytube/__main__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ def __init__(
8585
self._title = None
8686
self._publish_date = None
8787

88+
def __repr__(self):
    """Return a short debugging representation that identifies the video.

    :rtype: str
    :returns:
        A string containing the value of ``self.video_id``.
    """
    video_id = self.video_id
    return f'<pytube.__main__.YouTube object: videoId={video_id}>'
90+
8891
@property
8992
def watch_html(self):
9093
if self._watch_html:

pytube/contrib/search.py

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
"""Module for interacting with YouTube search."""
2+
# Native python imports
3+
import logging
4+
5+
# Local imports
6+
from pytube import YouTube
7+
from pytube.innertube import InnerTube
8+
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
class Search:
    """Query YouTube search through the innertube client.

    The first page of results is fetched lazily the first time ``results``
    (or ``completion_suggestions``) is read. Further pages are only fetched
    when ``get_next_results()`` is called explicitly.
    """

    # Renderer keys for result entries that are not plain videos and are
    # therefore skipped: "people also watched" style shelves, auto-generated
    # mixes, playlists and channels.
    _SKIPPED_RENDERERS = (
        'shelfRenderer',
        'radioRenderer',
        'playlistRenderer',
        'channelRenderer',
    )

    def __init__(self, query):
        """Initialize Search object.

        :param str query:
            Search query provided by the user.
        """
        self.query = query
        self._innertube_client = InnerTube()

        # The first search, without a continuation, is structured differently
        # and contains completion suggestions, so we must store this separately
        self._initial_results = None

        self._results = None
        self._completion_suggestions = None

        # Used for keeping track of query continuations so that new results
        # are always returned when get_next_results() is called
        self._current_continuation = None

    @property
    def completion_suggestions(self):
        """Return query autocompletion suggestions for the query.

        :rtype: list
        :returns:
            A list of autocomplete suggestions provided by YouTube for the
            query, or ``None`` when there are no results or the initial
            response carries no ``refinements`` entry.
        """
        if self._completion_suggestions:
            return self._completion_suggestions
        # Reading ``results`` performs the initial query when it has not
        # been run yet, which is what populates ``_initial_results``.
        if self.results and self._initial_results:
            # ``.get`` avoids a KeyError when 'refinements' is absent.
            self._completion_suggestions = self._initial_results.get(
                'refinements'
            )
        return self._completion_suggestions

    @property
    def results(self):
        """Return search results.

        On first call, will generate and return the first set of results.
        Additional results can be generated using ``.get_next_results()``.

        :rtype: list
        :returns:
            A list of YouTube objects (empty when nothing was found).
        """
        # Compare against None rather than truthiness so that an empty first
        # page is cached instead of triggering a new request on every access.
        if self._results is not None:
            return self._results

        videos, continuation = self.fetch_and_parse()
        self._results = videos if videos is not None else []
        self._current_continuation = continuation
        return self._results

    def get_next_results(self):
        """Use the stored continuation string to fetch the next set of results.

        This method does not return the results, but instead updates the
        results property.

        :raises IndexError:
            If there is no stored continuation to fetch from.
        """
        if not self._current_continuation:
            raise IndexError('No further search results are available.')

        videos, continuation = self.fetch_and_parse(self._current_continuation)
        # ``videos`` is None when the page has no itemSectionRenderer.
        if videos:
            self._results.extend(videos)
        self._current_continuation = continuation

    def fetch_and_parse(self, continuation=None):
        """Fetch from the innertube API and parse the results.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: tuple
        :returns:
            A tuple of a list of YouTube objects and a continuation string.
            The list is ``None`` when the response has no
            ``itemSectionRenderer``; the continuation is ``None`` when the
            response has no ``continuationItemRenderer``.
        """
        # Begin by executing the query and identifying the relevant sections
        # of the results
        raw_results = self.fetch_query(continuation)

        # Initial result is handled by try block, continuations by except block
        try:
            sections = raw_results['contents']['twoColumnSearchResultsRenderer'][
                'primaryContents']['sectionListRenderer']['contents']
        except KeyError:
            sections = raw_results['onResponseReceivedCommands'][0][
                'appendContinuationItemsAction']['continuationItems']

        item_renderer = None
        continuation_renderer = None
        for s in sections:
            if 'itemSectionRenderer' in s:
                item_renderer = s['itemSectionRenderer']
            if 'continuationItemRenderer' in s:
                continuation_renderer = s['continuationItemRenderer']

        # If the continuationItemRenderer doesn't exist, assume no further results
        if continuation_renderer:
            next_continuation = continuation_renderer['continuationEndpoint'][
                'continuationCommand']['token']
        else:
            next_continuation = None

        # If the itemSectionRenderer doesn't exist, assume no results.
        if not item_renderer:
            return None, next_continuation

        videos = []
        for video_details in item_renderer['contents']:
            # Skip over ads
            if video_details.get('searchPyvRenderer', {}).get('ads', None):
                continue

            # Skip non-video entries that break up the search results
            if any(key in video_details for key in self._SKIPPED_RENDERERS):
                continue

            if 'videoRenderer' not in video_details:
                logger.warning('Unexpected renderer encountered.')
                logger.warning('Renderer name: %s', video_details.keys())
                logger.warning('Search term: %s', self.query)
                logger.warning(
                    'Please open an issue at '
                    'https://github.com/pytube/pytube/issues '
                    'and provide this log output.'
                )
                continue

            # Extract relevant video information from the details.
            # Some of this can be used to pre-populate attributes of the
            # YouTube object.
            vid_renderer = video_details['videoRenderer']
            vid_id = vid_renderer['videoId']
            vid_url = f'https://www.youtube.com/watch?v={vid_id}'
            vid_title = vid_renderer['title']['runs'][0]['text']
            owner_run = vid_renderer['ownerText']['runs'][0]
            vid_channel_name = owner_run['text']
            vid_channel_uri = owner_run['navigationEndpoint'][
                'commandMetadata']['webCommandMetadata']['url']

            # Livestreams have "runs", non-livestreams have "simpleText",
            # and scheduled releases do not have 'viewCountText'
            if 'viewCountText' in vid_renderer:
                view_count_text = vid_renderer['viewCountText']
                if 'runs' in view_count_text:
                    vid_view_count_text = view_count_text['runs'][0]['text']
                else:
                    vid_view_count_text = view_count_text['simpleText']
                # Strip ' views' text, then remove commas. A non-numeric
                # label (e.g. "No views") is treated as zero instead of
                # raising ValueError and aborting the whole search.
                tokens = vid_view_count_text.split()
                count_token = tokens[0].replace(',', '') if tokens else ''
                vid_view_count = int(count_token) if count_token.isdigit() else 0
            else:
                vid_view_count = 0

            if 'lengthText' in vid_renderer:
                vid_length = vid_renderer['lengthText']['simpleText']
            else:
                vid_length = None

            vid_metadata = {
                'id': vid_id,
                'url': vid_url,
                'title': vid_title,
                'channel_name': vid_channel_name,
                'channel_url': vid_channel_uri,
                'view_count': vid_view_count,
                'length': vid_length,
            }

            # Construct YouTube object from metadata and append to results
            vid = YouTube(vid_metadata['url'])
            vid.author = vid_metadata['channel_name']
            vid.title = vid_metadata['title']
            videos.append(vid)

        return videos, next_continuation

    def fetch_query(self, continuation=None):
        """Fetch raw results from the innertube API.

        The first response received is also stored on the instance, because
        ``completion_suggestions`` reads its suggestions from it.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: dict
        :returns:
            The raw json object returned by the innertube API.
        """
        query_results = self._innertube_client.search(self.query, continuation)
        if not self._initial_results:
            self._initial_results = query_results
        return query_results  # noqa:R504

pytube/innertube.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def player(self, video_id):
103103
query.update(self.base_params)
104104
return self._call_api(endpoint, query, self.base_data)
105105

106-
def search(self, search_query):
106+
def search(self, search_query, continuation=None):
107107
"""Make a request to the search endpoint.
108108
109109
:param str search_query:
@@ -117,4 +117,8 @@ def search(self, search_query):
117117
'query': search_query
118118
}
119119
query.update(self.base_params)
120-
return self._call_api(endpoint, query, self.base_data)
120+
data = {}
121+
if continuation:
122+
data['continuation'] = continuation
123+
data.update(self.base_data)
124+
return self._call_api(endpoint, query, data)

0 commit comments

Comments
 (0)