|
| 1 | +"""Module for interacting with YouTube search.""" |
| 2 | +# Native python imports |
| 3 | +import logging |
| 4 | + |
| 5 | +# Local imports |
| 6 | +from pytube import YouTube |
| 7 | +from pytube.innertube import InnerTube |
| 8 | + |
| 9 | + |
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
| 11 | + |
| 12 | + |
class Search:
    """Run a YouTube search through the innertube client and page the hits.

    The first page is fetched lazily the first time ``results`` (or
    ``completion_suggestions``) is accessed; later pages are appended by
    calling ``get_next_results()``.
    """

    # Renderer keys that show up among the hits but are not plain videos:
    # "people also watched" shelves, auto-generated mixes, playlists and
    # channels. Entries carrying any of these keys are skipped silently.
    _SKIPPED_RENDERERS = (
        'shelfRenderer',
        'radioRenderer',
        'playlistRenderer',
        'channelRenderer',
    )

    def __init__(self, query):
        """Initialize Search object.

        :param str query:
            Search query provided by the user.
        """
        self.query = query
        self._innertube_client = InnerTube()

        # The first search, without a continuation, is structured differently
        # and contains completion suggestions, so we must store this separately
        self._initial_results = None

        self._results = None
        self._completion_suggestions = None

        # Used for keeping track of query continuations so that new results
        # are always returned when get_next_results() is called
        self._current_continuation = None

    @property
    def completion_suggestions(self):
        """Return query autocompletion suggestions for the query.

        Accessing this property triggers the initial search if it has not
        been performed yet.

        :rtype: list
        :returns:
            A list of autocomplete suggestions provided by YouTube for the
            query, or ``None`` when there are no results or the response
            carries no ``'refinements'`` entry.
        """
        if self._completion_suggestions:
            return self._completion_suggestions
        if self.results:
            # Not every response carries 'refinements'; a plain index
            # lookup here used to raise KeyError.
            initial = self._initial_results or {}
            self._completion_suggestions = initial.get('refinements')
        return self._completion_suggestions

    @property
    def results(self):
        """Return search results.

        On first call, will generate and return the first set of results.
        Additional results can be generated using ``.get_next_results()``.

        :rtype: list
        :returns:
            A list of YouTube objects; empty when the search had no hits.
        """
        # Compare against None (not truthiness) so that an empty result set
        # is cached too instead of re-querying the API on every access.
        if self._results is not None:
            return self._results

        videos, continuation = self.fetch_and_parse()
        # fetch_and_parse() yields None when no item section was returned;
        # normalise to a list so get_next_results() can always extend it.
        self._results = videos if videos is not None else []
        self._current_continuation = continuation
        return self._results

    def get_next_results(self):
        """Use the stored continuation string to fetch the next set of results.

        This method does not return the results, but instead updates the
        results property.

        :raises IndexError:
            If no continuation is stored, i.e. the initial results were not
            fetched yet or the last page reported no further continuation.
        """
        if not self._current_continuation:
            raise IndexError('No further search results are available.')

        videos, continuation = self.fetch_and_parse(self._current_continuation)
        if self._results is None:
            self._results = []
        # A page without an item section comes back as None.
        if videos:
            self._results.extend(videos)
        self._current_continuation = continuation

    def fetch_and_parse(self, continuation=None):
        """Fetch from the innertube API and parse the results.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: tuple
        :returns:
            A tuple of a list of YouTube objects and a continuation string.
            The list is ``None`` when the response has no
            ``itemSectionRenderer``; the continuation is ``None`` when the
            response has no ``continuationItemRenderer``.
        """
        # Begin by executing the query and identifying the relevant sections
        # of the results
        raw_results = self.fetch_query(continuation)

        # Initial result is handled by try block, continuations by except block
        try:
            sections = raw_results['contents']['twoColumnSearchResultsRenderer'][
                'primaryContents']['sectionListRenderer']['contents']
        except KeyError:
            sections = raw_results['onResponseReceivedCommands'][0][
                'appendContinuationItemsAction']['continuationItems']

        # If a renderer type occurs more than once, the last one wins.
        item_renderer = None
        continuation_renderer = None
        for section in sections:
            if 'itemSectionRenderer' in section:
                item_renderer = section['itemSectionRenderer']
            if 'continuationItemRenderer' in section:
                continuation_renderer = section['continuationItemRenderer']

        # If the continuationItemRenderer doesn't exist, assume no further results
        if continuation_renderer:
            next_continuation = continuation_renderer['continuationEndpoint'][
                'continuationCommand']['token']
        else:
            next_continuation = None

        # If the itemSectionRenderer doesn't exist, assume no results.
        if not item_renderer:
            return None, next_continuation

        videos = []
        for video_details in item_renderer['contents']:
            # Skip over ads
            if video_details.get('searchPyvRenderer', {}).get('ads', None):
                continue

            # Skip shelves, mixes, playlists and channels that break up the
            # search results
            if any(key in video_details for key in self._SKIPPED_RENDERERS):
                continue

            if 'videoRenderer' not in video_details:
                # logger.warn() is a deprecated alias that was removed in
                # Python 3.13; warning() is the supported spelling.
                logger.warning('Unexpected renderer encountered.')
                logger.warning('Renderer name: %s', video_details.keys())
                logger.warning('Search term: %s', self.query)
                logger.warning(
                    'Please open an issue at '
                    'https://github.com/pytube/pytube/issues '
                    'and provide this log output.'
                )
                continue

            vid_metadata = self._extract_video_metadata(
                video_details['videoRenderer']
            )

            # Construct YouTube object from metadata and append to results
            vid = YouTube(vid_metadata['url'])
            vid.author = vid_metadata['channel_name']
            vid.title = vid_metadata['title']
            videos.append(vid)

        return videos, next_continuation

    @staticmethod
    def _extract_video_metadata(vid_renderer):
        """Pull the fields of interest out of a ``videoRenderer`` dict.

        :param dict vid_renderer:
            The ``videoRenderer`` entry of a single search hit.
        :rtype: dict
        :returns:
            A dict with the keys ``id``, ``url``, ``title``,
            ``channel_name``, ``channel_url``, ``view_count`` and ``length``.
        :raises KeyError:
            If the id, title or owner information is missing.
        """
        vid_id = vid_renderer['videoId']
        owner = vid_renderer['ownerText']['runs'][0]

        # Livestreams have "runs", non-livestreams have "simpleText",
        # and scheduled releases do not have 'viewCountText'
        if 'viewCountText' in vid_renderer:
            view_count_field = vid_renderer['viewCountText']
            if 'runs' in view_count_field:
                view_count_text = view_count_field['runs'][0]['text']
            else:
                view_count_text = view_count_field['simpleText']
            view_count = Search._parse_view_count(view_count_text)
        else:
            view_count = 0

        if 'lengthText' in vid_renderer:
            length = vid_renderer['lengthText']['simpleText']
        else:
            length = None

        return {
            'id': vid_id,
            'url': f'https://www.youtube.com/watch?v={vid_id}',
            'title': vid_renderer['title']['runs'][0]['text'],
            'channel_name': owner['text'],
            'channel_url': owner['navigationEndpoint']['commandMetadata'][
                'webCommandMetadata']['url'],
            'view_count': view_count,
            'length': length,
        }

    @staticmethod
    def _parse_view_count(view_count_text):
        """Convert text such as ``'1,234 views'`` to an int.

        :param str view_count_text:
            The view-count text taken from the renderer.
        :rtype: int
        :returns:
            The leading number with commas removed, or 0 when the text does
            not start with a number (e.g. ``'No views'``).
        """
        tokens = view_count_text.split()
        if not tokens:
            return 0
        try:
            # Strip the trailing ' views' text, then remove commas
            return int(tokens[0].replace(',', ''))
        except ValueError:
            return 0

    def fetch_query(self, continuation=None):
        """Fetch raw results from the innertube API.

        The first non-empty response is also stored as the initial results,
        which ``completion_suggestions`` reads from.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: dict
        :returns:
            The raw json object returned by the innertube API.
        """
        query_results = self._innertube_client.search(self.query, continuation)
        if not self._initial_results:
            self._initial_results = query_results
        return query_results  # noqa:R504
0 commit comments