As of 2022-12-10
Explanation
Our strategy for this query is:

1. Find the channel id.
2. Query the channels.list resource to get the channel's uploads video playlist id.
3. Query the playlistItems.list resource to get a list of all videos in the uploads video playlist.
4. Query the videos.list resource to get stats on each video.

Fetch all videos for the channel

Steps 1-3 are covered in detail in the solution for 3Blue1Brown. Here's the code adapted for this example:
from googleapiclient.discovery import build

# Instantiate a googleapiclient.discovery.Resource object for youtube
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey='YOURAPIKEY'
)

# Define the request
request = youtube.channels().list(
    part="contentDetails",
    id="UCY1kMZp36IQSyNx_9h4mpCg"
)

# Execute the request and save the response
response = request.execute()
item = response['items'][0]

# Get the uploads id
uploads_id = item['contentDetails']['relatedPlaylists']['uploads']
uploads_id
# List to store response objects
responses = []

# Get the first response
request = youtube.playlistItems().list(
    part="contentDetails",
    playlistId=uploads_id,
    maxResults=50,
)
response = request.execute()
responses.append(response)

# Iterate until the "current" response doesn't have nextPageToken
while "nextPageToken" in response:
    request = youtube.playlistItems().list(
        part="contentDetails",
        playlistId=uploads_id,
        maxResults=50,
        pageToken=response['nextPageToken']
    )
    response = request.execute()
    responses.append(response)

# Flatten the nested items lists
items = [item for response in responses for item in response['items']]
video_ids = [x['contentDetails']['videoId'] for x in items]
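By the way, this fetch-the-first-page-then-follow-nextPageToken pattern works for any paginated list endpoint, so if you expect to reuse it you could wrap it in a small helper. Here's a sketch under that assumption (the function name is mine, not part of the original code):

def fetch_all_playlist_video_ids(youtube, playlist_id):
    # Collect every videoId in the playlist, following nextPageToken until it disappears
    video_ids = []
    params = {'part': 'contentDetails', 'playlistId': playlist_id, 'maxResults': 50}
    while True:
        response = youtube.playlistItems().list(**params).execute()
        video_ids += [x['contentDetails']['videoId'] for x in response['items']]
        if 'nextPageToken' not in response:
            return video_ids
        params['pageToken'] = response['nextPageToken']

# usage: video_ids = fetch_all_playlist_video_ids(youtube, uploads_id)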
Video statistics
At this point, we have a list of video ids
print(video_ids)
# ['DPaWiXZmcb4',
# 'BYVZh5kqaFg',
# 'zAwo5RqhXB4',
# ...
# 'YdJr1FCB0P4',
# '7sj6Gpk3ab4',
# 'V6p5mbp_M98']
and we want to get the views and number of comments for each one. Unfortunately, those stats aren't exposed by the playlistItems.list resource, but we can use the videos.list resource to get them!
Here's what a sample call looks like.
# Request statistics for three videos at once
request = youtube.videos().list(
    part="statistics",
    id="DPaWiXZmcb4,BYVZh5kqaFg,zAwo5RqhXB4"
)
response = request.execute()
print(response['items'])
# [{
# 'kind': 'youtube#video',
# 'etag': 'l1gO7ZuMeuMUsWc1cJ4GzqQFcus',
# 'id': 'DPaWiXZmcb4',
# 'statistics': {
# 'viewCount': '1328594',
# 'likeCount': '57698',
# 'favoriteCount': '0',
# 'commentCount': '553'
# }
# },
# {
# 'kind': 'youtube#video',
# 'etag': 'r7R3YDtgy2yZ6gBBAE79_-vnblU',
# 'id': 'BYVZh5kqaFg',
# 'statistics': {
# 'viewCount': '26657492',
# 'likeCount': '804581',
# 'favoriteCount': '0',
# 'commentCount': '27264'
# }
# },
# {
# 'kind': 'youtube#video',
# 'etag': 'pnkP6uqqcOsjOSS3-FFQPQwr2tg',
# 'id': 'zAwo5RqhXB4',
# 'statistics': {
# 'viewCount': '2056224',
# 'likeCount': '135556',
# 'favoriteCount': '0',
# 'commentCount': '1551'
# }
# }]
Notice how we passed in three video ids as a comma-separated string and we got back a collection of three items.
You cannot request more than 50 ids at a time; that is, your id string must contain 50 or fewer ids, even if you set maxResults=50.
Pagination
We'll extend this example to fetch the details for all videos, querying 50 at a time.
# Chunk the ids into groups of 50
# Convert each group into a comma-separated string
id_strings = [','.join(video_ids[x:x + 50]) for x in range(0, len(video_ids), 50)]

# Fetch each chunk of videos
responses = []
for id_str in id_strings:
    request = youtube.videos().list(
        part="statistics,snippet",
        id=id_str,
        maxResults=50,
    )
    responses.append(request.execute())

# Close the connection
youtube.close()

# Flatten the nested items lists
items = [item for response in responses for item in response['items']]
print(items)
# [{
# 'kind': 'youtube#video',
# 'etag': 'O-R4eawcBbku_m4BtGBtiMIOEBs',
# 'id': 'BYVZh5kqaFg',
# 'snippet': {
# 'publishedAt': '2022-11-25T14:59:03Z',
# 'channelId': 'UCY1kMZp36IQSyNx_9h4mpCg',
# 'title': 'Egg Drop From Space',
# 'description': 'Next year we’re doing this on Mars. Ask for the CrunchLabs ... where prohibited.',
# ...
# },
# 'statistics': {
# 'viewCount': '26697713',
# 'likeCount': '805054',
# 'favoriteCount': '0',
# 'commentCount': '27346'
# }
# },
#
# {
# 'kind': 'youtube#video',
# 'etag': 'XpcxC0VgcUxe-R-XU1fwaohI2L8',
# 'id': 'zAwo5RqhXB4',
# 'snippet': {
# 'publishedAt': '2022-10-30T14:58:28Z',
# 'channelId': 'UCY1kMZp36IQSyNx_9h4mpCg',
# 'title': 'I mean, it is trick OR treat @CrunchLabs @MrBeast',
# 'description': '',
# ...
# },
# 'statistics': {
# 'viewCount': '2056803',
# 'likeCount': '135568',
# 'favoriteCount': '0',
# 'commentCount': '1551'
# }
# ...
Identifying the top videos
Now we have the data we need; all that's left is to extract the most viewed and the most commented video.
# Get the index of the video with the most views
views = [int(x['statistics']['viewCount']) for x in items]
max_views_idx = max(range(len(views)), key=views.__getitem__)

# Get the index of the video with the most comments
comments = [int(x['statistics']['commentCount']) for x in items]
max_comments_idx = max(range(len(comments)), key=comments.__getitem__)

print(f"Most viewed video: {items[max_views_idx]['snippet']['title']} ({views[max_views_idx]} views)")
print(f"Most commented video: {items[max_comments_idx]['snippet']['title']} ({comments[max_comments_idx]} comments)")
# Most viewed video: SKIN A WATERMELON party trick (126698582 views)
# Most commented video: Glitter Bomb 1.0 vs Porch Pirates (157006 comments)
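A quick note on the max(range(len(views)), key=views.__getitem__) idiom: it returns the index of the largest value, which is handy because we want to look up the title afterwards. If you only care about the winning items themselves, an equivalent approach (a sketch, not part of the original solution) is to let max() scan the items directly:

most_viewed = max(items, key=lambda v: int(v['statistics']['viewCount']))
most_commented = max(items, key=lambda v: int(v['statistics']['commentCount']))
print(most_viewed['snippet']['title'], most_commented['snippet']['title'])

Solution

Putting all of the pieces together: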
from googleapiclient.discovery import build

# Instantiate a googleapiclient.discovery.Resource object for youtube
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey='YOURAPIKEY'
)

# Define the request
request = youtube.channels().list(
    part="contentDetails",
    id="UCY1kMZp36IQSyNx_9h4mpCg"
)

# Execute the request and save the response
response = request.execute()
item = response['items'][0]

# Get the uploads id
uploads_id = item['contentDetails']['relatedPlaylists']['uploads']
uploads_id

# List to store response objects
responses = []

# Get the first response
request = youtube.playlistItems().list(
    part="contentDetails",
    playlistId=uploads_id,
    maxResults=50,
)
response = request.execute()
responses.append(response)

# Iterate until the "current" response doesn't have nextPageToken
while "nextPageToken" in response:
    request = youtube.playlistItems().list(
        part="contentDetails",
        playlistId=uploads_id,
        maxResults=50,
        pageToken=response['nextPageToken']
    )
    response = request.execute()
    responses.append(response)

# Flatten the nested items lists
items = [item for response in responses for item in response['items']]
video_ids = [x['contentDetails']['videoId'] for x in items]

# Chunk the ids into groups of 50 and convert each group into a comma-separated string
id_strings = [','.join(video_ids[x:x + 50]) for x in range(0, len(video_ids), 50)]

# Fetch each chunk of videos
responses = []
for id_str in id_strings:
    request = youtube.videos().list(
        part="statistics,snippet",
        id=id_str,
        maxResults=50,
    )
    responses.append(request.execute())

# Close the connection
youtube.close()

# Flatten the nested items lists
items = [item for response in responses for item in response['items']]

# Get the index of the video with the most views
views = [int(x['statistics']['viewCount']) for x in items]
max_views_idx = max(range(len(views)), key=views.__getitem__)

# Get the index of the video with the most comments
comments = [int(x['statistics']['commentCount']) for x in items]
max_comments_idx = max(range(len(comments)), key=comments.__getitem__)

print(f"Most viewed video: {items[max_views_idx]['snippet']['title']} ({views[max_views_idx]} views)")
print(f"Most commented video: {items[max_comments_idx]['snippet']['title']} ({comments[max_comments_idx]} comments)")
Bonus

The total cost of this query is 7 quota units:

- 1 request to channels.list x 1 quota per request = 1 quota
- 3 requests to playlistItems.list x 1 quota per request = 3 quota
- 3 requests to videos.list x 1 quota per request = 3 quota
See the quota cost reference table in the YouTube Data API documentation for per-request costs.
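These request counts follow directly from the number of uploads: one channels.list call, plus one playlistItems.list page and one videos.list call per 50 videos. A quick sketch of that arithmetic (the example count of 130 videos is hypothetical, just a number in the 101-150 range implied by the three pages above):

import math

def estimated_quota(num_videos):
    # 1 channels.list call, plus one playlistItems.list page and one
    # videos.list call per 50 videos; each call costs 1 quota unit
    pages = math.ceil(num_videos / 50)
    return 1 + pages + pages

print(estimated_quota(130))  # 7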