import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the TidyTuesday Olympics dataset directly from GitHub and preview the first rows.
df = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-07-27/olympics.csv')
print(df.head())

# Count the non-null `medal` entries per team and keep the ten largest counts.
medal_counts = (
    df.groupby('team')['medal']
    .count()
    .reset_index()
    .sort_values(by='medal', ascending=False)
    .head(10)
)
print(medal_counts)

# Narrow the data to the five highest-ranked teams, then count their medals per year.
top_countries = medal_counts['team'].iloc[:5]
df_top_countries = df.loc[df['team'].isin(top_countries)]
medals_by_year = (
    df_top_countries.groupby(['year', 'team'])['medal']
    .count()
    .reset_index()
)

# Plot: Medals Over Time for Top 5 Countries
plt.figure(figsize=(14, 8))
sns.lineplot(x='year', y='medal', hue='team', data=medals_by_year)
# Title and axis labels are applied to the current axes in a single call.
plt.gca().set(
    title='Medals Over Time for Top 5 Countries',
    xlabel='Year',
    ylabel='Number of Medals',
)
plt.legend(title='Country')
plt.show()
# What is the distribution of medals by sport?
# Count the non-null `medal` entries per sport and keep the ten largest counts.
medals_by_sport = df.groupby('sport')['medal'].count().reset_index()
medals_by_sport = medals_by_sport.sort_values(by='medal', ascending=False).head(10)
# Plot: Top 10 Sports by Number of Medals
plt.figure(figsize=(14, 8))
# seaborn 0.13 deprecates passing `palette` without `hue`, so the categorical
# variable is also assigned to `hue`. `dodge=False` keeps one full-width bar per
# sport instead of reserving a slot for every hue level.
sport_axes = sns.barplot(
    data=medals_by_sport,
    x='medal',
    y='sport',
    hue='sport',
    palette='viridis',
    dodge=False,
)
# The hue legend would only repeat the y-axis labels; drop it if seaborn drew one.
sport_legend = sport_axes.get_legend()
if sport_legend is not None:
    sport_legend.remove()
plt.title('Top 10 Sports by Number of Medals')
plt.xlabel('Number of Medals')
plt.ylabel('Sport')
plt.show()
#clcoding.com
# 0 Comments:
# Post a Comment