import numpy as np
import pandas as pd
from datetime import datetime
!pip install pycountry
!pip install folium
Requirement already satisfied: pycountry in /home/josh/anaconda3/lib/python3.9/site-packages (22.3.5) Requirement already satisfied: setuptools in /home/josh/anaconda3/lib/python3.9/site-packages (from pycountry) (61.2.0) Collecting folium Downloading folium-0.13.0-py2.py3-none-any.whl (96 kB) |████████████████████████████████| 96 kB 794 kB/s eta 0:00:01 Requirement already satisfied: requests in /home/josh/anaconda3/lib/python3.9/site-packages (from folium) (2.27.1) Collecting branca>=0.3.0 Downloading branca-0.5.0-py3-none-any.whl (24 kB) Requirement already satisfied: jinja2>=2.9 in /home/josh/anaconda3/lib/python3.9/site-packages (from folium) (2.11.3) Requirement already satisfied: numpy in /home/josh/anaconda3/lib/python3.9/site-packages (from folium) (1.21.5) Requirement already satisfied: MarkupSafe>=0.23 in /home/josh/anaconda3/lib/python3.9/site-packages (from jinja2>=2.9->folium) (2.0.1) Requirement already satisfied: charset-normalizer~=2.0.0 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (2.0.4) Requirement already satisfied: certifi>=2017.4.17 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (2021.10.8) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (1.26.9) Requirement already satisfied: idna<4,>=2.5 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (3.3) Installing collected packages: branca, folium Successfully installed branca-0.5.0 folium-0.13.0
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.express as px
import plotly.offline as py
import pycountry
import folium
from folium import plugins
import numpy as np
from PIL import Image
import matplotlib.ticker as ticker
import matplotlib.animation as animation
from IPython.display import HTML
# Graphics in retina format
%config InlineBackend.figure_format = 'retina'
# Increase the default plot size and set the color scheme
plt.rcParams['figure.figsize'] = 8, 5
# Disable warnings in Anaconda
import warnings
warnings.filterwarnings('ignore')
import os
# Load the Forbes athletes CSV from the current working directory
df = pd.read_csv('Forbes Richest Atheletes (Forbes Richest Athletes 1990-2020).csv')
# Preview the first rows
df.head()
S.NO | Name | Nationality | Current Rank | Previous Year Rank | Sport | Year | earnings ($ million) | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Mike Tyson | USA | 1 | NaN | boxing | 1990 | 28.6 |
1 | 2 | Buster Douglas | USA | 2 | NaN | boxing | 1990 | 26.0 |
2 | 3 | Sugar Ray Leonard | USA | 3 | NaN | boxing | 1990 | 13.0 |
3 | 4 | Ayrton Senna | Brazil | 4 | NaN | auto racing | 1990 | 10.0 |
4 | 5 | Alain Prost | France | 5 | NaN | auto racing | 1990 | 9.0 |
# Build the working frame without the serial-number column.
# drop() returns a new DataFrame, so the original `df` is left untouched.
df1 = df.drop(columns='S.NO')
df1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 301 entries, 0 to 300 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Name 301 non-null object 1 Nationality 301 non-null object 2 Current Rank 301 non-null int64 3 Previous Year Rank 277 non-null object 4 Sport 301 non-null object 5 Year 301 non-null int64 6 earnings ($ million) 301 non-null float64 dtypes: float64(1), int64(2), object(4) memory usage: 16.6+ KB
# 'Year' is already an integer column, so it can become the index directly;
# parsing every row to datetime64 only to read the year back out is a
# redundant (and slow, row-by-row) round trip.
df1 = df1.rename(columns={'Year': 'year'}).set_index('year')
# Converting the sport column to uppercase
df1['Sport'] = df1['Sport'].str.upper()
df1.head()
Name | Nationality | Current Rank | Previous Year Rank | Sport | earnings ($ million) | |
---|---|---|---|---|---|---|
year | ||||||
1990 | Mike Tyson | USA | 1 | NaN | BOXING | 28.6 |
1990 | Buster Douglas | USA | 2 | NaN | BOXING | 26.0 |
1990 | Sugar Ray Leonard | USA | 3 | NaN | BOXING | 13.0 |
1990 | Ayrton Senna | Brazil | 4 | NaN | AUTO RACING | 10.0 |
1990 | Alain Prost | France | 5 | NaN | AUTO RACING | 9.0 |
# Keep only the rows whose index (the year) equals 2020
is_2020 = df1.index == 2020
data_2020 = df1[is_2020]
data_2020.head()
Name | Nationality | Current Rank | Previous Year Rank | Sport | earnings ($ million) | |
---|---|---|---|---|---|---|
year | ||||||
2020 | Roger Federer | Switzerland | 1 | 5 | TENNIS | 106.3 |
2020 | Cristiano Ronaldo | Portugal | 2 | 2 | SOCCER | 105.0 |
2020 | Lionel Messi | Argentina | 3 | 1 | SOCCER | 104.0 |
2020 | Neymar | Brazil | 4 | 3 | SOCCER | 95.5 |
2020 | LeBron James | USA | 5 | 8 | BASKETBALL | 88.2 |
# Horizontal bar chart of the 2020 rows: one bar per athlete, bar length = earnings
trace = go.Bar(
    x=data_2020['earnings ($ million)'],
    y=data_2020['Name'],
    orientation='h',
    marker=dict(color='blue', line=dict(color='black', width=1)),
)
data = [trace]
layout = go.Layout(
    barmode="group",
    title="World's Highest-Paid Athletes in 2020",
    width=800,
    height=500,
    # The x-axis carries earnings here, not a rank count
    xaxis=dict(title='Earnings ($ million)'),
    # Reverse the category axis so the first row is drawn at the top
    yaxis=dict(autorange="reversed"),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# Rows with Current Rank 1, ordered from the most recent year backwards
is_rank_one = df1['Current Rank'] == 1
top_paid_each_year = df1[is_rank_one].sort_values(by='year', ascending=False)
top_paid_each_year.head()
Name | Nationality | Current Rank | Previous Year Rank | Sport | earnings ($ million) | |
---|---|---|---|---|---|---|
year | ||||||
2020 | Roger Federer | Switzerland | 1 | 5 | TENNIS | 106.3 |
2019 | Lionel Messi | Argentina | 1 | 2 | SOCCER | 127.0 |
2018 | Floyd Mayweather | USA | 1 | >100 | BOXING | 285.0 |
2017 | Cristiano Ronaldo | Portugal | 1 | 1 | SOCCER | 93.0 |
2016 | Cristiano Ronaldo | Portugal | 1 | 3 | SOCCER | 88.0 |
# Narrow the yearly top-earner table to the columns worth displaying
summary_columns = ['Name', 'Sport', 'Nationality', 'earnings ($ million)']
z = top_paid_each_year[summary_columns]
# Shade numeric cells with a red gradient
z.style.background_gradient(cmap='Reds')
Name | Sport | Nationality | earnings ($ million) | |
---|---|---|---|---|
year | ||||
2020 | Roger Federer | TENNIS | Switzerland | 106.300000 |
2019 | Lionel Messi | SOCCER | Argentina | 127.000000 |
2018 | Floyd Mayweather | BOXING | USA | 285.000000 |
2017 | Cristiano Ronaldo | SOCCER | Portugal | 93.000000 |
2016 | Cristiano Ronaldo | SOCCER | Portugal | 88.000000 |
2015 | Floyd Mayweather | BOXING | USA | 300.000000 |
2014 | Floyd Mayweather | BOXING | USA | 105.000000 |
2013 | Tiger Woods | GOLF | USA | 78.100000 |
2012 | Floyd Mayweather | BOXING | USA | 85.000000 |
2011 | Tiger Woods | GOLF | USA | 75.000000 |
2010 | Tiger Woods | GOLF | USA | 105.000000 |
2009 | Tiger Woods | GOLF | USA | 110.000000 |
2008 | Tiger Woods | GOLF | USA | 115.000000 |
2007 | Tiger Woods | GOLF | USA | 100.000000 |
2006 | Tiger Woods | GOLF | USA | 90.000000 |
2005 | Tiger Woods | GOLF | USA | 87.000000 |
2004 | Tiger Woods | GOLF | USA | 80.300000 |
2003 | Tiger Woods | GOLF | USA | 78.000000 |
2002 | Tiger Woods | GOLF | USA | 69.000000 |
2000 | Michael Schumacher | AUTO RACING | Germany | 59.000000 |
1999 | Michael Schumacher | AUTO RACING | Germany | 49.000000 |
1998 | Michael Jordan | BASKETBALL | USA | 69.000000 |
1997 | Michael Jordan | BASKETBALL | USA | 78.300000 |
1996 | Mike Tyson | BOXING | USA | 75.000000 |
1995 | Michael Jordan | BASKETBALL | USA | 43.900000 |
1994 | Michael Jordan | BASKETBALL | USA | 30.000000 |
1993 | Michael Jordan | BASKETBALL | USA | 36.000000 |
1992 | Michael Jordan | BASKETBALL | USA | 35.900000 |
1991 | Evander Holyfield | BOXING | USA | 60.500000 |
1990 | Mike Tyson | BOXING | USA | 28.600000 |
# Count in how many years each athlete held rank 1.
# The count column is named explicitly: pandas >= 2.0 calls it 'count' while
# older versions reuse the source column name, so relying on the default name
# raises KeyError on one side or the other.
counts_top = top_paid_each_year['Name'].value_counts().to_frame(name='Name')
trace = go.Bar(
    y=counts_top.index,
    x=counts_top['Name'],
    orientation='h',
    marker=dict(color='blue', line=dict(color='black', width=1)),
)
data = [trace]
layout = go.Layout(
    barmode='group',
    title='Athletes ranked as top earner the most times',
    width=800,
    height=500,
    xaxis=dict(title='No of times ranked highest'),
    # Reverse the category axis so the most frequent name is drawn at the top
    yaxis=dict(autorange='reversed'),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# Per athlete: earnings summed over the years they were ranked first, next to
# the number of such years.
total_earnings = (
    top_paid_each_year.groupby('Name')['earnings ($ million)'].sum().to_frame().reset_index()
)
# Name both output columns explicitly instead of renaming the defaults:
# the default names produced by value_counts().reset_index() differ between
# pandas < 2.0 ('index', 'Name') and pandas >= 2.0 ('Name', 'count').
top_ranks = (
    top_paid_each_year['Name']
    .value_counts()
    .rename_axis('Name')
    .reset_index(name='Rank_counts')
)
df_compare = total_earnings.merge(top_ranks, on='Name')

import plotly.graph_objs as go
from plotly import tools
# plotly.tools.make_subplots is deprecated in favour of plotly.subplots
from plotly.subplots import make_subplots

# Left panel: number of first-place years per athlete
trace0 = go.Bar(
    y=df_compare['Name'],
    x=df_compare['Rank_counts'],
    marker=dict(
        color='rgba(171, 50, 96, 0.6)',
        line=dict(color='rgba(171, 50, 96, 1.0)', width=1),
    ),
    name='Top Ranks',
    orientation='h',
)
# Right panel: summed earnings per athlete
trace1 = go.Scatter(
    y=df_compare['Name'],
    x=df_compare['earnings ($ million)'],
    mode='lines+markers',
    line=dict(color='rgb(63, 72, 204)'),
    name='income',
)
layout = dict(
    title='Income and Top Ranks',
    yaxis=dict(showticklabels=True, domain=[0, 0.85]),
    yaxis2=dict(
        showline=True,
        showticklabels=False,
        linecolor='rgba(102, 102, 102, 0.8)',
        linewidth=2,
        domain=[0, 0.85],
    ),
    xaxis=dict(
        zeroline=False,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[0, 0.42],
    ),
    xaxis2=dict(
        zeroline=False,
        showline=False,
        showticklabels=False,
        showgrid=True,
        domain=[0.47, 1],
        side='top',
        dtick=25,
    ),
    legend=dict(x=0.029, y=1.038, font=dict(size=10)),
    margin=dict(l=200, r=20, t=70, b=70),
    paper_bgcolor='rgb(248, 248, 255)',
    plot_bgcolor='rgb(248, 248, 255)',
)

annotations = []
y_s = df_compare['Rank_counts']
y_nw = np.rint(df_compare['earnings ($ million)'])
# Adding value labels next to every point and bar
for rounded_income, rank_count, athlete_name in zip(y_nw, y_s, df_compare['Name']):
    # label on the income line (right panel)
    annotations.append(dict(
        xref='x2', yref='y2',
        y=athlete_name, x=rounded_income - 1,
        text='{:,}'.format(rounded_income),
        font=dict(family='Arial', size=12, color='rgb(63, 72, 204)'),
        showarrow=False,
    ))
    # label on the rank-count bar (left panel)
    annotations.append(dict(
        xref='x1', yref='y1',
        y=athlete_name, x=rank_count + 1,
        text=str(rank_count),
        font=dict(family='Arial', size=12, color='rgb(171, 50, 96)'),
        showarrow=False,
    ))
layout['annotations'] = annotations

# Creating two side-by-side subplots
fig = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
                    shared_yaxes=False, vertical_spacing=0.001)
fig.append_trace(trace0, 1, 1)
fig.append_trace(trace1, 1, 2)
fig.update_layout(layout)
fig.show()
# Count in how many years the rank-1 athlete came from each country.
# The count column is named explicitly because its default name differs
# between pandas < 2.0 (source column name) and pandas >= 2.0 ('count').
counts_top = top_paid_each_year['Nationality'].value_counts().to_frame(name='Nationality')
trace = go.Bar(
    x=counts_top.index,
    y=counts_top['Nationality'],
    orientation='v',
    marker=dict(color='pink', line=dict(color='black', width=1)),
)
data = [trace]
layout = go.Layout(
    barmode="group",
    title='Country which produces the maximum earners in Sports',
    width=800,
    height=500,
    # Vertical bars: countries run along x, the counts along y
    xaxis=dict(title='Country'),
    yaxis=dict(title='No of times ranked highest'),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# Earnings of the rank-1 athlete plotted against the year
trace = go.Scatter(
    x=top_paid_each_year.index,
    y=top_paid_each_year['earnings ($ million)'],
    orientation='v',
    marker={'color': 'red', 'line': {'color': 'royalblue', 'width': 2}},
)
data = [trace]
layout = go.Layout(
    title='How much did the Top Paid Athlete for Each Year, earn? ',
    width=800,
    height=500,
    xaxis={'title': 'Years'},
    yaxis={'title': "Earning in US Dollars(million)"},
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
df['Sport'] = df['Sport'].str.upper()  # Converting the text to uppercase
# Number of rows (list appearances) per sport.
# The count column is named explicitly because its default name differs
# between pandas < 2.0 (source column name) and pandas >= 2.0 ('count').
max_sport = df['Sport'].value_counts().to_frame(name='Sport')
trace = go.Bar(
    y=max_sport.index,
    x=max_sport['Sport'],
    orientation='h',
    marker=dict(color='pink', line=dict(color='black', width=1)),
)
data = [trace]
layout = go.Layout(
    barmode="group",
    title='Sport which dominates in earnings',
    width=800,
    height=500,
    # Counts cover every row of the list, not only the rank-1 rows
    xaxis=dict(title='No of appearances on the list'),
    yaxis=dict(autorange="reversed"),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# Number of rows (list appearances) per nationality. The variable keeps its
# original name `max_sport` even though it now holds country counts.
# The count column is named explicitly because its default name differs
# between pandas < 2.0 (source column name) and pandas >= 2.0 ('count').
max_sport = df['Nationality'].value_counts().to_frame(name='Nationality')
trace = go.Bar(
    y=max_sport.index,
    x=max_sport['Nationality'],
    orientation='h',
    marker=dict(color='pink', line=dict(color='black', width=1)),
)
data = [trace]
layout = go.Layout(
    barmode="group",
    title='Country which dominates in Sports earnings',
    width=800,
    height=500,
    # Counts cover every row of the list, not only the rank-1 rows
    xaxis=dict(title='No of appearances on the list'),
    yaxis=dict(autorange="reversed"),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# The five names that occur most often in the dataset, with their counts
appearances = df['Name'].value_counts()
s = appearances.to_frame().head(5)
s.style.background_gradient(cmap='Reds')
Name | |
---|---|
Tiger Woods | 19 |
Michael Jordan | 19 |
Kobe Bryant | 14 |
LeBron James | 13 |
Michael Schumacher | 13 |
# People who have appeared exactly once on the list.
# The count column is named explicitly because its default name differs
# between pandas < 2.0 (source column name) and pandas >= 2.0 ('count').
names = df['Name'].value_counts().to_frame(name='Name')
names[names['Name'] == 1].index
# On scanning the list, we find the name of the sole woman athlete: Monica Seles
monica = df[df['Name'] == 'Monica Seles']
monica.style.set_properties(**{'background-color': 'pink',
                               'color': 'black',
                               'border-color': 'black'})
S.NO | Name | Nationality | Current Rank | Previous Year Rank | Sport | Year | earnings ($ million) | |
---|---|---|---|---|---|---|---|---|
29 | 30 | Monica Seles | USA | 10 | 12 | TENNIS | 1992 | 8.500000 |
# Sum each athlete's earnings over all of their rows
top_earners_alltime = pd.pivot_table(
    df,
    index='Name',
    values="earnings ($ million)",
    aggfunc='sum',
)
# Order by the summed earnings, largest first, and keep the first three
ranked_by_total = top_earners_alltime.sort_values(by="earnings ($ million)", ascending=False)
top3_earners_all = ranked_by_total.head(3)
top3_earners_all.style.background_gradient(cmap='Reds')
earnings ($ million) | |
---|---|
Name | |
Tiger Woods | 1373.800000 |
LeBron James | 844.800000 |
Floyd Mayweather | 840.000000 |
def earnings_plot(dataframe, athlete, image_path, opacity):
    """
    Draw a plotly line chart of one athlete's earnings with a background image.

    Parameters
    ----------
    dataframe : DataFrame indexed by year, with 'Name' and
        'earnings ($ million)' columns.
    athlete : value matched exactly against the 'Name' column.
    image_path : path of the image file stretched behind the plot.
    opacity : opacity applied to the background image.

    The figure is rendered with ``iplot``; nothing is returned.
    """
    # Filter the frame that was passed in; previously the global `df1` was
    # read here and the `dataframe` argument was silently ignored.
    athlete_df = dataframe[dataframe['Name'] == athlete]
    trace = go.Scatter(
        x=athlete_df.index,
        y=athlete_df['earnings ($ million)'],
        orientation='v',
        marker=dict(color='red', line=dict(color='red', width=6)),
    )
    data = [trace]
    # plotly serialises the PIL image while the layout is being built, so the
    # file handle can be released as soon as the layout exists.
    with Image.open(image_path) as background:
        layout = go.Layout(
            title=f"{athlete}'s earnings over the Years",
            xaxis=dict(title='Year'),
            yaxis=dict(title="Earnings in US$ (millions)"),
            images=[dict(
                source=background,
                # Position and size are given in paper coordinates: centred
                # on the middle and stretched over the full extent
                xref="paper",
                yref="paper",
                x=0.5,
                y=0.5,
                sizex=1,
                sizey=1,
                sizing="stretch",
                opacity=opacity,
                xanchor="center",
                yanchor="middle",
                layer="below",
            )],
        )
    fig = go.Figure(data=data, layout=layout)
    iplot(fig)
# Earnings history of Tiger Woods over his picture
image_path = "th.jpeg"
earnings_plot(dataframe=df1, athlete='Tiger Woods', image_path=image_path, opacity=0.3)
# Earnings history of Floyd Mayweather over his picture
image_path = "fm.jpeg"
earnings_plot(dataframe=df1, athlete='Floyd Mayweather', image_path=image_path, opacity=0.2)