Athletes Project¶

In [2]:
import numpy as np
import pandas as pd
from datetime import datetime
!pip install pycountry
!pip install folium
Requirement already satisfied: pycountry in /home/josh/anaconda3/lib/python3.9/site-packages (22.3.5)
Requirement already satisfied: setuptools in /home/josh/anaconda3/lib/python3.9/site-packages (from pycountry) (61.2.0)
Collecting folium
  Downloading folium-0.13.0-py2.py3-none-any.whl (96 kB)
     |████████████████████████████████| 96 kB 794 kB/s eta 0:00:01
Requirement already satisfied: requests in /home/josh/anaconda3/lib/python3.9/site-packages (from folium) (2.27.1)
Collecting branca>=0.3.0
  Downloading branca-0.5.0-py3-none-any.whl (24 kB)
Requirement already satisfied: jinja2>=2.9 in /home/josh/anaconda3/lib/python3.9/site-packages (from folium) (2.11.3)
Requirement already satisfied: numpy in /home/josh/anaconda3/lib/python3.9/site-packages (from folium) (1.21.5)
Requirement already satisfied: MarkupSafe>=0.23 in /home/josh/anaconda3/lib/python3.9/site-packages (from jinja2>=2.9->folium) (2.0.1)
Requirement already satisfied: charset-normalizer~=2.0.0 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (2.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (2021.10.8)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (1.26.9)
Requirement already satisfied: idna<4,>=2.5 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (3.3)
Installing collected packages: branca, folium
Successfully installed branca-0.5.0 folium-0.13.0
In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.express as px
import plotly.offline as py
import pycountry
import folium
from folium import plugins
In [4]:
import numpy as np
from PIL import Image
In [5]:
import matplotlib.ticker as ticker
import matplotlib.animation as animation
from IPython.display import HTML
In [6]:
# Ask the inline matplotlib backend to render figures in the high-resolution
# 'retina' format so static plots stay sharp on high-DPI screens.
%config InlineBackend.figure_format = 'retina'
In [7]:
# Set the default matplotlib figure size to 8 x 5 (width, height).
# Only the size is configured here; no colour scheme is changed by this line.
plt.rcParams['figure.figsize'] = 8, 5
In [8]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by pandas/plotly;
# consider narrowing the filter while developing.
import warnings
warnings.filterwarnings('ignore')
# NOTE(review): `os` is not used in the visible part of the notebook.
import os

Importing data¶

In [9]:
# Read the Forbes richest-athletes table (1990-2020) and preview the first rows.
forbes_csv = 'Forbes Richest Atheletes (Forbes Richest Athletes 1990-2020).csv'
df = pd.read_csv(forbes_csv)
df.head()
Out[9]:
S.NO Name Nationality Current Rank Previous Year Rank Sport Year earnings ($ million)
0 1 Mike Tyson USA 1 NaN boxing 1990 28.6
1 2 Buster Douglas USA 2 NaN boxing 1990 26.0
2 3 Sugar Ray Leonard USA 3 NaN boxing 1990 13.0
3 4 Ayrton Senna Brazil 4 NaN auto racing 1990 10.0
4 5 Alain Prost France 5 NaN auto racing 1990 9.0
In [10]:
# Working frame without the serial-number column. `drop` returns a new
# DataFrame, so the raw `df` is left untouched.
df1 = df.drop(columns='S.NO')
df1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name                  301 non-null    object 
 1   Nationality           301 non-null    object 
 2   Current Rank          301 non-null    int64  
 3   Previous Year Rank    277 non-null    object 
 4   Sport                 301 non-null    object 
 5   Year                  301 non-null    int64  
 6   earnings ($ million)  301 non-null    float64
dtypes: float64(1), int64(2), object(4)
memory usage: 16.6+ KB
In [11]:
# `Year` holds integer years. Parse them with an explicit format (so a
# malformed value raises) in one vectorised call, then use the calendar year
# as the index under the name 'year'.
# The guard keeps the cell re-runnable: once `Year` has been moved into the
# index there is nothing left to convert.
if 'Year' in df1.columns:
    year_index = pd.to_datetime(df1['Year'], format='%Y').dt.year.rename('year')
    df1 = df1.drop(columns='Year').set_index(year_index)

# Upper-case the sport names so case variants of the same sport group together
df1['Sport'] = df1['Sport'].str.upper()
df1.head()
Out[11]:
Name Nationality Current Rank Previous Year Rank Sport earnings ($ million)
year
1990 Mike Tyson USA 1 NaN BOXING 28.6
1990 Buster Douglas USA 2 NaN BOXING 26.0
1990 Sugar Ray Leonard USA 3 NaN BOXING 13.0
1990 Ayrton Senna Brazil 4 NaN AUTO RACING 10.0
1990 Alain Prost France 5 NaN AUTO RACING 9.0

Highest Paid Athletes¶

In [12]:
# Keep only the rows whose index year is 2020.
is_2020 = df1.index == 2020
data_2020 = df1.loc[is_2020]
data_2020.head()
Out[12]:
Name Nationality Current Rank Previous Year Rank Sport earnings ($ million)
year
2020 Roger Federer Switzerland 1 5 TENNIS 106.3
2020 Cristiano Ronaldo Portugal 2 2 SOCCER 105.0
2020 Lionel Messi Argentina 3 1 SOCCER 104.0
2020 Neymar Brazil 4 3 SOCCER 95.5
2020 LeBron James USA 5 8 BASKETBALL 88.2
In [13]:
# Horizontal bar chart of the 2020 list: one bar per athlete, bar length = earnings.
trace = go.Bar(
    x=data_2020['earnings ($ million)'],
    y=data_2020['Name'],
    orientation='h',
    marker=dict(color='blue',
                line=dict(color='black', width=1)),
)

data = [trace]

layout = go.Layout(
    barmode="group",
    title="World's Highest-Paid Athletes in 2020",
    width=800,
    height=500,
    # The x axis carries earnings, so label it as such (it is not a rank count).
    xaxis=dict(title='Earnings ($ million)'),
    # Reverse the y axis so the first row of data_2020 is drawn at the top.
    yaxis=dict(autorange="reversed"),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
020406080100Carson WentzKirk CousinsTiger WoodsKevin DurantStephen CurryLeBron JamesNeymarLionel MessiCristiano RonaldoRoger Federer
World's Highest-Paid Athletes in 2020No of times ranked higest
plotly-logomark

Top paid Athlete for each year¶

In [14]:
# Rows with Current Rank 1 (one per listed year), most recent year first.
is_rank_one = df1['Current Rank'].eq(1)
top_paid_each_year = df1.loc[is_rank_one].sort_values('year', ascending=False)
top_paid_each_year.head()
Out[14]:
Name Nationality Current Rank Previous Year Rank Sport earnings ($ million)
year
2020 Roger Federer Switzerland 1 5 TENNIS 106.3
2019 Lionel Messi Argentina 1 2 SOCCER 127.0
2018 Floyd Mayweather USA 1 >100 BOXING 285.0
2017 Cristiano Ronaldo Portugal 1 1 SOCCER 93.0
2016 Cristiano Ronaldo Portugal 1 3 SOCCER 88.0
In [15]:
# Show the yearly leaders with a red background gradient.
leader_columns = ['Name', 'Sport', 'Nationality', 'earnings ($ million)']
z = top_paid_each_year[leader_columns]
z.style.background_gradient(cmap='Reds')
Out[15]:
  Name Sport Nationality earnings ($ million)
year        
2020 Roger Federer TENNIS Switzerland 106.300000
2019 Lionel Messi SOCCER Argentina 127.000000
2018 Floyd Mayweather BOXING USA 285.000000
2017 Cristiano Ronaldo SOCCER Portugal 93.000000
2016 Cristiano Ronaldo SOCCER Portugal 88.000000
2015 Floyd Mayweather BOXING USA 300.000000
2014 Floyd Mayweather BOXING USA 105.000000
2013 Tiger Woods GOLF USA 78.100000
2012 Floyd Mayweather BOXING USA 85.000000
2011 Tiger Woods GOLF USA 75.000000
2010 Tiger Woods GOLF USA 105.000000
2009 Tiger Woods GOLF USA 110.000000
2008 Tiger Woods GOLF USA 115.000000
2007 Tiger Woods GOLF USA 100.000000
2006 Tiger Woods GOLF USA 90.000000
2005 Tiger Woods GOLF USA 87.000000
2004 Tiger Woods GOLF USA 80.300000
2003 Tiger Woods GOLF USA 78.000000
2002 Tiger Woods GOLF USA 69.000000
2000 Michael Schumacher AUTO RACING Germany 59.000000
1999 Michael Schumacher AUTO RACING Germany 49.000000
1998 Michael Jordan BASKETBALL USA 69.000000
1997 Michael Jordan BASKETBALL USA 78.300000
1996 Mike Tyson BOXING USA 75.000000
1995 Michael Jordan BASKETBALL USA 43.900000
1994 Michael Jordan BASKETBALL USA 30.000000
1993 Michael Jordan BASKETBALL USA 36.000000
1992 Michael Jordan BASKETBALL USA 35.900000
1991 Evander Holyfield BOXING USA 60.500000
1990 Mike Tyson BOXING USA 28.600000

Athletes with the highest income and maximum number of appearances on the Forbes list¶

In [16]:
# Number of years each athlete was ranked #1 (index = athlete, single count column).
counts_top = top_paid_each_year['Name'].value_counts().to_frame()
trace = go.Bar(
    y=counts_top.index,
    # Select the count column by position: pandas >= 2.0 names it 'count',
    # older versions name it after the source column ('Name').
    x=counts_top.iloc[:, 0],
    orientation='h',
    marker=dict(color='blue',
                line=dict(color='black', width=1)),
)

data = [trace]
layout = go.Layout(
    barmode='group',
    title='Athletes ranked highest-paid the most times',
    width=800,
    height=500,
    xaxis=dict(title='No. of times ranked highest'),
    # Reverse the y axis so the athlete with the most #1 years is at the top.
    yaxis=dict(autorange='reversed'),
    showlegend=False,
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)
0246810Evander HolyfieldLionel MessiRoger FedererMike TysonMichael SchumacherCristiano RonaldoFloyd MayweatherMichael JordanTiger Woods
Athlete earing the most maximum numbe of timesNo of times ranked higest
plotly-logomark
In [17]:
# Per-athlete summary over the years in which they were ranked #1:
# total earnings in those years and the number of such years.
total_earnings = (
    top_paid_each_year
    .groupby('Name')['earnings ($ million)']
    .sum()
    .to_frame()
    .reset_index()
)
# Name both output columns explicitly instead of renaming whatever
# reset_index() produces: the default names differ between pandas < 2.0
# ('index' / 'Name') and pandas >= 2.0 ('Name' / 'count').
top_ranks = (
    top_paid_each_year['Name']
    .value_counts()
    .rename_axis('Name')
    .reset_index(name='Rank_counts')
)
df_compare = total_earnings.merge(top_ranks, on='Name')
In [ ]:
import plotly.graph_objs as go
from plotly import tools  # retained so any other cell that uses `tools` keeps working
# plotly.tools.make_subplots is deprecated; plotly.subplots is its supported home.
from plotly.subplots import make_subplots

# Left panel: number of years each athlete was ranked #1.
trace0 = go.Bar(
    y=df_compare['Name'],
    x=df_compare['Rank_counts'],
    marker=dict(color='rgba(171, 50, 96, 0.6)',
                line=dict(color='rgba(171, 50, 96, 1.0)', width=1)),
    name='Top Ranks',
    orientation='h',
)

# Right panel: earnings summed over those #1 years.
trace1 = go.Scatter(
    y=df_compare['Name'],
    x=df_compare['earnings ($ million)'],
    mode='lines+markers',
    line=dict(color='rgb(63, 72, 204)'),
    name='income',
)

layout = dict(
    title='Income and Top Ranks',
    yaxis=dict(showticklabels=True, domain=[0, 0.85]),
    yaxis2=dict(showline=True, showticklabels=False, linecolor='rgba(102, 102, 102, 0.8)',
                linewidth=2, domain=[0, 0.85]),
    xaxis=dict(zeroline=False, showline=False, showticklabels=True, showgrid=True,
               domain=[0, 0.42]),
    xaxis2=dict(zeroline=False, showline=False, showticklabels=False, showgrid=True,
                domain=[0.47, 1], side='top', dtick=25),
    legend=dict(x=0.029, y=1.038, font=dict(size=10)),
    margin=dict(l=200, r=20, t=70, b=70),
    paper_bgcolor='rgb(248, 248, 255)',
    plot_bgcolor='rgb(248, 248, 255)',
)

annotations = []
y_s = df_compare['Rank_counts']
# Earnings rounded to the nearest million for the text labels.
y_nw = np.rint(df_compare['earnings ($ million)'])
for earnings, rank_count, athlete in zip(y_nw, y_s, df_compare['Name']):
    # Earnings label next to the marker in the right-hand panel. np.rint
    # returns floats, so format without decimals ('1,374' rather than '1,374.0').
    annotations.append(dict(xref='x2', yref='y2', y=athlete, x=earnings - 1,
                            text='{:,.0f}'.format(earnings),
                            font=dict(family='Arial', size=12, color='rgb(63, 72, 204)'),
                            showarrow=False))
    # Rank-count label just past the end of the bar in the left-hand panel.
    annotations.append(dict(xref='x1', yref='y1', y=athlete, x=rank_count + 1,
                            text=str(rank_count),
                            font=dict(family='Arial', size=12, color='rgb(171, 50, 96)'),
                            showarrow=False))

layout['annotations'] = annotations


# One row, two columns: bars on the left, line on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
                    shared_yaxes=False, vertical_spacing=0.001)

fig.add_trace(trace0, row=1, col=1)
fig.add_trace(trace1, row=1, col=2)

fig.update_layout(layout)
fig.show()

Country that produces the maximum income generators in sport.¶

In [18]:
# Number of #1 rankings per nationality (index = country, single count column).
counts_top = top_paid_each_year['Nationality'].value_counts().to_frame()


trace = go.Bar(
    x=counts_top.index,
    # Select the count column by position: pandas >= 2.0 names it 'count',
    # older versions name it after the source column ('Nationality').
    y=counts_top.iloc[:, 0],
    orientation='v',
    marker=dict(color='pink',
                line=dict(color='black', width=1)),
)
data = [trace]
layout = go.Layout(
    barmode="group",
    title='Country which produces the maximum earners in Sports',
    width=800,
    height=500,
    # Vertical bars: countries run along x, the counts along y.
    xaxis=dict(title='Country'),
    yaxis=dict(title='No. of times ranked highest'),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
USAPortugalGermanySwitzerlandArgentina0510152025
Country which produces the maximum earners in SportsNo of times ranked higest
plotly-logomark

How much do top athletes make each year?¶

In [19]:
# Line chart: earnings of each year's rank-1 athlete against the year.
trace = go.Scatter(
    x=top_paid_each_year.index,
    y=top_paid_each_year['earnings ($ million)'],
    orientation='v',
    marker={'color': 'red',
            'line': {'color': 'royalblue', 'width': 2}},
)
data = [trace]
layout = go.Layout(
    title='How much did the Top Paid Athlete for Each Year, earn? ',
    width=800,
    height=500,
    xaxis={'title': 'Years'},
    yaxis={'title': "Earning in US Dollars(million)"},
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
199019952000200520102015202050100150200250300
How much did the Top Paid Athlete for Each Year, earn?YearsEarning in US Dollars(million)
plotly-logomark
In [20]:
# Upper-case the sport names in `df` so case variants are counted together.
# NOTE(review): this modifies the raw frame in place; later cells that display
# `df` will show the upper-cased values.
df['Sport'] = df['Sport'].str.upper()
# Number of list entries per sport (index = sport, single count column).
max_sport = df['Sport'].value_counts().to_frame()

trace = go.Bar(
    y=max_sport.index,
    # Select the count column by position: pandas >= 2.0 names it 'count',
    # older versions name it after the source column ('Sport').
    x=max_sport.iloc[:, 0],
    orientation='h',
    marker=dict(color='pink',
                line=dict(color='black', width=1)),
)
data = [trace]
layout = go.Layout(
    barmode="group",
    title='Sport which dominates in earnings',
    width=800,
    height=500,
    # The bars count rows of the list per sport, i.e. appearances on the list.
    xaxis=dict(title='No. of appearances on the list'),
    yaxis=dict(autorange="reversed"),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
020406080MMACYCLINGNBAHOCKEYAMERICAN FOOTBALL / BASEBALLAUTO RACING (NASCAR)NFLICE HOCKEYNASCARMOTORCYCLE GPF1 MOTORSPORTSBASEBALLF1 RACINGAMERICAN FOOTBALLAUTO RACINGTENNISSOCCERGOLFBOXINGBASKETBALL
Sport which dominates in earningsNo of times ranked highest
plotly-logomark

Which country generates the most in sports¶

In [21]:
# Number of list entries per nationality (index = country, single count column).
# The variable name `max_sport` is reused from the previous chart.
max_sport = df['Nationality'].value_counts().to_frame()


trace = go.Bar(
    y=max_sport.index,
    # Select the count column by position: pandas >= 2.0 names it 'count',
    # older versions name it after the source column ('Nationality').
    x=max_sport.iloc[:, 0],
    orientation='h',
    marker=dict(color='pink',
                line=dict(color='black', width=1)),
)
data = [trace]
layout = go.Layout(
    barmode="group",
    title='Country which dominates in Sports earnings',
    width=800,
    height=500,
    # The bars count rows of the list per country, i.e. appearances on the list.
    xaxis=dict(title='No. of appearances on the list'),
    yaxis=dict(autorange="reversed"),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
050100150200MexicoIrelandNorthern IrelandSerbiaSpainFilipinoAustriaDominicanAustraliaRussiaPhilippinesFranceFinlandItalyCanadaArgentinaBrazilPortugalSwitzerlandGermanyUKUSA
Country which dominates in Sports earningssNo of times ranked highest
plotly-logomark
In [22]:
# Five most frequently listed athletes, shaded by their count.
s = df['Name'].value_counts().head(5).to_frame()
s.style.background_gradient(cmap='Reds')
Out[22]:
  Name
Tiger Woods 19
Michael Jordan 19
Kobe Bryant 14
LeBron James 13
Michael Schumacher 13
In [23]:
# Athletes who appear on the list exactly once.
name_counts = df['Name'].value_counts()
names = name_counts.to_frame()
# Filter on the Series itself (its column name in `names` differs between
# pandas versions) and keep the result instead of discarding it.
single_appearance_names = name_counts[name_counts == 1].index

# Scanning that list turns up the only woman among the athletes: Monica Seles.
monica = df[df['Name'] == 'Monica Seles']
monica.style.set_properties(**{'background-color': 'pink',
                            'color': 'black',
                            'border-color': 'black'})
Out[23]:
  S.NO Name Nationality Current Rank Previous Year Rank Sport Year earnings ($ million)
29 30 Monica Seles USA 10 12 TENNIS 1992 8.500000

Analyzing the 3 highest paid of all time.¶

In [24]:
# Sum each athlete's listed earnings over all years, then keep the three largest totals.
top_earners_alltime = df.pivot_table(values="earnings ($ million)", index='Name', aggfunc='sum')
top3_earners_all = top_earners_alltime.sort_values("earnings ($ million)", ascending=False).head(3)

top3_earners_all.style.background_gradient(cmap='Reds')
Out[24]:
  earnings ($ million)
Name  
Tiger Woods 1373.800000
LeBron James 844.800000
Floyd Mayweather 840.000000

Analyzing Tiger Woods' income over the years¶

In [25]:
def earnings_plot(dataframe,athlete,image_path,opacity):
    """
    Show a plotly line chart of one athlete's earnings with an image behind it.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table with a 'Name' column and an 'earnings ($ million)' column. Its
        index supplies the x values (the notebook passes a year-indexed frame).
    athlete : str
        Value matched exactly against the 'Name' column; also used in the title.
    image_path : str
        Path of the image file drawn behind the plot, stretched to the full
        plotting area.
    opacity : float
        Opacity applied to the background image.

    Returns
    -------
    None
        The figure is displayed with ``iplot``.
    """
    # Filter the frame that was passed in rather than the notebook-level `df1`,
    # so the function honours its `dataframe` argument.
    athlete_df = dataframe[dataframe['Name'] == athlete]

    trace = go.Scatter(
                    x = athlete_df.index,
                    y = athlete_df['earnings ($ million)'] ,
                    orientation='v',
                    marker = dict(color='red',
                                 line=dict(color='red',width=6)),
                    )
    data = [trace]
    # Open the image in a context manager so its file handle is released once
    # the figure has been built and displayed.
    with Image.open(image_path) as background:
        layout= go.Layout(title= f'{athlete}' +"'s earnings over the Years",
                      xaxis=dict(title='Year'),
                      yaxis=dict(title="Earnings in US$ (millions)"),
                      images= [dict(
                              source=background,
                              # Position and size are in "paper" coordinates:
                              # centred at (0.5, 0.5) and spanning the whole area.
                              xref= "paper",
                              yref= "paper",
                              x= 0.5,
                              y= 0.5,
                              sizex= 1,
                              sizey= 1,
                              sizing= "stretch",
                              opacity= opacity,
                              xanchor= "center",
                              yanchor="middle",
                              layer= "below")])
        fig = go.Figure(data = data, layout = layout)
        iplot(fig)
In [27]:
# Tiger Woods' earnings history drawn over the image stored in th.jpeg.
image_path = "th.jpeg"
earnings_plot(dataframe=df1, athlete='Tiger Woods', image_path=image_path, opacity=0.3)
2000200520102015202020406080100120
Tiger Woods's earnings over the YearsYearEarnings in US$ (millions)
plotly-logomark

Analyzing Floyd Mayweather's income over the years¶

In [28]:
# Floyd Mayweather's earnings history drawn over the image stored in fm.jpeg.
image_path = "fm.jpeg"
earnings_plot(dataframe=df1, athlete='Floyd Mayweather', image_path=image_path, opacity=0.2)