import numpy as np
import pandas as pd
from datetime import datetime
!pip install pycountry
!pip install folium

Requirement already satisfied: pycountry in /home/josh/anaconda3/lib/python3.9/site-packages (22.3.5)
Requirement already satisfied: setuptools in /home/josh/anaconda3/lib/python3.9/site-packages (from pycountry) (61.2.0)
Collecting folium
  Downloading folium-0.13.0-py2.py3-none-any.whl (96 kB)
     |████████████████████████████████| 96 kB 794 kB/s eta 0:00:01
Requirement already satisfied: requests in /home/josh/anaconda3/lib/python3.9/site-packages (from folium) (2.27.1)
Collecting branca>=0.3.0
  Downloading branca-0.5.0-py3-none-any.whl (24 kB)
Requirement already satisfied: jinja2>=2.9 in /home/josh/anaconda3/lib/python3.9/site-packages (from folium) (2.11.3)
Requirement already satisfied: numpy in /home/josh/anaconda3/lib/python3.9/site-packages (from folium) (1.21.5)
Requirement already satisfied: MarkupSafe>=0.23 in /home/josh/anaconda3/lib/python3.9/site-packages (from jinja2>=2.9->folium) (2.0.1)
Requirement already satisfied: charset-normalizer~=2.0.0 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (2.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (2021.10.8)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (1.26.9)
Requirement already satisfied: idna<4,>=2.5 in /home/josh/anaconda3/lib/python3.9/site-packages (from requests->folium) (3.3)
Installing collected packages: branca, folium
Successfully installed branca-0.5.0 folium-0.13.0


import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.express as px
import plotly.offline as py
import pycountry
import folium
from folium import plugins


import numpy as np
from PIL import Image


import matplotlib.ticker as ticker
import matplotlib.animation as animation
from IPython.display import HTML


# Graphics in retina format
%config InlineBackend.figure_format = 'retina'


# Set the default figure size (8 x 5 inches)
plt.rcParams['figure.figsize'] = 8, 5


# Suppress all Python warnings to keep the notebook output uncluttered
import warnings
warnings.filterwarnings('ignore')
import os


# Load the Forbes highest-paid athletes data set and preview the first rows
df = pd.read_csv(
    filepath_or_buffer='Forbes Richest Atheletes (Forbes Richest Athletes 1990-2020).csv',
)
df.head(n=5)


# df1 is a working copy of df without the 'S.NO' serial-number column;
# DataFrame.drop returns a new frame, so the original df is left untouched.
df1 = df.drop(columns='S.NO')
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name                  301 non-null    object 
 1   Nationality           301 non-null    object 
 2   Current Rank          301 non-null    int64  
 3   Previous Year Rank    277 non-null    object 
 4   Sport                 301 non-null    object 
 5   Year                  301 non-null    int64  
 6   earnings ($ million)  301 non-null    float64
dtypes: float64(1), int64(2), object(4)
memory usage: 16.6+ KB


# 'Year' is already an integer column (int64 in the df1.info() output), so
# it can be used as the index directly. Parsing every value into a datetime
# only to read the year back out yields the same numbers at extra cost.
df1 = df1.rename(columns={'Year': 'year'}).set_index('year')

# Upper-case the sport names
df1['Sport'] = df1['Sport'].str.upper()
df1.head()


# Rows whose index (the year) equals 2020
data_2020 = df1.loc[df1.index == 2020]
data_2020.head(n=5)


# Horizontal bar chart: earnings of every athlete listed for 2020
trace = go.Bar(
    x=data_2020['earnings ($ million)'],
    y=data_2020['Name'],
    orientation='h',
    marker=dict(color='blue',
                line=dict(color='black', width=1)),
)

data = [trace]

layout = go.Layout(
    barmode="group",
    title="World's Highest-Paid Athletes in 2020",
    width=800,
    height=500,
    # The x-axis carries earnings; the previous label described a rank count
    xaxis=dict(title='Earnings in US$ (millions)'),
    # Reversed so the first row of data_2020 is drawn at the top
    yaxis=dict(autorange="reversed"),
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)


# Rows with 'Current Rank' equal to 1, ordered from the latest year backwards
top_paid_each_year = (
    df1.loc[df1['Current Rank'].eq(1)]
    .sort_values('year', ascending=False)
)
top_paid_each_year.head(n=5)


# Narrow the top-ranked rows to the descriptive columns and shade the numeric
# values with a red gradient
z = top_paid_each_year.loc[:, ['Name', 'Sport', 'Nationality', 'earnings ($ million)']]
z.style.background_gradient(cmap='Reds')


# Number of years in which each athlete was ranked first
counts_top = top_paid_each_year['Name'].value_counts().to_frame()
trace = go.Bar(
    y=counts_top.index,
    # Read the counts by position: the single column is labelled 'Name' on
    # pandas < 2.0 but 'count' on pandas >= 2.0, so a label lookup breaks
    # on one version or the other.
    x=counts_top.iloc[:, 0],
    orientation='h',
    marker=dict(color='blue',
                line=dict(color='black', width=1)),
)

data = [trace]
layout = go.Layout(
    barmode='group',
    title='Athlete earing the most maximum numbe of times',
    width=800,
    height=500,
    xaxis=dict(title='No of times ranked higest'),
    # Reversed so the first (most frequent) athlete is drawn at the top
    yaxis=dict(autorange='reversed'),
    showlegend=False,
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)


# Per athlete, over the years in which they were ranked first:
#   total_earnings - sum of their earnings in those years
#   top_ranks      - how many such years there were ('Rank_counts')
total_earnings = top_paid_each_year.groupby('Name')['earnings ($ million)'].sum().to_frame().reset_index()

# Name the index and the count column explicitly. The labels that
# value_counts() produces differ between pandas < 2.0 ('index' / 'Name') and
# pandas >= 2.0 ('Name' / 'count'), so a rename map only fits one of them.
top_ranks = (
    top_paid_each_year['Name']
    .value_counts()
    .rename_axis('Name')
    .reset_index(name='Rank_counts')
)
df_compare = total_earnings.merge(top_ranks, on='Name')


import plotly.graph_objs as go
from plotly import tools  # kept in case later cells still reference `tools`
from plotly.subplots import make_subplots

# Left panel: number of first-place finishes per athlete
trace0 = go.Bar(
                y=df_compare['Name'],
                x=df_compare['Rank_counts'],
                marker=dict(color='rgba(171, 50, 96, 0.6)',line=dict(color='rgba(171, 50, 96, 1.0)',width=1)),
                name='Top Ranks',
                orientation='h',
)

# Right panel: total earnings per athlete over those first-place years
trace1 = go.Scatter(
                y=df_compare['Name'],
                x=df_compare['earnings ($ million)'],
                mode='lines+markers',
                line=dict(color='rgb(63, 72, 204)'),
                name='income',
)

layout = dict(
                title='Income and Top Ranks',
                yaxis=dict(showticklabels=True,domain=[0, 0.85]),
                yaxis2=dict(showline=True,showticklabels=False,linecolor='rgba(102, 102, 102, 0.8)',linewidth=2,domain=[0, 0.85]),
                xaxis=dict(zeroline=False,showline=False,showticklabels=True,showgrid=True,domain=[0, 0.42]),
                xaxis2=dict(zeroline=False,showline=False,showticklabels=False,showgrid=True,domain=[0.47, 1],side='top',dtick=25),
                legend=dict(x=0.029,y=1.038,font=dict(size=10) ),
                margin=dict(l=200, r=20,t=70,b=70),
                paper_bgcolor='rgb(248, 248, 255)',
                plot_bgcolor='rgb(248, 248, 255)',
)

annotations = []
y_s = df_compare['Rank_counts']
# Earnings rounded to the nearest whole number for the text labels
y_nw = np.rint(df_compare['earnings ($ million)'])
# Adding labels
for ydn, yd, xd in zip(y_nw, y_s, df_compare['Name']):
    # Label on the earnings line (second subplot), placed just left of the point
    annotations.append(dict(xref='x2', yref='y2', y=xd, x=ydn-1,text='{:,}'.format(ydn),font=dict(family='Arial', size=12,color='rgb(63, 72, 204)'),showarrow=False))
    # Label on the rank-count bar (first subplot), placed just past the bar end
    annotations.append(dict(xref='x1', yref='y1', y=xd, x=yd+1,text=str(yd),font=dict(family='Arial', size=12,color='rgb(171, 50, 96)'),showarrow=False))

layout['annotations'] = annotations


# Creating two subplots. plotly.tools.make_subplots and Figure.append_trace
# are deprecated; plotly.subplots.make_subplots and add_trace replace them.
fig = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
                    shared_yaxes=False, vertical_spacing=0.001)

fig.add_trace(trace0, row=1, col=1)
fig.add_trace(trace1, row=1, col=2)

fig.update_layout(layout)
fig.show()


# Number of years in which the top-ranked athlete came from each country
counts_top = top_paid_each_year['Nationality'].value_counts().to_frame()


trace = go.Bar(
                    x = counts_top.index,
                    # Positional access: the count column is labelled
                    # 'Nationality' on pandas < 2.0 but 'count' on pandas >= 2.0.
                    y = counts_top.iloc[:, 0],
                    orientation='v',
                    marker = dict(color='pink',
                                 line=dict(color='black',width=1)),
                    )
data = [trace]
# Bars are vertical, so the countries sit on the x-axis and the counts on
# the y-axis; the count label belongs on the y-axis.
layout = go.Layout(barmode = "group",title='Country which produces the maximum earners in Sports',width=800, height=500,
                       xaxis= dict(title='Country'),
                       yaxis= dict(title='No of times ranked highest'),
                       showlegend=False)
fig = go.Figure(data = data, layout = layout)
iplot(fig)


# Earnings of each year's top-ranked athlete, plotted against the year
trace = go.Scatter(
    x=top_paid_each_year.index,
    y=top_paid_each_year['earnings ($ million)'],
    orientation='v',
    marker={
        'color': 'red',
        'line': {'color': 'royalblue', 'width': 2},
    },
)
data = [trace]
layout = go.Layout(
    title='How much did the Top Paid Athlete for Each Year, earn? ',
    width=800,
    height=500,
    xaxis={'title': 'Years'},
    yaxis={'title': "Earning in US Dollars(million)"},
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)


df['Sport'] = df['Sport'].str.upper() # Converting the text to uppercase
# Number of list entries per sport
max_sport = df['Sport'].value_counts().to_frame()

trace = go.Bar(
                    y = max_sport.index,
                    # Positional access: the count column is labelled 'Sport'
                    # on pandas < 2.0 but 'count' on pandas >= 2.0.
                    x = max_sport.iloc[:, 0],
                    orientation='h',
                    marker = dict(color='pink',
                                 line=dict(color='black',width=1)),
                    )
data = [trace]
layout = go.Layout(barmode = "group",title='Sport which dominates in earnings',width=800, height=500,
                       xaxis= dict(title='No of times ranked highest'),
                       # Reversed so the most frequent sport is drawn at the top
                       yaxis=dict(autorange="reversed"),
                       showlegend=False)
fig = go.Figure(data = data, layout = layout)
iplot(fig)


# Number of list entries per nationality. The variable keeps the name
# `max_sport` from the previous cell even though it now holds country counts.
max_sport = df['Nationality'].value_counts().to_frame()


trace = go.Bar(
                    y = max_sport.index,
                    # Positional access: the count column is labelled
                    # 'Nationality' on pandas < 2.0 but 'count' on pandas >= 2.0.
                    x = max_sport.iloc[:, 0],
                    orientation='h',
                    marker = dict(color='pink',
                                 line=dict(color='black',width=1)),
                    )
data = [trace]
layout = go.Layout(barmode = "group",title='Country which dominates in Sports earningss',width=800, height=500,
                       xaxis= dict(title='No of times ranked highest'),
                       # Reversed so the most frequent country is drawn at the top
                       yaxis=dict(autorange="reversed"),
                       showlegend=False)
fig = go.Figure(data = data, layout = layout)
iplot(fig)


# The five names with the most rows in df, shaded with a red gradient
s = df['Name'].value_counts().to_frame().head(5)
s.style.background_gradient(cmap='Reds')


# Athletes who appear exactly once on the list.
names = df['Name'].value_counts().to_frame()
# Compare the count column by position: it is labelled 'Name' on
# pandas < 2.0 but 'count' on pandas >= 2.0.
names[names.iloc[:, 0] == 1].index

# Per the original analysis, scanning that list turns up the only woman
# athlete in it, Monica Seles; show her row(s) with pink highlighting.
monica = df[df['Name'] == 'Monica Seles']
monica.style.set_properties(**{'background-color': 'pink',
                            'color': 'black',
                            'border-color': 'black'})


# Sum every athlete's earnings over all of their rows in df
top_earners_alltime = df.pivot_table(
    index='Name',
    values="earnings ($ million)",
    aggfunc='sum',
)
# Keep the three largest totals
top3_earners_all = top_earners_alltime.sort_values(
    "earnings ($ million)", ascending=False
).head(3)

top3_earners_all.style.background_gradient(cmap='Reds')


def earnings_plot(dataframe,athlete,image_path,opacity):
    """
    Render a plotly line chart of one athlete's earnings over a background image.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with a 'Name' column and an 'earnings ($ million)' column. Its
        index supplies the x-axis values (the callers in this notebook pass
        ``df1``, whose index is the year).
    athlete : str
        Value matched exactly against the 'Name' column; also used in the
        chart title.
    image_path : str
        Path of the image file opened with PIL and stretched behind the plot.
    opacity : float
        Opacity applied to the background image.

    Returns
    -------
    None
        The figure is displayed with ``iplot``.
    """
    # Filter the frame that was passed in. Previously the global df1 was read
    # here, so the `dataframe` argument had no effect.
    athlete_df = dataframe[dataframe['Name'] == athlete]

    trace = go.Scatter(
                    x = athlete_df.index,
                    y = athlete_df['earnings ($ million)'] ,
                    orientation='v',
                    marker = dict(color='red',
                                 line=dict(color='red',width=6)),
                    )
    data = [trace]
    # Open the image in a context manager so the file handle is released.
    # NOTE(review): relies on plotly encoding the PIL image into the layout
    # when the layout is constructed, i.e. before the file is closed.
    with Image.open(image_path) as background:
        layout= go.Layout(title= f"{athlete}'s earnings over the Years",
                      xaxis=dict(title='Year'),
                      yaxis=dict(title="Earnings in US$ (millions)"),
                      images= [dict(
                              source=background,
                              xref= "paper",
                              yref= "paper",
                              x= 0.5,
                              y= 0.5,
                              sizex= 1,
                              sizey= 1,
                              sizing= "stretch",
                              opacity= opacity,
                              xanchor= "center",
                              yanchor="middle",
                              layer= "below")])
    fig = go.Figure(data = data, layout = layout)
    iplot(fig)


# Tiger Woods' earnings drawn over his picture
image_path = "th.jpeg"
earnings_plot(df1, athlete='Tiger Woods', image_path=image_path, opacity=0.3)


# Floyd Mayweather's earnings drawn over his picture
image_path = "fm.jpeg"
earnings_plot(df1, athlete='Floyd Mayweather', image_path=image_path, opacity=0.2)

	S.NO	Name	Nationality	Current Rank	Previous Year Rank	Sport	Year	earnings ($ million)
0	1	Mike Tyson	USA	1	NaN	boxing	1990	28.6
1	2	Buster Douglas	USA	2	NaN	boxing	1990	26.0
2	3	Sugar Ray Leonard	USA	3	NaN	boxing	1990	13.0
3	4	Ayrton Senna	Brazil	4	NaN	auto racing	1990	10.0
4	5	Alain Prost	France	5	NaN	auto racing	1990	9.0

	Name	Nationality	Current Rank	Previous Year Rank	Sport	earnings ($ million)
year
1990	Mike Tyson	USA	1	NaN	BOXING	28.6
1990	Buster Douglas	USA	2	NaN	BOXING	26.0
1990	Sugar Ray Leonard	USA	3	NaN	BOXING	13.0
1990	Ayrton Senna	Brazil	4	NaN	AUTO RACING	10.0
1990	Alain Prost	France	5	NaN	AUTO RACING	9.0

	Name	Nationality	Current Rank	Previous Year Rank	Sport	earnings ($ million)
year
2020	Roger Federer	Switzerland	1	5	TENNIS	106.3
2020	Cristiano Ronaldo	Portugal	2	2	SOCCER	105.0
2020	Lionel Messi	Argentina	3	1	SOCCER	104.0
2020	Neymar	Brazil	4	3	SOCCER	95.5
2020	LeBron James	USA	5	8	BASKETBALL	88.2

	Name	Nationality	Current Rank	Previous Year Rank	Sport	earnings ($ million)
year
2020	Roger Federer	Switzerland	1	5	TENNIS	106.3
2019	Lionel Messi	Argentina	1	2	SOCCER	127.0
2018	Floyd Mayweather	USA	1	>100	BOXING	285.0
2017	Cristiano Ronaldo	Portugal	1	1	SOCCER	93.0
2016	Cristiano Ronaldo	Portugal	1	3	SOCCER	88.0

	Name	Sport	Nationality	earnings ($ million)
year
2020	Roger Federer	TENNIS	Switzerland	106.300000
2019	Lionel Messi	SOCCER	Argentina	127.000000
2018	Floyd Mayweather	BOXING	USA	285.000000
2017	Cristiano Ronaldo	SOCCER	Portugal	93.000000
2016	Cristiano Ronaldo	SOCCER	Portugal	88.000000
2015	Floyd Mayweather	BOXING	USA	300.000000
2014	Floyd Mayweather	BOXING	USA	105.000000
2013	Tiger Woods	GOLF	USA	78.100000
2012	Floyd Mayweather	BOXING	USA	85.000000
2011	Tiger Woods	GOLF	USA	75.000000
2010	Tiger Woods	GOLF	USA	105.000000
2009	Tiger Woods	GOLF	USA	110.000000
2008	Tiger Woods	GOLF	USA	115.000000
2007	Tiger Woods	GOLF	USA	100.000000
2006	Tiger Woods	GOLF	USA	90.000000
2005	Tiger Woods	GOLF	USA	87.000000
2004	Tiger Woods	GOLF	USA	80.300000
2003	Tiger Woods	GOLF	USA	78.000000
2002	Tiger Woods	GOLF	USA	69.000000
2000	Michael Schumacher	AUTO RACING	Germany	59.000000
1999	Michael Schumacher	AUTO RACING	Germany	49.000000
1998	Michael Jordan	BASKETBALL	USA	69.000000
1997	Michael Jordan	BASKETBALL	USA	78.300000
1996	Mike Tyson	BOXING	USA	75.000000
1995	Michael Jordan	BASKETBALL	USA	43.900000
1994	Michael Jordan	BASKETBALL	USA	30.000000
1993	Michael Jordan	BASKETBALL	USA	36.000000
1992	Michael Jordan	BASKETBALL	USA	35.900000
1991	Evander Holyfield	BOXING	USA	60.500000
1990	Mike Tyson	BOXING	USA	28.600000

Athletes Project¶

Importing data¶

Highest Paid Athletes¶

Top paid Athlete for each year¶

Athletes with the highest income and maximum number of appearances on the Forbes list¶

Country that produces the maximum income generators in sport.¶

How much do top athletes make each year?¶

Which country generates the most income in sports?¶

Analyzing the 3 highest paid of all time.¶

Analyzing Tiger Woods' income over the years¶

Analyzing Floyd Mayweather's income over the years¶

	Name
Tiger Woods	19
Michael Jordan	19
Kobe Bryant	14
LeBron James	13
Michael Schumacher	13

	earnings ($ million)
Name
Tiger Woods	1373.800000
LeBron James	844.800000
Floyd Mayweather	840.000000