import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the IPL matches CSV (path is relative to the working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer="./IPL_Matches_2008_2020.csv")
# Preview the first five rows.
data.head(5)
id | city | date | player_of_match | venue | neutral_venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | method | umpire1 | umpire2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 335982 | Bangalore | 2008-04-18 | BB McCullum | M Chinnaswamy Stadium | 0 | Royal Challengers Bangalore | Kolkata Knight Riders | Royal Challengers Bangalore | field | Kolkata Knight Riders | runs | 140.0 | N | NaN | Asad Rauf | RE Koertzen |
1 | 335983 | Chandigarh | 2008-04-19 | MEK Hussey | Punjab Cricket Association Stadium, Mohali | 0 | Kings XI Punjab | Chennai Super Kings | Chennai Super Kings | bat | Chennai Super Kings | runs | 33.0 | N | NaN | MR Benson | SL Shastri |
2 | 335984 | Delhi | 2008-04-19 | MF Maharoof | Feroz Shah Kotla | 0 | Delhi Daredevils | Rajasthan Royals | Rajasthan Royals | bat | Delhi Daredevils | wickets | 9.0 | N | NaN | Aleem Dar | GA Pratapkumar |
3 | 335985 | Mumbai | 2008-04-20 | MV Boucher | Wankhede Stadium | 0 | Mumbai Indians | Royal Challengers Bangalore | Mumbai Indians | bat | Royal Challengers Bangalore | wickets | 5.0 | N | NaN | SJ Davis | DJ Harper |
4 | 335986 | Kolkata | 2008-04-20 | DJ Hussey | Eden Gardens | 0 | Kolkata Knight Riders | Deccan Chargers | Deccan Chargers | bat | Kolkata Knight Riders | wickets | 5.0 | N | NaN | BF Bowden | K Hariharan |
# Everything except the last five rows (what a negative count passed to head() selects).
data.iloc[:-5]
id | city | date | player_of_match | venue | neutral_venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | method | umpire1 | umpire2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 335982 | Bangalore | 2008-04-18 | BB McCullum | M Chinnaswamy Stadium | 0 | Royal Challengers Bangalore | Kolkata Knight Riders | Royal Challengers Bangalore | field | Kolkata Knight Riders | runs | 140.0 | N | NaN | Asad Rauf | RE Koertzen |
1 | 335983 | Chandigarh | 2008-04-19 | MEK Hussey | Punjab Cricket Association Stadium, Mohali | 0 | Kings XI Punjab | Chennai Super Kings | Chennai Super Kings | bat | Chennai Super Kings | runs | 33.0 | N | NaN | MR Benson | SL Shastri |
2 | 335984 | Delhi | 2008-04-19 | MF Maharoof | Feroz Shah Kotla | 0 | Delhi Daredevils | Rajasthan Royals | Rajasthan Royals | bat | Delhi Daredevils | wickets | 9.0 | N | NaN | Aleem Dar | GA Pratapkumar |
3 | 335985 | Mumbai | 2008-04-20 | MV Boucher | Wankhede Stadium | 0 | Mumbai Indians | Royal Challengers Bangalore | Mumbai Indians | bat | Royal Challengers Bangalore | wickets | 5.0 | N | NaN | SJ Davis | DJ Harper |
4 | 335986 | Kolkata | 2008-04-20 | DJ Hussey | Eden Gardens | 0 | Kolkata Knight Riders | Deccan Chargers | Deccan Chargers | bat | Kolkata Knight Riders | wickets | 5.0 | N | NaN | BF Bowden | K Hariharan |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
806 | 1216542 | Dubai | 2020-10-08 | JM Bairstow | Dubai International Cricket Stadium | 0 | Sunrisers Hyderabad | Kings XI Punjab | Sunrisers Hyderabad | bat | Sunrisers Hyderabad | runs | 69.0 | N | NaN | AK Chaudhary | Nitin Menon |
807 | 1216543 | Dubai | 2020-10-14 | A Nortje | Dubai International Cricket Stadium | 0 | Delhi Capitals | Rajasthan Royals | Delhi Capitals | bat | Delhi Capitals | runs | 13.0 | N | NaN | AK Chaudhary | Nitin Menon |
808 | 1216544 | Dubai | 2020-10-25 | RD Gaikwad | Dubai International Cricket Stadium | 0 | Royal Challengers Bangalore | Chennai Super Kings | Royal Challengers Bangalore | bat | Chennai Super Kings | wickets | 8.0 | N | NaN | C Shamshuddin | RK Illingworth |
809 | 1216545 | Abu Dhabi | 2020-09-26 | Shubman Gill | Sheikh Zayed Stadium | 0 | Sunrisers Hyderabad | Kolkata Knight Riders | Sunrisers Hyderabad | bat | Kolkata Knight Riders | wickets | 7.0 | N | NaN | CB Gaffaney | VK Sharma |
810 | 1216546 | Dubai | 2020-10-20 | S Dhawan | Dubai International Cricket Stadium | 0 | Delhi Capitals | Kings XI Punjab | Delhi Capitals | bat | Kings XI Punjab | wickets | 5.0 | N | NaN | C Shamshuddin | RK Illingworth |
811 rows × 17 columns
# Preview the last five rows.
data.tail(n=5)
id | city | date | player_of_match | venue | neutral_venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | method | umpire1 | umpire2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
811 | 1216547 | Dubai | 2020-09-28 | AB de Villiers | Dubai International Cricket Stadium | 0 | Royal Challengers Bangalore | Mumbai Indians | Mumbai Indians | field | Royal Challengers Bangalore | tie | NaN | Y | NaN | Nitin Menon | PR Reiffel |
812 | 1237177 | Dubai | 2020-11-05 | JJ Bumrah | Dubai International Cricket Stadium | 0 | Mumbai Indians | Delhi Capitals | Delhi Capitals | field | Mumbai Indians | runs | 57.0 | N | NaN | CB Gaffaney | Nitin Menon |
813 | 1237178 | Abu Dhabi | 2020-11-06 | KS Williamson | Sheikh Zayed Stadium | 0 | Royal Challengers Bangalore | Sunrisers Hyderabad | Sunrisers Hyderabad | field | Sunrisers Hyderabad | wickets | 6.0 | N | NaN | PR Reiffel | S Ravi |
814 | 1237180 | Abu Dhabi | 2020-11-08 | MP Stoinis | Sheikh Zayed Stadium | 0 | Delhi Capitals | Sunrisers Hyderabad | Delhi Capitals | bat | Delhi Capitals | runs | 17.0 | N | NaN | PR Reiffel | S Ravi |
815 | 1237181 | Dubai | 2020-11-10 | TA Boult | Dubai International Cricket Stadium | 0 | Delhi Capitals | Mumbai Indians | Delhi Capitals | bat | Mumbai Indians | wickets | 5.0 | N | NaN | CB Gaffaney | Nitin Menon |
# Print the column names, non-null counts and dtypes to spot missing values.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 816 entries, 0 to 815 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 816 non-null int64 1 city 803 non-null object 2 date 816 non-null object 3 player_of_match 812 non-null object 4 venue 816 non-null object 5 neutral_venue 816 non-null int64 6 team1 816 non-null object 7 team2 816 non-null object 8 toss_winner 816 non-null object 9 toss_decision 816 non-null object 10 winner 812 non-null object 11 result 812 non-null object 12 result_margin 799 non-null float64 13 eliminator 812 non-null object 14 method 19 non-null object 15 umpire1 816 non-null object 16 umpire2 816 non-null object dtypes: float64(1), int64(2), object(14) memory usage: 108.5+ KB
# Preview the frame without the `method` column (only 19 non-null values).
# drop() returns a new DataFrame here; `data` itself is left unchanged.
data.drop(columns=['method'])
id | city | date | player_of_match | venue | neutral_venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | umpire1 | umpire2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 335982 | Bangalore | 2008-04-18 | BB McCullum | M Chinnaswamy Stadium | 0 | Royal Challengers Bangalore | Kolkata Knight Riders | Royal Challengers Bangalore | field | Kolkata Knight Riders | runs | 140.0 | N | Asad Rauf | RE Koertzen |
1 | 335983 | Chandigarh | 2008-04-19 | MEK Hussey | Punjab Cricket Association Stadium, Mohali | 0 | Kings XI Punjab | Chennai Super Kings | Chennai Super Kings | bat | Chennai Super Kings | runs | 33.0 | N | MR Benson | SL Shastri |
2 | 335984 | Delhi | 2008-04-19 | MF Maharoof | Feroz Shah Kotla | 0 | Delhi Daredevils | Rajasthan Royals | Rajasthan Royals | bat | Delhi Daredevils | wickets | 9.0 | N | Aleem Dar | GA Pratapkumar |
3 | 335985 | Mumbai | 2008-04-20 | MV Boucher | Wankhede Stadium | 0 | Mumbai Indians | Royal Challengers Bangalore | Mumbai Indians | bat | Royal Challengers Bangalore | wickets | 5.0 | N | SJ Davis | DJ Harper |
4 | 335986 | Kolkata | 2008-04-20 | DJ Hussey | Eden Gardens | 0 | Kolkata Knight Riders | Deccan Chargers | Deccan Chargers | bat | Kolkata Knight Riders | wickets | 5.0 | N | BF Bowden | K Hariharan |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
811 | 1216547 | Dubai | 2020-09-28 | AB de Villiers | Dubai International Cricket Stadium | 0 | Royal Challengers Bangalore | Mumbai Indians | Mumbai Indians | field | Royal Challengers Bangalore | tie | NaN | Y | Nitin Menon | PR Reiffel |
812 | 1237177 | Dubai | 2020-11-05 | JJ Bumrah | Dubai International Cricket Stadium | 0 | Mumbai Indians | Delhi Capitals | Delhi Capitals | field | Mumbai Indians | runs | 57.0 | N | CB Gaffaney | Nitin Menon |
813 | 1237178 | Abu Dhabi | 2020-11-06 | KS Williamson | Sheikh Zayed Stadium | 0 | Royal Challengers Bangalore | Sunrisers Hyderabad | Sunrisers Hyderabad | field | Sunrisers Hyderabad | wickets | 6.0 | N | PR Reiffel | S Ravi |
814 | 1237180 | Abu Dhabi | 2020-11-08 | MP Stoinis | Sheikh Zayed Stadium | 0 | Delhi Capitals | Sunrisers Hyderabad | Delhi Capitals | bat | Delhi Capitals | runs | 17.0 | N | PR Reiffel | S Ravi |
815 | 1237181 | Dubai | 2020-11-10 | TA Boult | Dubai International Cricket Stadium | 0 | Delhi Capitals | Mumbai Indians | Delhi Capitals | bat | Mumbai Indians | wickets | 5.0 | N | CB Gaffaney | Nitin Menon |
816 rows × 16 columns
# Inspect the frame again: `method` is still listed because the preceding
# drop() call was neither in place nor assigned back to `data`.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 816 entries, 0 to 815 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 816 non-null int64 1 city 803 non-null object 2 date 816 non-null object 3 player_of_match 812 non-null object 4 venue 816 non-null object 5 neutral_venue 816 non-null int64 6 team1 816 non-null object 7 team2 816 non-null object 8 toss_winner 816 non-null object 9 toss_decision 816 non-null object 10 winner 812 non-null object 11 result 812 non-null object 12 result_margin 799 non-null float64 13 eliminator 812 non-null object 14 method 19 non-null object 15 umpire1 816 non-null object 16 umpire2 816 non-null object dtypes: float64(1), int64(2), object(14) memory usage: 108.5+ KB
# Preview the frame without the `method` column (only 19 non-null values).
# drop() returns a new DataFrame here; `data` itself is left unchanged.
data.drop(columns=['method'])
id | city | date | player_of_match | venue | neutral_venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | umpire1 | umpire2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 335982 | Bangalore | 2008-04-18 | BB McCullum | M Chinnaswamy Stadium | 0 | Royal Challengers Bangalore | Kolkata Knight Riders | Royal Challengers Bangalore | field | Kolkata Knight Riders | runs | 140.0 | N | Asad Rauf | RE Koertzen |
1 | 335983 | Chandigarh | 2008-04-19 | MEK Hussey | Punjab Cricket Association Stadium, Mohali | 0 | Kings XI Punjab | Chennai Super Kings | Chennai Super Kings | bat | Chennai Super Kings | runs | 33.0 | N | MR Benson | SL Shastri |
2 | 335984 | Delhi | 2008-04-19 | MF Maharoof | Feroz Shah Kotla | 0 | Delhi Daredevils | Rajasthan Royals | Rajasthan Royals | bat | Delhi Daredevils | wickets | 9.0 | N | Aleem Dar | GA Pratapkumar |
3 | 335985 | Mumbai | 2008-04-20 | MV Boucher | Wankhede Stadium | 0 | Mumbai Indians | Royal Challengers Bangalore | Mumbai Indians | bat | Royal Challengers Bangalore | wickets | 5.0 | N | SJ Davis | DJ Harper |
4 | 335986 | Kolkata | 2008-04-20 | DJ Hussey | Eden Gardens | 0 | Kolkata Knight Riders | Deccan Chargers | Deccan Chargers | bat | Kolkata Knight Riders | wickets | 5.0 | N | BF Bowden | K Hariharan |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
811 | 1216547 | Dubai | 2020-09-28 | AB de Villiers | Dubai International Cricket Stadium | 0 | Royal Challengers Bangalore | Mumbai Indians | Mumbai Indians | field | Royal Challengers Bangalore | tie | NaN | Y | Nitin Menon | PR Reiffel |
812 | 1237177 | Dubai | 2020-11-05 | JJ Bumrah | Dubai International Cricket Stadium | 0 | Mumbai Indians | Delhi Capitals | Delhi Capitals | field | Mumbai Indians | runs | 57.0 | N | CB Gaffaney | Nitin Menon |
813 | 1237178 | Abu Dhabi | 2020-11-06 | KS Williamson | Sheikh Zayed Stadium | 0 | Royal Challengers Bangalore | Sunrisers Hyderabad | Sunrisers Hyderabad | field | Sunrisers Hyderabad | wickets | 6.0 | N | PR Reiffel | S Ravi |
814 | 1237180 | Abu Dhabi | 2020-11-08 | MP Stoinis | Sheikh Zayed Stadium | 0 | Delhi Capitals | Sunrisers Hyderabad | Delhi Capitals | bat | Delhi Capitals | runs | 17.0 | N | PR Reiffel | S Ravi |
815 | 1237181 | Dubai | 2020-11-10 | TA Boult | Dubai International Cricket Stadium | 0 | Delhi Capitals | Mumbai Indians | Delhi Capitals | bat | Mumbai Indians | wickets | 5.0 | N | CB Gaffaney | Nitin Menon |
816 rows × 16 columns
# Remove the `method` column from `data` itself (in place), then display the
# updated frame.
data.drop(columns=['method'], inplace=True)
data
id | city | date | player_of_match | venue | neutral_venue | team1 | team2 | toss_winner | toss_decision | winner | result | result_margin | eliminator | umpire1 | umpire2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 335982 | Bangalore | 2008-04-18 | BB McCullum | M Chinnaswamy Stadium | 0 | Royal Challengers Bangalore | Kolkata Knight Riders | Royal Challengers Bangalore | field | Kolkata Knight Riders | runs | 140.0 | N | Asad Rauf | RE Koertzen |
1 | 335983 | Chandigarh | 2008-04-19 | MEK Hussey | Punjab Cricket Association Stadium, Mohali | 0 | Kings XI Punjab | Chennai Super Kings | Chennai Super Kings | bat | Chennai Super Kings | runs | 33.0 | N | MR Benson | SL Shastri |
2 | 335984 | Delhi | 2008-04-19 | MF Maharoof | Feroz Shah Kotla | 0 | Delhi Daredevils | Rajasthan Royals | Rajasthan Royals | bat | Delhi Daredevils | wickets | 9.0 | N | Aleem Dar | GA Pratapkumar |
3 | 335985 | Mumbai | 2008-04-20 | MV Boucher | Wankhede Stadium | 0 | Mumbai Indians | Royal Challengers Bangalore | Mumbai Indians | bat | Royal Challengers Bangalore | wickets | 5.0 | N | SJ Davis | DJ Harper |
4 | 335986 | Kolkata | 2008-04-20 | DJ Hussey | Eden Gardens | 0 | Kolkata Knight Riders | Deccan Chargers | Deccan Chargers | bat | Kolkata Knight Riders | wickets | 5.0 | N | BF Bowden | K Hariharan |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
811 | 1216547 | Dubai | 2020-09-28 | AB de Villiers | Dubai International Cricket Stadium | 0 | Royal Challengers Bangalore | Mumbai Indians | Mumbai Indians | field | Royal Challengers Bangalore | tie | NaN | Y | Nitin Menon | PR Reiffel |
812 | 1237177 | Dubai | 2020-11-05 | JJ Bumrah | Dubai International Cricket Stadium | 0 | Mumbai Indians | Delhi Capitals | Delhi Capitals | field | Mumbai Indians | runs | 57.0 | N | CB Gaffaney | Nitin Menon |
813 | 1237178 | Abu Dhabi | 2020-11-06 | KS Williamson | Sheikh Zayed Stadium | 0 | Royal Challengers Bangalore | Sunrisers Hyderabad | Sunrisers Hyderabad | field | Sunrisers Hyderabad | wickets | 6.0 | N | PR Reiffel | S Ravi |
814 | 1237180 | Abu Dhabi | 2020-11-08 | MP Stoinis | Sheikh Zayed Stadium | 0 | Delhi Capitals | Sunrisers Hyderabad | Delhi Capitals | bat | Delhi Capitals | runs | 17.0 | N | PR Reiffel | S Ravi |
815 | 1237181 | Dubai | 2020-11-10 | TA Boult | Dubai International Cricket Stadium | 0 | Delhi Capitals | Mumbai Indians | Delhi Capitals | bat | Mumbai Indians | wickets | 5.0 | N | CB Gaffaney | Nitin Menon |
816 rows × 16 columns
# Tally match wins per team. The winner column is wrapped in a one-column
# frame, so DataFrame.value_counts() yields a Series whose keys are 1-tuples
# of the team name.
temp = data['winner'].to_frame(name="Winner")
count_wins = temp.value_counts()
print(count_wins)
Winner Mumbai Indians 120 Chennai Super Kings 106 Kolkata Knight Riders 99 Royal Challengers Bangalore 91 Kings XI Punjab 88 Rajasthan Royals 81 Delhi Daredevils 67 Sunrisers Hyderabad 66 Deccan Chargers 29 Delhi Capitals 19 Gujarat Lions 13 Pune Warriors 12 Rising Pune Supergiant 10 Kochi Tuskers Kerala 6 Rising Pune Supergiants 5 dtype: int64
# Pie chart of each team's share of wins.
# Every key of count_wins is a 1-tuple holding the team name; unpack it to
# get plain string labels for the wedges.
labels = [team for (team,) in count_wins.index]
bar, ax = plt.subplots(figsize=(20, 12))
# autopct prints each wedge's percentage with two decimals.
ax = plt.pie(count_wins, labels=labels, autopct="%.2f")
plt.title("Most wins in IPL", fontsize=21)
plt.show()
# Count wins per team over the matches flagged eliminator == 'Y'.
# The filtered Series is passed explicitly as `x`: on current seaborn the first
# positional parameter of countplot is `data`, so passing the Series
# positionally together with `data=data` raises a TypeError.
eliminator_winners = data.loc[data['eliminator'] == 'Y', 'winner']
sns.countplot(x=eliminator_winners)
plt.title("Most wins in Eliminator", fontsize=21)
# Team names are long; rotate them so they do not overlap.
plt.xticks(rotation=90)
plt.show()
# Distinct teams that have won a toss, in order of first appearance.
teams = pd.unique(data['toss_winner'])
teams
array(['Royal Challengers Bangalore', 'Chennai Super Kings', 'Rajasthan Royals', 'Mumbai Indians', 'Deccan Chargers', 'Kings XI Punjab', 'Kolkata Knight Riders', 'Delhi Daredevils', 'Kochi Tuskers Kerala', 'Pune Warriors', 'Sunrisers Hyderabad', 'Gujarat Lions', 'Rising Pune Supergiants', 'Rising Pune Supergiant', 'Delhi Capitals'], dtype=object)
# Build a long-form table with one row per (toss winner, decision) pair,
# holding how many times that team chose to bat or field after winning the
# toss. Pairs that never occurred are kept with a count of 0.
teams = data['toss_winner'].unique()

# The decision masks do not depend on the team, so compute them once.
decision_masks = {
    decision: data['toss_decision'] == decision
    for decision in ('bat', 'field')
}

# Collect plain records and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame on each call.
toss_records = []
for team in teams:
    won_toss = data['toss_winner'] == team
    for decision, chose_decision in decision_masks.items():
        toss_records.append({
            'Toss Winner': team,
            'Decision': decision,
            'Times': int((won_toss & chose_decision).sum()),
        })

decision_making = pd.DataFrame(
    toss_records, columns=['Toss Winner', 'Decision', 'Times']
)
decision_making
Toss Winner | Decision | Times | |
---|---|---|---|
0 | Royal Challengers Bangalore | bat | 24 |
1 | Royal Challengers Bangalore | field | 63 |
2 | Chennai Super Kings | bat | 51 |
3 | Chennai Super Kings | field | 46 |
4 | Rajasthan Royals | bat | 34 |
5 | Rajasthan Royals | field | 53 |
6 | Mumbai Indians | bat | 48 |
7 | Mumbai Indians | field | 58 |
8 | Deccan Chargers | bat | 24 |
9 | Deccan Chargers | field | 19 |
10 | Kings XI Punjab | bat | 27 |
11 | Kings XI Punjab | field | 58 |
12 | Kolkata Knight Riders | bat | 34 |
13 | Kolkata Knight Riders | field | 64 |
14 | Delhi Daredevils | bat | 29 |
15 | Delhi Daredevils | field | 51 |
16 | Kochi Tuskers Kerala | bat | 3 |
17 | Kochi Tuskers Kerala | field | 5 |
18 | Pune Warriors | bat | 11 |
19 | Pune Warriors | field | 9 |
20 | Sunrisers Hyderabad | bat | 24 |
21 | Sunrisers Hyderabad | field | 33 |
22 | Gujarat Lions | bat | 1 |
23 | Gujarat Lions | field | 14 |
24 | Rising Pune Supergiants | bat | 3 |
25 | Rising Pune Supergiants | field | 4 |
26 | Rising Pune Supergiant | bat | 0 |
27 | Rising Pune Supergiant | field | 6 |
28 | Delhi Capitals | bat | 7 |
29 | Delhi Capitals | field | 13 |
# Grouped bar chart: one pair of bars (bat / field) per toss-winning team,
# with bar height taken from the "Times" column of decision_making.
sns.catplot(
    data=decision_making,
    kind='bar',
    x="Toss Winner",
    y="Times",
    hue="Decision",
    height=5,
    aspect=2,
)
# Team names are long; rotate them so they do not overlap.
plt.xticks(rotation=90)
plt.xlabel("IPL Teams")
plt.ylabel("Toss Decision")
plt.title("Toss Decision of Teams")
plt.show()
# Horizontal bar chart of the 8 venues that hosted the most matches.
# value_counts() is sorted descending, so head(8) is the top 8; compute it
# once and reuse it for both axes.
venue_counts = data['venue'].value_counts().head(8)
# x/y are passed as ready-made vectors, so `data=` is not supplied: recent
# seaborn rejects vectors whose length differs from the `data` frame's.
sns.barplot(x=venue_counts.values, y=venue_counts.index)
plt.title("Famous Venue")
plt.xlabel("Venue Count")
plt.ylabel("Venue")
# Render the figure explicitly, like the other plotting cells in this file.
plt.show()
Text(0, 0.5, 'Venue')
# Horizontal bar chart of the 10 venues that hosted the most matches.
# value_counts() is sorted descending, so head(10) is the top 10; compute it
# once and reuse it for both axes.
venue_counts = data['venue'].value_counts().head(10)
# x/y are passed as ready-made vectors, so `data=` is not supplied: recent
# seaborn rejects vectors whose length differs from the `data` frame's.
sns.barplot(x=venue_counts.values, y=venue_counts.index)
plt.title("Famous Venue")
plt.xlabel("Venue Count")
plt.ylabel("Venue")
plt.show()
# Horizontal bar chart of the match count for every venue, most used first
# (value_counts() is sorted descending). Compute it once for both axes.
venue_counts = data['venue'].value_counts()
# x/y are passed as ready-made vectors, so `data=` is not supplied: recent
# seaborn rejects vectors whose length differs from the `data` frame's.
sns.barplot(x=venue_counts.values, y=venue_counts.index)
plt.title("Famous Venue")
plt.xlabel("Venue Count")
plt.ylabel("Venue")
# Render the figure explicitly, like the other plotting cells in this file.
plt.show()
Text(0, 0.5, 'Venue')
# Horizontal bar chart of the 5 names that appear most often in `umpire1`.
# value_counts() is sorted descending, so head(5) is the top 5; compute it
# once and reuse it for both axes.
umpire1_counts = data['umpire1'].value_counts().head(5)
# x/y are passed as ready-made vectors, so `data=` is not supplied: recent
# seaborn rejects vectors whose length differs from the `data` frame's.
sns.barplot(x=umpire1_counts.values, y=umpire1_counts.index)
plt.title("Top 5 Umpires 1")
# Counts are on the x-axis and umpire names on the y-axis; the labels were
# previously swapped.
plt.xlabel("Match count")
plt.ylabel("Umpire 1")
plt.show()
# Horizontal bar chart of the 5 names that appear most often in `umpire2`.
# value_counts() is sorted descending, so head(5) is the top 5; compute it
# once and reuse it for both axes.
umpire2_counts = data['umpire2'].value_counts().head(5)
# x/y are passed as ready-made vectors, so `data=` is not supplied: recent
# seaborn rejects vectors whose length differs from the `data` frame's.
sns.barplot(x=umpire2_counts.values, y=umpire2_counts.index)
plt.title("Top 5 Umpires 2")
# Counts are on the x-axis and umpire names on the y-axis; the labels were
# previously swapped.
plt.xlabel("Match count")
plt.ylabel("Umpire 2")
plt.show()