pandas: Quizzes#

import pandas as pd

Loading data into Pandas DataFrame#

The dataset is from Kaggle - Pokemon.

df = pd.read_csv('data/Pokemon.csv')

1. Import the pandas library as pd.#

import pandas as pd

2. Read CSV into df#

df = pd.read_csv('data/Pokemon.csv')

3. Show first 5 rows#

df.head(5)

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
4 4 Charmander Fire NaN 309 39 52 43 60 50 65 1 False

4. Show last 3 rows#

df.tail(3)

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
797 720 HoopaHoopa Confined Psychic Ghost 600 80 110 60 150 130 70 6 True
798 720 HoopaHoopa Unbound Psychic Dark 680 80 160 60 170 130 80 6 True
799 721 Volcanion Fire Water 600 80 110 120 130 90 70 6 True

5. Show all column names#

df.columns

Hide code cell output

Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
       'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')

6. Select rows index 10 to 15#

df.iloc[10:16]

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
10 8 Wartortle Water NaN 405 59 63 80 65 80 58 1 False
11 9 Blastoise Water NaN 530 79 83 100 85 105 78 1 False
12 9 BlastoiseMega Blastoise Water NaN 630 79 103 120 135 115 78 1 False
13 10 Caterpie Bug NaN 195 45 30 35 20 20 45 1 False
14 11 Metapod Bug NaN 205 50 20 55 25 25 30 1 False
15 12 Butterfree Bug Flying 395 60 45 50 90 80 70 1 False

7. Select row index 50#

df.iloc[50]

Hide code cell output

#                    45
Name          Vileplume
Type 1            Grass
Type 2           Poison
Total               490
HP                   75
Attack               80
Defense              85
Sp. Atk             110
Sp. Def              90
Speed                50
Generation            1
Legendary         False
Name: 50, dtype: object

8. Filter rows Type 1 == Grass#

df.loc[df['Type 1'] == 'Grass'].head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
48 43 Oddish Grass Poison 320 45 50 55 75 65 30 1 False

9. Filter rows HP > 100#

df.loc[df['HP'] > 100].head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
44 39 Jigglypuff Normal Fairy 270 115 45 20 45 25 20 1 False
45 40 Wigglytuff Normal Fairy 435 140 70 45 85 50 45 1 False
96 89 Muk Poison NaN 500 105 105 75 65 100 50 1 False
120 112 Rhydon Ground Rock 485 105 130 120 45 45 40 1 False
121 113 Chansey Normal NaN 450 250 5 5 35 105 50 1 False

10. Filter rows Name contains chu#

df.loc[df['Name'].str.contains('chu')].head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
30 25 Pikachu Electric NaN 320 35 55 40 50 50 90 1 False
31 26 Raichu Electric NaN 485 60 90 55 90 80 110 1 False
186 172 Pichu Electric NaN 205 20 40 15 35 35 60 2 False
257 238 Smoochum Ice Psychic 305 45 30 15 85 65 65 2 False

11. Sort by Attack descending#

df.sort_values('Attack', ascending=False).head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
163 150 MewtwoMega Mewtwo X Psychic Fighting 780 106 190 100 154 100 130 1 True
232 214 HeracrossMega Heracross Bug Fighting 600 80 185 115 40 105 75 2 False
429 386 DeoxysAttack Forme Psychic NaN 600 50 180 20 180 20 150 3 True
426 384 RayquazaMega Rayquaza Dragon Flying 780 105 180 100 180 100 115 3 True
424 383 GroudonPrimal Groudon Ground Fire 770 100 180 160 150 90 90 3 True

12. Reset index#

new_df = df.reset_index(drop=True)
new_df.head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
4 4 Charmander Fire NaN 309 39 52 43 60 50 65 1 False

13. Select columns Name, Type 1, HP#

df[['Name', 'Type 1', 'HP']].head()

Hide code cell output

Name Type 1 HP
0 Bulbasaur Grass 45
1 Ivysaur Grass 60
2 Venusaur Grass 80
3 VenusaurMega Venusaur Grass 80
4 Charmander Fire 39

14. Filter Type 1 == Grass and Type 2 == Poison#

df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison')].head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
48 43 Oddish Grass Poison 320 45 50 55 75 65 30 1 False

15. Mean HP of Legendary#

df.loc[df['Legendary'] == True]['HP'].mean()

Hide code cell output

np.float64(92.73846153846154)

16. Rows with even indices#

even_indices_df = df.iloc[::2]
even_indices_df.head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
4 4 Charmander Fire NaN 309 39 52 43 60 50 65 1 False
6 6 Charizard Fire Flying 534 78 84 78 109 85 100 1 False
8 6 CharizardMega Charizard Y Fire Flying 634 78 104 78 159 115 100 1 False

17. Save even indices to filtered_pokemon.csv#

even_indices_df = df.iloc[::2]
even_indices_df.to_csv('data/filtered_pokemon.csv', index=True)
even_indices_df = pd.read_csv('data/filtered_pokemon.csv', index_col=0)
even_indices_df.head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
4 4 Charmander Fire NaN 309 39 52 43 60 50 65 1 False
6 6 Charizard Fire Flying 534 78 84 78 109 85 100 1 False
8 6 CharizardMega Charizard Y Fire Flying 634 78 104 78 159 115 100 1 False

18. First 10 rows Name, Type 1, Attack#

df[['Name', 'Type 1', 'Attack']].head(10)

Hide code cell output

Name Type 1 Attack
0 Bulbasaur Grass 49
1 Ivysaur Grass 62
2 Venusaur Grass 82
3 VenusaurMega Venusaur Grass 100
4 Charmander Fire 52
5 Charmeleon Fire 64
6 Charizard Fire 84
7 CharizardMega Charizard X Fire 130
8 CharizardMega Charizard Y Fire 104
9 Squirtle Water 48

19. Max Defense#

df['Defense'].max()

Hide code cell output

np.int64(230)

20. Rows Speed above mean#

mean_speed = df['Speed'].mean()
new_df = df.loc[df['Speed'] > mean_speed]
print('mean speed: ', mean_speed)
new_df.head()

Hide code cell output

mean speed:  68.2775
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
5 5 Charmeleon Fire NaN 405 58 64 58 80 65 80 1 False
6 6 Charizard Fire Flying 534 78 84 78 109 85 100 1 False
7 6 CharizardMega Charizard X Fire Dragon 634 78 130 111 130 85 100 1 False

21. First 3 rows Name and Attack#

df[['Name', 'Attack']].iloc[:3]

Hide code cell output

Name Attack
0 Bulbasaur 49
1 Ivysaur 62
2 Venusaur 82

22. Count per Type 1#

df.groupby(['Type 1'])['#'].count().sort_index()

Hide code cell output

Type 1
Bug          69
Dark         31
Dragon       32
Electric     44
Fairy        17
Fighting     27
Fire         52
Flying        4
Ghost        32
Grass        70
Ground       32
Ice          24
Normal       98
Poison       28
Psychic      57
Rock         44
Steel        27
Water       112
Name: #, dtype: int64

23. Rows Generation 3 or 5#

df.loc[(df['Generation'] == 3) | (df['Generation'] == 5)].head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
272 252 Treecko Grass NaN 310 40 45 35 65 55 70 3 False
273 253 Grovyle Grass NaN 405 50 65 45 85 65 95 3 False
274 254 Sceptile Grass NaN 530 70 85 65 105 85 120 3 False
275 254 SceptileMega Sceptile Grass Dragon 630 70 110 75 145 85 145 3 False
276 255 Torchic Fire NaN 310 45 60 40 70 50 45 3 False

24. Replace Fire with Flame in Type 1#

df_copy = df.copy()
df_copy['Type 1'] = df_copy['Type 1'].str.replace('Fire', 'Flame')
df_copy.head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
4 4 Charmander Flame NaN 309 39 52 43 60 50 65 1 False

25. Rows Name startswith P#

df.loc[df['Name'].str.startswith('P')].head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
20 16 Pidgey Normal Flying 251 40 45 40 35 35 56 1 False
21 17 Pidgeotto Normal Flying 349 63 60 55 50 50 71 1 False
22 18 Pidgeot Normal Flying 479 83 80 75 70 70 101 1 False
23 18 PidgeotMega Pidgeot Normal Flying 579 83 80 80 135 80 121 1 False
30 25 Pikachu Electric NaN 320 35 55 40 50 50 90 1 False

26. Mean Attack per Type 1#

df.groupby('Type 1')['Attack'].mean().sort_index()

Hide code cell output

Type 1
Bug          70.971014
Dark         88.387097
Dragon      112.125000
Electric     69.090909
Fairy        61.529412
Fighting     96.777778
Fire         84.769231
Flying       78.750000
Ghost        73.781250
Grass        73.214286
Ground       95.750000
Ice          72.750000
Normal       73.469388
Poison       74.678571
Psychic      71.456140
Rock         92.863636
Steel        92.703704
Water        74.151786
Name: Attack, dtype: float64

27. Sort by Type 1 asc, Attack desc#

df.sort_values(['Type 1', 'Attack'], ascending=[1, 0]).head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
232 214 HeracrossMega Heracross Bug Fighting 600 80 185 115 40 105 75 2 False
137 127 PinsirMega Pinsir Bug Flying 600 65 155 120 65 90 105 1 False
19 15 BeedrillMega Beedrill Bug Poison 495 65 150 40 15 80 145 1 False
229 212 ScizorMega Scizor Bug Steel 600 70 150 140 65 100 75 2 False
650 589 Escavalier Bug Steel 495 70 135 105 60 105 20 5 False

28. Mean Defense per Type 1#

df.groupby('Type 1')['Defense'].mean().sort_index()

Hide code cell output

Type 1
Bug          70.724638
Dark         70.225806
Dragon       86.375000
Electric     66.295455
Fairy        65.705882
Fighting     65.925926
Fire         67.769231
Flying       66.250000
Ghost        81.187500
Grass        70.800000
Ground       84.843750
Ice          71.416667
Normal       59.846939
Poison       68.821429
Psychic      67.684211
Rock        100.795455
Steel       126.370370
Water        72.946429
Name: Defense, dtype: float64

29. Legendary with Attack > 100#

df.loc[(df['Legendary'] == True) & (df['Attack'] > 100)].head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
162 150 Mewtwo Psychic NaN 680 106 110 90 154 90 130 1 True
163 150 MewtwoMega Mewtwo X Psychic Fighting 780 106 190 100 154 100 130 1 True
164 150 MewtwoMega Mewtwo Y Psychic NaN 780 106 150 70 194 120 140 1 True
263 244 Entei Fire NaN 580 115 115 85 90 75 100 2 True
270 250 Ho-oh Fire Flying 680 106 130 90 110 154 90 2 True

30. New DataFrame Name and Total renamed Overall#

new_df = df[['Name', 'Total']].rename(columns={'Total': 'Overall'})
new_df.head()

Hide code cell output

Name Overall
0 Bulbasaur 318
1 Ivysaur 405
2 Venusaur 525
3 VenusaurMega Venusaur 625
4 Charmander 309

31. Filter Type 1 == Water and Attack >= 80#

df.loc[(df['Type 1'] == 'Water') & (df['Attack'] >= 80)].head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
11 9 Blastoise Water NaN 530 79 83 100 85 105 78 1 False
12 9 BlastoiseMega Blastoise Water NaN 630 79 103 120 135 115 78 1 False
60 55 Golduck Water NaN 500 80 82 78 95 80 85 1 False
67 62 Poliwrath Water Fighting 510 90 95 95 70 90 70 1 False
98 91 Cloyster Water Ice 525 50 95 180 85 45 70 1 False

32. Generation 4 Name and Defense#

df.loc[df['Generation'] == 4][['Name', 'Defense']].head()

Hide code cell output

Name Defense
432 Turtwig 64
433 Grotle 85
434 Torterra 105
435 Chimchar 44
436 Monferno 52

33. Mean Total per (Type 1, Type 2)#

df.groupby(['Type 1', 'Type 2'])['Total'].mean().head(10)

Hide code cell output

Type 1  Type 2  
Bug     Electric    395.500000
        Fighting    550.000000
        Fire        455.000000
        Flying      419.500000
        Ghost       236.000000
        Grass       384.000000
        Ground      345.000000
        Poison      347.916667
        Rock        435.000000
        Steel       509.714286
Name: Total, dtype: float64

34. Filter Total between 400 and 500#

df.loc[(df['Total'] >= 400) & (df['Total'] <= 500)].head()

Hide code cell output

# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False
5 5 Charmeleon Fire NaN 405 58 64 58 80 65 80 1 False
10 8 Wartortle Water NaN 405 59 63 80 65 80 58 1 False
19 15 BeedrillMega Beedrill Bug Poison 495 65 150 40 15 80 145 1 False
22 18 Pidgeot Normal Flying 479 83 80 75 70 70 101 1 False