# Read a CSV file into a DataFrame
import pandas as pd

df = pd.read_csv('file.csv')
# Access a single column of data
df['city'].head()  # head() returns the first 5 rows by default
df.tail()  # tail() returns the last 5 rows by default
0 Kolkata
1 Delhi
2 Mumbai
3 Chennai
4 Kolkata
Name: city, dtype: object
# Access multiple columns of data by passing a list of column names
df[['name', 'city']].head()
      name     city
0     John  Kolkata
1  Michael    Delhi
2    David   Mumbai
3    Sarah  Chennai
4   Daniel  Kolkata
iloc vs loc
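iloc selects rows and columns by integer position, e.g. df.iloc[0] is the first row.
loc selects rows and columns by label, e.g. df.loc[0, 'city'] is the 'city' value of the row labelled 0.
Note that an iloc slice excludes its end position (0:2 gives positions 0 and 1), while a loc slice includes its end label (0:2 gives labels 0, 1 and 2).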
# Access a single row of data by integer position
df.iloc[0]  # the first row
name John
city Kolkata
happiness(0-10) 7
height(cm) 182
weight(kg) 72.2
Name: 0, dtype: object
# Access multiple rows
# iloc takes (row range, column range); the end of an integer slice is exclusive
df.iloc[0:2]  # the first 2 rows (positions 0 and 1)
      name     city  happiness(0-10)  height(cm)  weight(kg)
0     John  Kolkata                7         182        72.2
1  Michael    Delhi                6         168        65.8
# Access a specific cell by row label and column label
df.loc[0, 'city']  # the city of the row labelled 0 (the first row here)
'Kolkata'
# Access multiple cells
# loc slices include the end label, so 0:2 covers the rows labelled 0, 1 and 2
df.loc[0:2, ['name', 'city']]  # the name and city of the first 3 rows
df.loc[:, ['name', 'city']]  # the name and city of all rows
      name     city
0     John  Kolkata
1  Michael    Delhi
2    David   Mumbai
# Select rows based on a condition
df[df['height(cm)'] > 155]  # rows where height is greater than 155 cm
df[df['city'] == 'Kolkata'].head()  # first 5 rows where city is Kolkata
# isin keeps every row whose city value appears in the given list
df[df['city'].isin(['Kolkata', 'Delhi'])]
( name city happiness(0-10) height(cm) weight(kg)
0 John Kolkata 7 182 72.2
1 Michael Delhi 6 168 65.8
2 David Mumbai 8 163 59.9
4 Daniel Kolkata 7 179 71.1
5 Emily Delhi 6 172 66.3,
name city happiness(0-10) height(cm) weight(kg)
0 John Kolkata 7 182 72.2
4 Daniel Kolkata 7 179 71.1
8 Sophia Kolkata 7 183 73.5
12 Zoe Kolkata 7 177 70.8
16 Emma Kolkata 7 181 72.7)
# Select rows based on multiple conditions
# Each condition needs its own parentheses because & and | bind tighter
# than the comparison operators
# A value between two numbers counts as two conditions joined with & (and)
new_df_3 = df[(df['height(cm)'] >= 155) & (df['height(cm)'] < 175)].head()
# Match either of two cities using | (or)
new_df_4 = df[(df['city'] == 'Kolkata') | (df['city'] == 'Mumbai')].head()
new_df_3, new_df_4
( name city happiness(0-10) height(cm) weight(kg)
1 Michael Delhi 6 168 65.8
2 David Mumbai 8 163 59.9
3 Sarah Chennai 9 155 52.7
5 Emily Delhi 6 172 66.3
6 Olivia Mumbai 8 158 61.7,
name city happiness(0-10) height(cm) weight(kg)
0 John Kolkata 7 182 72.2
2 David Mumbai 8 163 59.9
4 Daniel Kolkata 7 179 71.1
6 Olivia Mumbai 8 158 61.7
8 Sophia Kolkata 7 183 73.5)
# Get the unique values of a column
df['city'].unique()
# A 2-D DataFrame can be converted into a Python dictionary
# dict() maps each column name to that column's Series
dict(df.head())
0 John
1 Michael
2 David
3 Sarah
4 Daniel
Name: name, dtype: object
# A 1-D slice of a DataFrame (e.g. one column or one row) can be converted
# into a Python list or a dictionary
list(df['name'].head())  # column as a list
list(df.iloc[0])  # row as a list of values
dict(df.iloc[0])  # row as a dict keyed by column name
# Deal with missing data
# Both calls return a new DataFrame and leave df unchanged; assign the
# result (e.g. df = df.dropna(how='any')) to keep it
df.dropna(how='any')  # drop rows that contain any missing value
df.fillna(value=0)  # fill missing data with a specified value