Skip to content

Data selecting

Select columns by column names

Select columns by pattern
df.filter(regex='name|address|date*')

Filter by value

students.loc[students['student_id'] == 101]
df.query('col1 <= 1 & 1 <= col1')

Filter by conditions

query
result = df.query('column > value and other_column < other_value')
df = df.query('date.isnull()')
lambda
df = df.loc[lambda x: (x.cola != "YES") & (x.colb != "YES")]
Filter by multiple conditions
 df = df.loc[(df['area'] >= 3000000)|(df['population'] >= 25000000)]
Filter by value in another list
df = df[df['A'].isin([3, 6])]
filter by value not in another list
# Tilde operator
df = df[~df.group.isin(["A","B","D"])]

# False condition
df = df[df.group.isin(["A","B","D"])==False]

Filter the max value

df=df.loc[df['count'] == df['count'].max()]

loc

return the row as a Series
df.loc['viper']
select rows or columns as a DataFrame
df.loc[['row1', 'row2']]
df.loc[['col1', 'col2']]
select by row and column
df.loc['row', 'col']
df.loc[:, 'C':'E']

iloc

df.iloc[N-1]['salary'] # ???