Pandas多条件筛选

1. ‘&’和‘|’

import  pandas as pd

score={'student':['LiLei','HanMeiMei','David','Mary','oldwang'],'chinese':[90,65,72,38,42],'math':[68,32,75,60,92],
      'english':[63,75,82,74,91],'physics':[93,32,71,24,85]}

data=pd.DataFrame(score)

print(data)

out:
     student  chinese  math  english  physics
0      LiLei       90    68       63       93
1  HanMeiMei       65    32       75       32
2      David       72    75       82       71
3       Mary       38    60       74       24
4    oldwang       42    92       91       85

#筛选数学大于60分且物理大于80分

data1=data.loc[(data.math>60)&(data.physics>80)]

print(data1)

out:
 student  chinese  math  english  physics
0    LiLei       90    68       63       93
4  oldwang       42    92       91       85


#筛选语文大于80分或者物理低于60分

data2=data.loc[(data.chinese>80)|(data.physics<60)]

print(data2)

out:
    student  chinese  math  english  physics
0      LiLei       90    68       63       93
1  HanMeiMei       65    32       75       32
3       Mary       38    60       74       24


#筛选数学高于平均分,并且按照分数从高到低排列
data3=data[data['math']>data['math'].mean()].sort_values(by='math',ascending=False)

print(data3)

out:
student  chinese  math  english  physics
4  oldwang       42    92       91       85
2    David       72    75       82       71
0    LiLei       90    68       63       93

2.isin

import  pandas as pd

score={'student':['LiLei','HanMeiMei','David','Mary','oldwang'],'chinese':[90,65,72,38,42],'math':[68,32,75,60,92],
      'english':[63,75,82,74,91],'physics':[93,32,71,24,85],'label':['A','C','B','C','A']}

data=pd.DataFrame(score)

print(data)

out:

     student  chinese  math  english  physics label
0      LiLei       90    68       63       93     A
1  HanMeiMei       65    32       75       32     C
2      David       72    75       82       71     B
3       Mary       38    60       74       24     C
4    oldwang       42    92       91       85     A


#筛选label为B或者C
data4=data.loc[data['label'].isin(['B','C']),:]

print(data4)

out:

  student  chinese  math  english  physics label
1  HanMeiMei       65    32       75       32     C
2      David       72    75       82       71     B
3       Mary       38    60       74       24     C


#当然也可以加取反~,查找不在B和C的

data5=data.loc[~data['label'].isin(['B','C']),:]

print(data5)

out:

  student  chinese  math  english  physics label
0    LiLei       90    68       63       93     A
4  oldwang       42    92       91       85     A

3.str.contains

pandas可以用str.contains来模糊筛选,有点像SQL中的like

#查找姓名中包含wang的同学

data6=data.loc[data['student'].str.contains('wang'),:]

print(data6)

out:

 student  chinese  math  english  physics label
4  oldwang       42    92       91       85     A

4.Query

#查找语文大于70

data7=data.query('chinese>70')

print(data7)

out:
  student  chinese  math  english  physics label
0   LiLei       90    68       63       93     A
2   David       72    75       82       71     B


# data.query('chinese>70')==data[data.chinese>70]

5.Filter

filter不筛选具体的数据,而是筛选特定的行和列。它支持三种筛选模式:

        items:固定列名

        regex:正则表达式

        like:模糊查询

#筛选语文和英语列
data8=data.filter(['chinese','english'])

print(data8)

out:

  chinese  english
0       90       63
1       65       75
2       72       82
3       38       74
4       42       91


#筛选列名中包含a的
data9=data.filter(regex='a',axis=1)

print(data9)

out:

 math label
0    68     A
1    32     C
2    75     B
3    60     C
4    92     A

#筛选索引中包含2的

data10=data.filter(like='2',axis=0)

print(data10)

out:

  student  chinese  math  english  physics label
2   David       72    75       82       71     B