pandas中DataFrame的交集并集补集
这里的补集指相对补集,也就是差集:在 df1 中出现、但不在 df2 中出现的行。
# Quick reference for the "stack, then drop duplicates" idiom.
# df1 and df2 are expected to be DataFrames that both have the columns
# 'name', 'age' and 'sex'.
import pandas as pd

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# NOTE: this rebinds df1 to the stacked frame (rows of df1 followed by df2).
df1 = pd.concat([df1, df2])

# keep=False discards every row that occurs more than once, so only rows
# that appear exactly once in the stacked frame remain. This is the
# symmetric difference of the two inputs, not "df1 minus df2": rows found
# only in df2 survive, and rows repeated inside a single input are removed.
rows_seen_once = df1.drop_duplicates(subset=['name', 'age', 'sex'], keep=False)

# keep must be 'first', 'last' or False (True is rejected by pandas).
# keep='first' retains one copy of every distinct row, i.e. the union.
distinct_rows = df1.drop_duplicates(subset=['name', 'age', 'sex'], keep='first')
# Demonstrates intersection, union and difference of two DataFrames whose
# rows are compared on all of KEY_COLUMNS.
import pandas as pd

# Two rows count as the same row only when every one of these columns matches.
KEY_COLUMNS = ['name', 'age', 'sex']


def _show(label, frame):
    """Print *frame* under the heading *label*, followed by blank lines."""
    print("%s:\n%s\n\n" % (label, frame))


df1 = pd.DataFrame(
    [
        ['a', 10, '男'],
        ['b', 11, '男'],
        ['c', 11, '女'],
        ['a', 10, '女'],
        ['c', 11, '男'],
    ],
    columns=KEY_COLUMNS,
)
_show("df1", df1)

df2 = pd.DataFrame(
    [
        ['a', 10, '男'],
        ['b', 11, '女'],
    ],
    columns=KEY_COLUMNS,
)
_show("df2", df2)

# Intersection: an inner merge keeps only rows present in both frames.
intersection = pd.merge(df1, df2, on=KEY_COLUMNS)
_show("交集", intersection)

# Union: an outer merge keeps rows present in either frame.
union = pd.merge(df1, df2, on=KEY_COLUMNS, how='outer')
_show("并集", union)

# Difference (df1 minus df2): a left merge with indicator=True tags each df1
# row as 'both' or 'left_only'; keeping 'left_only' removes exactly the rows
# that also exist in df2. Stacking the frames and calling
# drop_duplicates(keep=False) would instead give the symmetric difference,
# leaking rows that exist only in df2 into the result.
tagged = pd.merge(df1, df2, on=KEY_COLUMNS, how='left', indicator=True)
complement = tagged.loc[tagged['_merge'] == 'left_only', KEY_COLUMNS]
_show("补集(从df1中过滤df1在df2中存在的行)", complement)