pandas中DataFrame的交集并集补集
这里所说的补集就是差集,即在 df1 中出现、但不在 df2 中出现的行。
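常见的思路是先把 df2 拼接到 df1 之后,再按全部列去重:
df1 = pd.concat([df1, df2])  # 旧写法 df1.append(df2) 已在 pandas 2.0 中移除
# keep=False:删除所有重复出现的行(即两表共有的行),只留下仅在 df1 或仅在 df2 中出现的行
df1.drop_duplicates(subset=['name', 'age', 'sex'], keep=False)
# keep='first':每组重复行只保留一条,得到 df1 与 df2 的并集(keep 只接受 'first'、'last' 或 False,不能写 True)
df1.drop_duplicates(subset=['name', 'age', 'sex'], keep='first')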
import pandas as pd
# Demo: row-wise set operations (intersection, union, difference) between two
# DataFrames that share the same columns.

# Columns that together identify a row; every operation below compares rows
# on all of them.
KEY_COLUMNS = ['name', 'age', 'sex']

# The constructor is reached through the `pd` namespace because only
# `pandas as pd` is imported; a bare `DataFrame` would raise NameError.
df1 = pd.DataFrame(
    [
        ['a', 10, '男'],
        ['b', 11, '男'],
        ['c', 11, '女'],
        ['a', 10, '女'],
        ['c', 11, '男'],
    ],
    columns=KEY_COLUMNS,
)
print("df1:\n%s\n\n" % df1)

df2 = pd.DataFrame(
    [
        ['a', 10, '男'],
        ['b', 11, '女'],
    ],
    columns=KEY_COLUMNS,
)
print("df2:\n%s\n\n" % df2)

# Intersection: the default inner merge on every column keeps only rows that
# appear in both frames.
intersection = pd.merge(df1, df2, on=KEY_COLUMNS)
print("交集:\n%s\n\n" % intersection)

# Union: an outer merge on every column keeps rows found in either frame.
union = pd.merge(df1, df2, on=KEY_COLUMNS, how='outer')
print("并集:\n%s\n\n" % union)

# Difference (df1 minus df2): keep the rows of df1 that do not occur in df2.
# Concatenating the frames and calling drop_duplicates(keep=False) would
# return the symmetric difference instead, i.e. rows that exist only in df2
# would leak into the result. A left merge with indicator=True tags each df1
# row with where it was found, so only 'left_only' rows are kept.
# df2 is de-duplicated first so a repeated df2 row cannot multiply df1 rows.
# df1 itself is left unmodified so it can still be reused afterwards.
tagged = df1.merge(
    df2.drop_duplicates(subset=KEY_COLUMNS),
    on=KEY_COLUMNS,
    how='left',
    indicator=True,
)
complement = tagged.loc[tagged['_merge'] == 'left_only', KEY_COLUMNS]
print("补集(从df1中过滤df1在df2中存在的行):\n%s\n\n" % complement)