# pandas DataFrame: counting consecutive events (计数连续事件).
# eg1: running trade id -- the id increases by one on every row whose
# trade_type is neither "-" nor missing; other rows keep the current id.
is_trade = df.trade_type.replace("-", np.nan).notna()
df['trade_ID'] = is_trade.cumsum()
# eg2: running id that increases by one on every row whose type equals 'a';
# the rows that follow share that id until the next 'a'.
df['id'] = (df['type'] == 'a').cumsum()
# eg3: label each run of consecutive equal `type` values, then number the
# rows inside every run starting from 1.
data = {"date": pd.date_range("2019-04-04", periods=7), "type": [0, 1, 0, 0, 1, 1, 0]}
df = pd.DataFrame(data=data)
# Event-style id: increases by one on every row whose type is 0.
# NOTE: this column is overwritten by the per-run position computed below.
df['id'] = df['type'].eq(0).cumsum()
# A new block starts wherever the value differs from the previous row; the
# first row always differs from the NaN produced by shift(), so blocks start at 1.
df['block'] = (df['type'] != df['type'].shift(1)).astype(int).cumsum()
# cumcount() is 0-based within each group, hence the +1.  The previous
# groupby(...).transform(...) returned one column per remaining DataFrame
# column and therefore could not be assigned to the single column 'id'.
df['id'] = df.groupby('block').cumcount() + 1
# eg4: same per-run counter as a two-step computation without a helper column.
data = {
    "date": pd.date_range("2019-04-04", periods=7),
    "type": [0, 1, 0, 0, 1, 1, 0],
}
df = pd.DataFrame(data=data)
# The run id grows by one each time `type` differs from the previous row.
run_id = df['type'].ne(df['type'].shift(1)).cumsum()
# Position of each row inside its run, 1-based.
df['count1'] = df.groupby(run_id).cumcount() + 1
# eg5: create a function that uses static variables (function attributes)
def rolling_count(val):
    """Return how many times in a row `val` has now been seen.

    State is kept between calls in the function attributes
    ``rolling_count.previous`` (the last value seen) and
    ``rolling_count.count`` (length of the current run), so both must be
    reset before the function is reused on another sequence.

    Args:
        val: The next value of the sequence; compared with ``==`` against
            the previously seen value.

    Returns:
        The 1-based position of `val` inside its current run of equal values.
    """
    if val == rolling_count.previous:
        rolling_count.count += 1
    else:
        # A different value starts a new run.
        rolling_count.previous = val
        rolling_count.count = 1
    return rolling_count.count


rolling_count.count = 0  # static variable: length of the current run
rolling_count.previous = None  # static variable: last value seen

data = {"date": pd.date_range("2019-04-04", periods=7), "type": [0, 1, 0, 0, 1, 1, 0]}
df = pd.DataFrame(data=data)
# The frame has no 'col' column; the counter has to run over 'type'.
df['count'] = df['type'].apply(rolling_count)  # new column in dataframe
# eg6: count consecutive rows over several columns at once
def count_consecutive_items_n_cols(df, col_name_list, output_col):
    """Number the rows inside each run where all given columns stay unchanged.

    A run ends as soon as any column in `col_name_list` differs from its
    value in the previous row.

    Args:
        df: DataFrame to process; it is modified in place.
        col_name_list: Names of the columns whose consecutive values are
            compared.
        output_col: Name of the column that receives the 1-based position
            of each row inside its run.

    Returns:
        The same DataFrame object, with `output_col` added or overwritten.
    """
    # One run-id array per column: the id grows whenever the value changes
    # compared with the previous row.
    run_ids = [
        (df[col_name] != df[col_name].shift(1)).cumsum().to_numpy()
        for col_name in col_name_list
    ]
    # Group on the arrays directly: rows are in the same run only when every
    # per-column run id matches.  Arrays cannot be mistaken for column labels,
    # unlike a list of joined strings.
    df[output_col] = df.groupby(run_ids).cumcount() + 1
    return df