# pandas DataFrame:对连续出现的事件(连续相同的值)进行计数的几种方法。
import numpy as np
import pandas as pd

# eg1: give every trade an increasing ID. Rows whose ``trade_type`` is "-"
# are turned into NaN first, so they do not bump the counter and inherit the
# ID of the closest preceding real trade. (The sample frame is illustrative.)
df = pd.DataFrame({"trade_type": ["buy", "-", "-", "sell", "-", "buy"]})
df["trade_ID"] = df["trade_type"].replace("-", np.nan).notna().cumsum()

# eg2: start a new ID every time the value "a" appears.
# (The sample frame is illustrative.)
df = pd.DataFrame({"type": ["a", "b", "b", "a", "b"]})
df["id"] = df["type"].eq("a").cumsum()

# eg3: label each run of equal consecutive values ("block"), then number the
# rows inside every block starting at 1.
data = {
    "date": pd.date_range("2019-04-04", periods=7),
    "type": [0, 1, 0, 0, 1, 1, 0],
}
df = pd.DataFrame(data=data)
# Running count of zeros; this column is overwritten a few lines below.
df["id"] = df["type"].eq(0).cumsum()
# A row opens a new block whenever it differs from the row before it.
df["block"] = (df["type"] != df["type"].shift(1)).astype(int).cumsum()
# cumcount() returns a single Series. A frame-wide transform would produce
# one column per non-key column and could not be assigned to a single column.
df["id"] = df.groupby("block").cumcount() + 1

# eg4: the same result as eg3 in one expression, without a helper column.
data = {
    "date": pd.date_range("2019-04-04", periods=7),
    "type": [0, 1, 0, 0, 1, 1, 0],
}
df = pd.DataFrame(data=data)
df["count1"] = (
    df.groupby((df["type"] != df["type"].shift(1)).cumsum()).cumcount() + 1
)


# eg5: a function that keeps its state in function attributes
# ("static variables").
def rolling_count(val):
    """Return how many times in a row ``val`` has now been seen.

    The state lives in ``rolling_count.previous`` and ``rolling_count.count``
    and persists between calls, so both attributes must be reset (as done
    right below this definition) before counting a new sequence.
    """
    if val == rolling_count.previous:
        rolling_count.count += 1
    else:
        rolling_count.previous = val
        rolling_count.count = 1
    return rolling_count.count


rolling_count.count = 0  # static variable
rolling_count.previous = None  # static variable

data = {
    "date": pd.date_range("2019-04-04", periods=7),
    "type": [0, 1, 0, 0, 1, 1, 0],
}
df = pd.DataFrame(data=data)
# The frame only has the columns "date" and "type", so count over "type".
df["count"] = df["type"].apply(rolling_count)  # new column in dataframe


# eg6: count consecutive rows over several columns at once.
def count_consecutive_items_n_cols(df, col_name_list, output_col):
    """Number the rows of each run in which all given columns stay unchanged.

    A new run starts whenever the value of any column in ``col_name_list``
    differs from the previous row. Rows are numbered from 1 within a run.

    Args:
        df: DataFrame to process. It is modified in place.
        col_name_list: Names of the columns that together define a run.
        output_col: Name of the column that receives the counter.

    Returns:
        The same ``df`` object, with ``output_col`` added or overwritten.

    Raises:
        ValueError: If ``col_name_list`` is empty.
    """
    if not col_name_list:
        raise ValueError("col_name_list must contain at least one column name")
    # One run-id array per column; the id increases when the value changes.
    run_ids = [
        (df[col_name] != df[col_name].shift(1)).cumsum().to_numpy()
        for col_name in col_name_list
    ]
    # Grouping on all run-id arrays together makes the combination of ids
    # identify a run, so no per-row string key has to be built.
    df[output_col] = df.groupby(run_ids).cumcount() + 1
    return df