一、標準化
代碼實現(z_score方法標準化)
def standardlize(data, inf2nan=True, axis=1):
    """
    Standardize data with the z-score method: ``(x - mean) / std``.

    The mean and the sample standard deviation (``ddof=1``) are always
    computed from the finite, non-missing values only, so ``np.inf``,
    ``-np.inf`` and ``np.nan`` never distort the statistics. The input
    object is never modified.

    Parameters
    ----------
    data : pd.Series, pd.DataFrame or np.ndarray (1-D or 2-D)
        Values to standardize.
    inf2nan : bool, default True
        If true, ``np.inf`` / ``-np.inf`` are replaced by ``np.nan`` in the
        result. If false, infinite entries are left in place and pass
        through the arithmetic (they normally stay infinite).
    axis : {0, 1}, default 1
        Only used for 2-D input. 0 standardizes each column, 1 standardizes
        each row.

    Returns
    -------
    pd.Series, pd.DataFrame or np.ndarray
        Standardized values in the same container type as ``data``.
        For 2-D input with an ``axis`` other than 0 or 1 the string
        ``'axis值有誤'`` is returned instead (kept for backward
        compatibility). For unsupported input types a message is printed
        and ``None`` is returned.
    """
    # Arrays are wrapped in the matching pandas container so that a single
    # code path does the work, then unwrapped again.
    if isinstance(data, np.ndarray) and data.ndim in (1, 2):
        wrapped = pd.Series(data) if data.ndim == 1 else pd.DataFrame(data)
        result = standardlize(wrapped, inf2nan=inf2nan, axis=axis)
        if isinstance(result, (pd.Series, pd.DataFrame)):
            return result.to_numpy()
        # The invalid-axis message is passed through unchanged.
        return result

    if isinstance(data, pd.DataFrame):
        if axis not in (0, 1):
            return 'axis值有誤'
        axis = int(axis)

        # Hide infinities so they are skipped like NaN by mean/std.
        finite = data.mask(np.isinf(data))
        mean = finite.mean(axis=axis)
        std = finite.std(axis=axis, ddof=1)

        base = finite if inf2nan else data
        # Statistics taken along `axis` are labelled by the other axis,
        # so that is the axis they must be aligned on when broadcasting.
        align = 1 - axis
        return base.sub(mean, axis=align).div(std, axis=align)

    if isinstance(data, pd.Series):
        finite = data.mask(np.isinf(data))
        base = finite if inf2nan else data
        return (base - finite.mean()) / finite.std(ddof=1)

    print('data不是pd.Series和pd.DataFrame類型')
    return None