im_selectionimporttrain_test__matplotlib_("ignore")%matplotlibinline%_format='retina'register_matplotlib_converters()(style='whitegrid',palette='muted',font_scale=1.5)rcParams['']=20,10RANDOM_SEED=42(RANDOM_SEED)dataset=_excel('',parse_dates=[['Date','Time']],index_col='Date_Time')Plottingfewdataforonemonthtimespantoobservehowitlooksdf_plot=(df_data,columns=['CO(GT)','C6H6(GT)','NOx(GT)','(NOx)','(O3)','T','RH','AH'])df_[0:510].plot(y=['CO(GT)','C6H6(GT)','NOx(GT)','(NOx)','(O3)','T','RH','AH'])();selectingonlyRelativeHumidityandDatecolumnforprocessingdata=df_data[["RH"]]=['Relative_Humidity']df_newdata=(data)df_()
class''DatetimeIndex:9357entries,2004-03-1018:00:00to2005-04-0414:00:00Datacolumns(total1columns):(df_,df_newdata['Relative_Humidity'])('Date_Time')('Relative_Humidity')ColumnNon-NullCountDtype----------------------------0Relative_Humidity9357non-nullfloat64dtypes:float64(1)memoryusage:146.2KB
AUGMENTEDDICKYFULLERTEST(ADF)tocheckstationarityofthetimeseriesdatadefstationarity_check(data):stationarity=adfuller(data)print('ADFStatistic:{}'.format(stationarity[0]))print('p-value:{}'.format(stationarity[1]))print('CriticalValues:')forkey,valueinstationarity[4].items():print('\t{}:{}'.format(key,value))Userollingmeantomaketheinputdatastationaryrolling_mean=df_(window=12).mean()df_newdata_mean=df_newdata-rolling_meandf_newdata_(method='bfill',inplace=True)stationarity_check(df_newdata_mean)ADFStatistic:-22.735p-value:0.0CriticalValues:1%:-3.43105%:-2.8670610%:-2.5669350967161857
(df_newdata_,df_newdata_shift['Relative_Humidity'])('Date_Time')('Relative_Humidity')size=int(len(df_newdata_shift)*0.85)X_train_arima=df_newdata_shift[:size]y_test_arima=df_newdata_shift[size:]print('lengthoftraindata:',len(X_train_arima))print('lengthoftestdata:',len(y_test_arima))lengthoftraindata:7953lengthoftestdata:1404pred_forecastmae=((pred_forecast-y_test_arima['Relative_Humidity'].values))print("Meanabsoluteerror,ARIMA:",mae)Meanabsoluteerror,ARIMA:3.343347672557643Usesquarederrorstofindtheanomaliesdefdetect_outliers(squared_errors):threshold=(squared_errors)+(squared_errors)*1.3predictions=(squared_errors=threshold).astype(int)returnpredictions,thresholdsquared_errors=model_arima_**2predictions,threshold=detect_outliers(squared_errors)=(predictions)(columns={0:'Relative_Humidity'},inplace=True)anomalies=anomalies[~(anomalies==0).any(axis=1)]anomaliesRelative_HumidityDate_Time2004-03-1210:00:0012004-03-1309:00:0012004-03-1610:00:0012004-03-1821:00:0012004-03-2209:00:0012005-03-2912:00:0012005-03-3108:00:0012005-04-0308:00:0012005-04-0318:00:0012005-04-0408:00:001345rows×1columnsanomalies_values=(df_newdata_shift[["Relative_Humidity"]],on="Date_Time",how="left")anomalies_(3)Relative_Humidity_xRelative_Humidity_yDate_Time2004-03-1210:00:001-14.4500002004-03-1309:00:001-11.8833332004-03-1610:00:001-13.424999(df_newdata_,df_newdata_shift['Relative_Humidity'],label='Relative_Humidity');(anomalies_,anomalies_values['Relative_Humidity_y'],color=_palette()[3],s=52,label='anomaly')(rotation=45)(['Relative_Humidity','AnomalyDetected'])('Relative_Humidity(preprocessed)')('Date_Time');PredictiveModelTimeSeriesAnomalyDetection-LSTM
X,y=train_test_split(df_newdata,test_size=0.33,shuffle=False,random_state=RANDOM_SEED)X_t=()y_t=()print(,)print(X_,y_)
(6269,1)(3088,1)(6269,1)(3088,1)
scaler=StandardScaler()scaler=(df_newdata[['Relative_Humidity']])X['Relative_Humidity']=(X[['Relative_Humidity']])y['Relative_Humidity']=(y[['Relative_Humidity']])
(,y['Relative_Humidity'])('Date_Time')('Relative_Humidity')defcreate_dataset(X,y,steps=1):Xs,ys=[],[]foriinrange(len(X)-steps):v=[i:(i+steps)].(v)([i+steps])(Xs),(ys)
TIME_STEPS=10creatingaLSTMbasedmodelmodel=()((units=64,input_shape=(X_[1],X_[2])))((0.2))((0.2))((n=X_[1]))((units=64,return_sequences=True))((0.2))(((units=X_[2])))(loss='mae',optimizer='adam')
history=(X_train,y_train,epochs=10,batch_size=32,validation_split=0.3,shuffle=False)
Epoch1/10137/137[==============================]-6s19ms/step-loss:0.6843-val_loss:0.6996Epoch2/10137/137[==============================]-2s13ms/step-loss:0.6801-val_loss:0.7038Epoch3/10137/137[==============================]-2s13ms/step-loss:0.6770-val_loss:0.6929Epoch4/10137/137[==============================]-2s13ms/step-loss:0.6744-val_loss:0.6857Epoch5/10137/137[==============================]-2s13ms/step-loss:0.6720-val_loss:0.6873Epoch6/10137/137[==============================]-2s13ms/step-loss:0.6701-val_loss:0.6744Epoch7/10137/137[==============================]-2s13ms/step-loss:0.6683-val_loss:0.6762Epoch8/10137/137[==============================]-2s13ms/step-loss:0.6671-val_loss:0.6655Epoch9/10137/137[==============================]-2s14ms/step-loss:0.6659-val_loss:0.6642Epoch10/10137/137[==============================]-2s14ms/step-loss:0.6638-val_loss:0.6585
(['loss'],label='TrainingDataLoss')(['val_loss'],label='TestDataLoss')("Epochs")("Loss")()X_train_pred=(X_train)train_mae_loss=((X_train_pred-X_train),axis=1)X_test_pred=(X_test)test_mae_loss=((X_test_pred-X_test),axis=1)print("Meanabsoluteerror:LSTM",(test_mae_loss))196/196[==============================]-1s4ms/step97/97[==============================]-0s4ms/stepMeanabsoluteerror:
threshold=(train_mae_loss)-0.3print(f'Threshold:{threshold}')test_score_df=(y[TIME_STEPS:].index)test_score_df['loss']=test_mae_losstest_score_df['threshold']=thresholdtest_score_df['anomaly']=test_score__score__score_df['Relative_Humidity']=y[TIME_STEPS:].Relative_HumidityThreshold:1.0782168198295923
(test_score_,test_score_,label='loss')(test_score_,test_score_,label='threshold')(rotation=30)()
anomalie=test_score_df[test_score_==True]
anomalie_values=(y[["Relative_Humidity"]],on="Date_Time",how="left")anomalie_(columns={'Relative_Humidity_y':'Relative_Humidity'},inplace=True)anomalie_()
Date_Time
loss
threshold
anomaly
Relative_Humidity_x
Relative_Humidity
0
2004-11-2710:00:00
1.099661
1.078217
True
NaN
1.671711
1
2004-11-2711:00:00
1.123045
1.078217
True
NaN
1.403755
2
2004-11-2712:00:00
1.125650
1.078217
True
NaN
1.246726
3
2004-11-2713:00:00
1.111426
1.078217
True
NaN
1.410958
4
2004-11-2906:00:00
1.086354
1.078217
True
NaN
2.039071
(,_Humidity,label='Relative_Humidity')(anomalie__Time,anomalie__Humidity,color=_palette()[3],s=52,label='anomaly')(rotation=25)(['Relative_Humidity','AnomalyDetected'])('Relative_Humidity(standardised)')('Date_Time')ClusteringModelTimeSeriesAnomalyDetection-DBSCAN
Assignnon-clusteredpointsasanomaliesanomalies_pos=(labels==-1)[0]
print('Lengthofthelabels:',len(labels))print(labels)print('Numberofanomalies:',len(anomalies_pos))print('Positions:',anomalies_pos)Lengthofthelabels:9357[000-1-1-1]Numberofanomalies:529Positions:[201661671688350679689589792492696596797210291030105110851206664708136019411943197219731978209820992222227222822292230223122322469247025442567256825862587258825892611261226634273427542755275627572758275927802781278227832784280328042805280628282829283028312832284828502851285228532854285528742875287628772878287928982924292529502994299529962999300224302530443045304630673068306930703079030994309531112033316431663167333192163234323532363237323832393353335433553356335733583359336033773378337933853345435560359737893790381038195539573958395939803419842224246424742664267429342944295433843404386438743884389439043910445164555046555523952405242524352445262526552835363543854835507550855255529553255865587558855995626566456785695572445860586685872587358745894589558966100610161026103611166119661976198619962016202620562076222262236224622562266227622862296249625362556271627662796289629462956332363246325634463456350636463666368636963706384638863896411644421642264236424642564406441644464476448644964516452645364546455645664596463647664926493677167876789688820682168226963696469656968697269746975697669876988699377038703972178738027405742774477448744974507452745475377538754662784378447846789388834083448360836648365836683678379838483858386838783928393839784068410841443084334843584378439856585668567859385948595859685978644870687078709871087157875887598760876039106910799933093319332933393349335935493559356]
df__index(inplace=True)df_(2)
Date_Time
Relative_Humidity
0
2004-03-1018:00:00
48.875001
1
2004-03-1019:00:00
47.700000
df_anomalies=(df_[anomalies_pos])df_(3)
Date_Time
Relative_Humidity
20
2004-03-1114:00:00
81.15
166
2004-03-1716:00:00
15.75
167
2004-03-1717:00:00
14.90
#(df__Time,df__Humidity,label='Relative_Humidity');(df__Time,df__Humidity,color=_palette()[3],s=52,label='anomaly')(rotation=35)(['Relative_Humidity','AnomalyDetected'])('Relative_Humidity')('Date_Time');知乎学术咨询:
担任《Mechanical Systems and Signal Processing》等期刊审稿专家,擅长领域:信号滤波/降噪,机器学习/深度学习,时间序列分析/预测,设备故障诊断/缺陷检测/异常检测。
分割线分割线分割线分割线分割线分割线分割线分割线分割线
基于条件谱矩的时间序列分析(以轴承故障诊断为例,MATLAB)
完整代码:
一种新的一维时间序列信号盲解卷积算法(以旋转机械故障诊断为例)
研究结果表明:小型简单旋转机械的声、振信号混合系统更接近瞬时模型,而现实工况环境中的大型机械设备、复杂机械结构的机械声、振信号耦合系统,由于各故障源到传感器之间的传递路径复杂多变,特别是对于小刚度的结构,时延效应十分明显,往往更符合卷积模型,传统的时域瞬时盲分离方法并不能直接应用于其中。
正是由于机械结构和信号的复杂性,复杂的机械系统信号一般可分为三种类型,即周期信号、非平稳随机冲击信号和平稳随机高斯噪声,这是对机械信号行之有效的一种划分方法。机械系统中的复杂周期信号如齿轮不同轴振动波形、转子不平衡的振动信号及轴系不对中的振动信号等,均可看作不同频率正弦信号的叠加。针对周期信号的盲解卷积比较困难,这是因为处理过程中可能出现次序不确定的问题,进而导致分离信号失真。但是研究发现,对于以大多数机械故障信号的表现形式——非平稳信号——的分离为目标的盲信号处理问题,机械信号混合过程仍然可以表示为线性卷积混合模型。鉴于此,本文采用一种新的一维时间序列信号盲解卷积算法,以旋转机械故障诊断为例进行了验证,结果如下。
完整代码可通过知乎学术咨询获得: