代码-航空公司代码.docxVIP

  • 17
  • 0
  • 约1.57千字
  • 约 2页
  • 2022-12-24 发布于湖北
  • 举报
"""Airline customer value analysis: clean records, build LRFMC features, cluster.

Pipeline:
    1. Read the raw membership records from ``air_data.csv``.
    2. Drop records with missing or unusable fare information.
    3. Build the five features used for clustering (L plus four raw columns).
    4. Standardize the features and cache them in ``airline_scale.npz``.
    5. Fit a K-Means model and report centers, labels, and cluster sizes.
"""
import os

import numpy as np
import pandas as pd

RAW_DATA_PATH = "air_data.csv"
RAW_DATA_ENCODING = "gb18030"
SCALED_DATA_PATH = "airline_scale.npz"

# Columns needed to build the clustering features. The first two are only
# used to derive the membership-length feature "L".
SELECTED_COLUMNS = [
    "FFP_DATE",
    "LOAD_TIME",
    "FLIGHT_COUNT",
    "LAST_TO_END",
    "avg_discount",
    "SEG_KM_SUM",
]

N_CLUSTERS = 5  # number of cluster centers
RANDOM_STATE = 123


def drop_invalid_records(credit_card: pd.DataFrame) -> pd.DataFrame:
    """Return only the rows whose fare information is usable.

    A row is kept when all of the following hold:
      * ``SUM_YR_1`` and ``SUM_YR_2`` are both present (not null);
      * at least one of ``SUM_YR_1`` / ``SUM_YR_2`` is non-zero;
      * ``SEG_KM_SUM`` is greater than 0 and ``avg_discount`` is non-zero.
    """
    has_both_fares = credit_card["SUM_YR_1"].notnull() & credit_card["SUM_YR_2"].notnull()
    airline_notnull = credit_card.loc[has_both_fares, :]

    index1 = airline_notnull["SUM_YR_1"] != 0
    index2 = airline_notnull["SUM_YR_2"] != 0
    index3 = (airline_notnull["SEG_KM_SUM"] > 0) & (airline_notnull["avg_discount"] != 0)
    return airline_notnull[(index1 | index2) & index3]


def build_lrfmc_features(airline: pd.DataFrame) -> pd.DataFrame:
    """Build the clustering feature table from cleaned records.

    The returned frame has the columns ``L``, ``FLIGHT_COUNT``,
    ``LAST_TO_END``, ``avg_discount`` and ``SEG_KM_SUM``, where ``L`` is the
    number of days between ``FFP_DATE`` and ``LOAD_TIME`` divided by 30.
    """
    # Select the required features.
    airline_selection = airline[SELECTED_COLUMNS]

    # Build the L feature. ``.dt.days`` reads the whole-day count directly
    # instead of round-tripping the timedelta through its string form.
    elapsed = pd.to_datetime(airline_selection["LOAD_TIME"]) - pd.to_datetime(
        airline_selection["FFP_DATE"]
    )
    # Name the series so every column label is a string; an unnamed series
    # would become column ``0`` and give the frame mixed-type labels.
    membership_length = (elapsed.dt.days / 30).rename("L")

    # Merge the features (everything after the two date columns).
    return pd.concat([membership_length, airline_selection.iloc[:, 2:]], axis=1)


def plot_feature_histograms(airline_features: pd.DataFrame) -> None:
    """Data visualization: draw 1000-bin histograms of three features.

    NOTE(review): ``Series.hist`` draws on the current matplotlib axes, so
    outside a notebook these three histograms end up on the same figure.
    """
    for column in ("LAST_TO_END", "avg_discount", "SEG_KM_SUM"):
        airline_features[column].hist(bins=1000)


def standardize_features(airline_features: pd.DataFrame) -> np.ndarray:
    """Return the features scaled with scikit-learn's ``StandardScaler``."""
    from sklearn.preprocessing import StandardScaler

    return StandardScaler().fit_transform(airline_features)


def fit_kmeans(airline_scale: np.ndarray, k: int = N_CLUSTERS, random_state: int = RANDOM_STATE):
    """Build and train a K-Means model with ``k`` clusters; return the model."""
    from sklearn.cluster import KMeans

    kmeans_model = KMeans(n_clusters=k, random_state=random_state)
    kmeans_model.fit(airline_scale)
    return kmeans_model


def main() -> None:
    """Run the full pipeline from raw CSV to cluster summary."""
    # Read the data.
    credit_card = pd.read_csv(RAW_DATA_PATH, encoding=RAW_DATA_ENCODING)
    print(credit_card.shape)

    airline = drop_invalid_records(credit_card)
    airline_features = build_lrfmc_features(airline)
    plot_feature_histograms(airline_features)

    # Cache the standardized matrix on disk, then load it back for clustering.
    data = standardize_features(airline_features)
    np.savez(SCALED_DATA_PATH, data)
    airline_scale = np.load(SCALED_DATA_PATH)["arr_0"]

    kmeans_model = fit_kmeans(airline_scale)
    print(kmeans_model.cluster_centers_)
    print(kmeans_model.labels_)
    # Number of samples assigned to each cluster.
    print(pd.Series(kmeans_model.labels_).value_counts())


if __name__ == "__main__":
    main()

文档评论(0)

1亿VIP精品文档

相关文档