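# Scraped page metadata (not part of the script):
# - 17
# - 0
# - approx. 1.57k characters
# - approx. 2 pages
# - published 2022-12-24, Hubei
# - report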
import numpy as np
import pandas as pd
import os
# --- Data loading and cleaning ---
import numpy as np
import pandas as pd

# Read the raw records; the CSV is GB18030-encoded.
credit_card = pd.read_csv("air_data.csv", encoding="gb18030")
credit_card.shape  # displayed only in an interactive session

# Keep only rows where both yearly fare totals are present.
exp1 = credit_card["SUM_YR_1"].notnull()
exp2 = credit_card["SUM_YR_2"].notnull()
exp = exp1 & exp2
airline_notnull = credit_card.loc[exp, :]

# Keep rows where at least one yearly fare total is non-zero AND the
# kilometre total is positive AND the average discount is non-zero.
# NOTE(review): the comparison on SEG_KM_SUM and the boolean operators were
# lost in the extracted source; `> 0` and `&` are restored here - confirm.
index1 = airline_notnull["SUM_YR_1"] != 0
index2 = airline_notnull["SUM_YR_2"] != 0
index3 = (airline_notnull["SEG_KM_SUM"] > 0) & (
    airline_notnull["avg_discount"] != 0
)
airline = airline_notnull[(index1 | index2) & index3]
# --- Feature selection and construction ---
# Select the columns needed for the model.
airline_selection = airline[
    [
        "FFP_DATE",
        "LOAD_TIME",
        "FLIGHT_COUNT",
        "LAST_TO_END",
        "avg_discount",
        "SEG_KM_SUM",
    ]
]

# Build the L feature: days between FFP_DATE and LOAD_TIME, divided by 30.
# `.dt.days` reads the day count straight from the timedelta instead of
# parsing its string form, so it does not depend on how pandas renders
# timedeltas and yields NaN (rather than raising) for missing dates.
L = pd.to_datetime(airline_selection["LOAD_TIME"]) - pd.to_datetime(
    airline_selection["FFP_DATE"]
)
L = (L.dt.days / 30).rename("L")

# Merge features: L plus every selected column after the two date columns.
# Naming the Series gives the combined frame all-string column labels.
airline_features = pd.concat([L, airline_selection.iloc[:, 2:]], axis=1)

# Data visualization: distribution of three of the features.
airline_features["LAST_TO_END"].hist(bins=1000)
airline_features["avg_discount"].hist(bins=1000)
airline_features["SEG_KM_SUM"].hist(bins=1000)
# --- Standardization ---
from sklearn.preprocessing import StandardScaler

# Standardize every feature column and persist the result.
data = StandardScaler().fit_transform(airline_features)
# A positional array passed to np.savez is stored under the key "arr_0".
np.savez("airline_scale.npz", data)
# --- K-Means clustering ---
import numpy as np
from sklearn.cluster import KMeans  # K-Means algorithm

# Load the standardized features saved by the standardization step.
airline_scale = np.load("airline_scale.npz")["arr_0"]

k = 5  # number of cluster centres
kmeans_model = KMeans(n_clusters=k, random_state=123)
fit_kmeans = kmeans_model.fit(airline_scale)  # fit the model

# The bare expressions below are displayed only in an interactive session.
kmeans_model.cluster_centers_
kmeans_model.labels_
# Count the samples per cluster; value_counts must be called, otherwise the
# expression evaluates to the bound method rather than the counts.
pd.Series(kmeans_model.labels_).value_counts()
# Scraped page footer (not part of the script):
# Yuanchuangli Docs
# Document comments (0)