|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 澍梵. 于 2022-4-29 17:03 编辑
dataZscore:
- dataZscore = (dataTransformed - dataTransformed.mean(axis=0)) /dataTransformed.std(axis=0)
- dataZscore.columns = ['Z' + i for i in dataTransformed.columns]
- dataZscore.head()
- ZL ZR ZF ZM ZC
- 0 1.435707 -0.944948 14.034016 26.761154 0.315041
- 1 1.307152 -0.911894 9.073213 13.126864 0.315041
- 2 1.328381 -0.889859 8.718869 12.653481 0.315041
- 3 0.658476 -0.416098 0.781585 12.540622 0.315041
- 4 0.386032 -0.922912 9.923636 13.898736 0.315041
复制代码
- #Kmeans
- #导入工具包
- import time
- import pyspark.ml.clustering as clu
- # 获取程序运行前时间
- start = time.time()
- #通过上图观察可知最好的簇数量为5
- #初始化Kmeans模型
- kmodel = clu.KMeans(k=5, initMode='k-means||', initSteps=10,maxIter=300, seed=0)
- kmodel.fit(dataZscore)
- # # 获取程序运行结束后时间
- # end = time.time()
- # print(f"使用K-means聚类算法的运行时间为:%.3fs" % (end-start))
- # # 聚类算法的评价指标CH值的计算:
- # score = calinski_harabaz_score(dataZscore, labels) # 值越大表示聚类效果越好
- # print("CH值:", score)
复制代码
运行 kmodel.fit(dataZscore) 时一直报错,没办法解决:
- ---------------------------------------------------------------------------
- AttributeError Traceback (most recent call last)
- <ipython-input-87-01ba619cc4ab> in <module>()
- 16 # kmodel = KMeans(k=5 ,maxIter=300, initSteps=10, initMode='k-means||', seed=0)
- 17 kmodel = clu.KMeans(k=5, initMode='k-means||', initSteps=10,maxIter=300, seed=0)
- ---> 18 kmodel.fit(dataZscore)
- 19 # kmodel = KMeans(k=5 ,maxIter=300)
- 20 # kmodel.fit(dataZscore)
- ~/hadoop/spark/python/pyspark/ml/base.py in fit(self, dataset, params)
- 159 return self.copy(params)._fit(dataset)
- 160 else:
- --> 161 return self._fit(dataset)
- 162 else:
- 163 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
- ~/hadoop/spark/python/pyspark/ml/wrapper.py in _fit(self, dataset)
- 333
- 334 def _fit(self, dataset):
- --> 335 java_model = self._fit_java(dataset)
- 336 model = self._create_model(java_model)
- 337 return self._copyValues(model)
- ~/hadoop/spark/python/pyspark/ml/wrapper.py in _fit_java(self, dataset)
- 330 """
- 331 self._transfer_params_to_java()
- --> 332 return self._java_obj.fit(dataset._jdf)
- 333
- 334 def _fit(self, dataset):
- ~/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
- 4370 if self._info_axis._can_hold_identifiers_and_holds_name(name):
- 4371 return self[name]
- -> 4372 return object.__getattribute__(self, name)
- 4373
- 4374 def __setattr__(self, name, value):
- AttributeError: 'DataFrame' object has no attribute '_jdf'
复制代码
- 'DataFrame' object has no attribute '_jdf'
复制代码
|
|