第一关:
#encoding=utf8
"""Level 1: hand-written k-nearest-neighbors (kNN) classifier."""
import numpy as np
from collections import Counter


class kNNClassifier(object):
    """A simple kNN classifier: Euclidean distance, majority vote."""

    def __init__(self, k):
        '''
        Initialize the classifier.
        :param k: the k in the kNN algorithm (number of neighbors that vote)
        '''
        self.k = k
        # Training data, ndarray (memorized by fit)
        self.train_feature = None
        # Training labels, ndarray (memorized by fit)
        self.train_label = None

    def fit(self, feature, label):
        '''
        kNN "training": simply memorize the training set.
        :param feature: training data, ndarray
        :param label: training labels, ndarray
        :return: None
        '''
        #********* Begin *********#
        # np.asarray makes predict() work even if plain lists are passed in.
        self.train_feature = np.asarray(feature)
        self.train_label = np.asarray(label)
        #********* End *********#

    def predict(self, feature):
        '''
        Predict a label for each row of the test data by majority vote
        among its k nearest training samples (Euclidean distance).
        :param feature: test data, ndarray
        :return: predicted labels, list
        '''
        #********* Begin *********#
        result = []
        for data in feature:
            # Euclidean distance from this sample to every training sample.
            dist = np.sqrt(np.sum((self.train_feature - data) ** 2, axis=1))
            # Indices of the k closest training samples. np.argsort is
            # stable, so equal distances keep training-set order.
            neighbor = np.argsort(dist)[:self.k]
            # Majority vote. Counter.most_common breaks count ties by
            # insertion order, i.e. the label of the *nearest* tied
            # neighbor wins — a reasonable tie-breaking rule.
            k_labels = self.train_label[neighbor].tolist()
            key, _count = Counter(k_labels).most_common(1)[0]
            result.append(key)
        return result
        #********* End *********#
第二关:
"""Level 2: wine classification with sklearn's k-nearest-neighbors."""
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler


def classification(train_feature, train_label, test_feature):
    '''
    Classify the wines in test_feature.
    :param train_feature: training data, ndarray
    :param train_label: training labels, ndarray
    :param test_feature: test data, ndarray
    :return: predicted labels for the test data, ndarray
    '''
    #********* Begin *********#
    # kNN is distance-based, so features must be on a common scale:
    # standardize using statistics estimated from the training set only.
    scaler = StandardScaler()
    train_feature = scaler.fit_transform(train_feature)
    # Reuse the training-set mean/std — never re-fit on the test set,
    # or information would leak from test to train.
    test_feature = scaler.transform(test_feature)
    # Build and train the kNN classifier (sklearn default: k = 5 neighbors).
    clf = KNeighborsClassifier()
    clf.fit(train_feature, train_label)
    # Predict and return the test-set labels.
    return clf.predict(test_feature)
    #********* End *********#