forked from yxycug/machine-learning-algorithm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 05f4b62
Showing
19 changed files
with
20,695 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Auto detect text files and perform LF normalization | ||
* text=auto |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"cells": [], | ||
"metadata": {}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,246 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"start read file\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from minst import loadData\n", | ||
"dataArr, labelArr=loadData('D:/Jupyter/mnist_train.csv')\n", | ||
"testDataArr, testLabelArr = loadData('D:/Jupyter/mnist_test.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"def calcDist(x1, x2):\n", | ||
" '''\n", | ||
" 计算两个样本点向量之间的距离\n", | ||
" 使用的是欧氏距离,即 样本点每个元素相减的平方 再求和 再开方\n", | ||
" :param x1:向量1\n", | ||
" :param x2:向量2\n", | ||
" :return:向量之间的欧式距离\n", | ||
" '''\n", | ||
" return np.sqrt(np.sum(np.square(x1 - x2)))\n", | ||
"\n", | ||
" #马哈顿距离计算公式\n", | ||
" # return np.sum(x1 - x2)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def getClosest(trainDataMat, trainLabelMat, x, topK):\n", | ||
" '''\n", | ||
" 预测样本x的标记。\n", | ||
" 获取方式通过找到与样本x最近的topK个点,并查看它们的标签。\n", | ||
" 查找里面占某类标签最多的那类标签\n", | ||
" (书中3.1 3.2节)\n", | ||
" :param trainDataMat:训练集数据集\n", | ||
" :param trainLabelMat:训练集标签集\n", | ||
" :param x:要预测的样本x\n", | ||
" :param topK:选择参考最邻近样本的数目(样本数目的选择关系到正确率,详看3.2.3 K值的选择)\n", | ||
" :return:预测的标记\n", | ||
" '''\n", | ||
" distList = [0] * len(trainLabelMat) # [0]*5=[0 0 0 0 0]\n", | ||
"\n", | ||
" for i in range(len(trainDataMat)): #获取训练集中当前样本的向量\n", | ||
"\n", | ||
" x1 = trainDataMat[i] #计算向量x与训练集样本x的距离\n", | ||
" curDist = calcDist(x1, x) #将距离放入对应的列表位置中\n", | ||
" distList[i] = curDist\n", | ||
" \n", | ||
" topKList = np.argsort(np.array(distList))[:topK] #前K个升序排序 \n", | ||
" '''排序的只是索引index!!!!!'''\n", | ||
" \n", | ||
" labelList = [0] * 10 \n", | ||
" for index in topKList:\n", | ||
" '''遍历的是array里的元素!!!!'''\n", | ||
" \n", | ||
" #trainLabelMat[index]:在训练集标签中寻找topK元素索引对应的标记\n", | ||
" #int(trainLabelMat[index]):将标记转换为int(实际上已经是int了,但是不int的话,报错)\n", | ||
" #labelList[int(trainLabelMat[index])]:找到标记在labelList中对应的位置\n", | ||
" #最后加1,表示投了一票\n", | ||
" \n", | ||
" labelList[int(trainLabelMat[index])] += 1\n", | ||
" \n", | ||
" #max(labelList):找到选票箱中票数最多的票数值\n", | ||
" #labelList.index(max(labelList)):再根据最大值在列表中找到该值对应的索引,等同于预测的标记\n", | ||
" return labelList.index(max(labelList)) " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"start read file\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"trainDataMat = np.mat(dataArr)\n", | ||
"trainLabelMat = np.mat(labelArr).T\n", | ||
"testDataMat = np.mat(testDataArr)\n", | ||
"testLabelMat = np.mat(testLabelArr).T" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 17, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"test 0 predict:7 true:7 \n", | ||
"test 1 predict:2 true:2 \n", | ||
"test 2 predict:1 true:1 \n", | ||
"test 3 predict:0 true:0 \n", | ||
"test 4 predict:4 true:4 \n", | ||
"test 5 predict:1 true:1 \n", | ||
"test 6 predict:4 true:4 \n", | ||
"test 7 predict:9 true:9 \n", | ||
"test 8 predict:5 true:5 \n", | ||
"test 9 predict:9 true:9 \n", | ||
"test 10 predict:0 true:0 \n", | ||
"test 11 predict:6 true:6 \n", | ||
"test 12 predict:9 true:9 \n", | ||
"test 13 predict:0 true:0 \n", | ||
"test 14 predict:1 true:1 \n", | ||
"test 15 predict:5 true:5 \n", | ||
"test 16 predict:9 true:9 \n", | ||
"test 17 predict:7 true:7 \n", | ||
"test 18 predict:3 true:3 \n", | ||
"test 19 predict:4 true:4 \n", | ||
"test 20 predict:9 true:9 \n", | ||
"test 21 predict:6 true:6 \n", | ||
"test 22 predict:6 true:6 \n", | ||
"test 23 predict:5 true:5 \n", | ||
"test 24 predict:4 true:4 \n", | ||
"test 25 predict:0 true:0 \n", | ||
"test 26 predict:7 true:7 \n", | ||
"test 27 predict:4 true:4 \n", | ||
"test 28 predict:0 true:0 \n", | ||
"test 29 predict:1 true:1 \n", | ||
"test 30 predict:3 true:3 \n", | ||
"test 31 predict:1 true:1 \n", | ||
"test 32 predict:3 true:3 \n", | ||
"test 33 predict:4 true:4 \n", | ||
"test 34 predict:7 true:7 \n", | ||
"test 35 predict:2 true:2 \n", | ||
"test 36 predict:7 true:7 \n", | ||
"test 37 predict:1 true:1 \n", | ||
"test 38 predict:2 true:2 \n", | ||
"test 39 predict:1 true:1 \n", | ||
"test 40 predict:1 true:1 \n", | ||
"test 41 predict:7 true:7 \n", | ||
"test 42 predict:4 true:4 \n", | ||
"test 43 predict:1 true:2 \n", | ||
"test 44 predict:3 true:3 \n", | ||
"test 45 predict:5 true:5 \n", | ||
"test 46 predict:1 true:1 \n", | ||
"test 47 predict:2 true:2 \n", | ||
"test 48 predict:4 true:4 \n", | ||
"test 49 predict:4 true:4 \n", | ||
"accuracy:0 \n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"errorCnt = 0\n", | ||
"topK = 25\n", | ||
"n = 50\n", | ||
"for i in range(n):\n", | ||
" x = testDataMat[i] #读取测试集当前测试样本的向量\n", | ||
" y = getClosest(trainDataMat, trainLabelMat, x, topK) #获取预测的标记\n", | ||
" print('test %d predict:%d true:%d ' % (i,y,testLabelMat[i]))\n", | ||
" if y != testLabelMat[i]: \n", | ||
" errorCnt += 1 #如果预测标记与实际标记不符,错误值计数加1\n", | ||
" \n", | ||
"accuracy = 1 - (errorCnt / n)\n", | ||
"print(accuracy)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.5" | ||
}, | ||
"varInspector": { | ||
"cols": { | ||
"lenName": 16, | ||
"lenType": 16, | ||
"lenVar": 40 | ||
}, | ||
"kernels_config": { | ||
"python": { | ||
"delete_cmd_postfix": "", | ||
"delete_cmd_prefix": "del ", | ||
"library": "var_list.py", | ||
"varRefreshCmd": "print(var_dic_list())" | ||
}, | ||
"r": { | ||
"delete_cmd_postfix": ") ", | ||
"delete_cmd_prefix": "rm(", | ||
"library": "var_list.r", | ||
"varRefreshCmd": "cat(var_dic_list()) " | ||
} | ||
}, | ||
"oldHeight": 210, | ||
"position": { | ||
"height": "40px", | ||
"left": "705px", | ||
"right": "20px", | ||
"top": "133px", | ||
"width": "481px" | ||
}, | ||
"types_to_exclude": [ | ||
"module", | ||
"function", | ||
"builtin_function_or_method", | ||
"instance", | ||
"_Feature" | ||
], | ||
"varInspector_section_display": "none", | ||
"window_display": true | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.