Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
yxycug committed Jun 4, 2019
0 parents commit 05f4b62
Show file tree
Hide file tree
Showing 19 changed files with 20,695 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
6 changes: 6 additions & 0 deletions .ipynb_checkpoints/00-minst-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
246 changes: 246 additions & 0 deletions .ipynb_checkpoints/01-KNN-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"start read file\n"
]
}
],
"source": [
"from minst import loadData\n",
"dataArr, labelArr=loadData('D:/Jupyter/mnist_train.csv')\n",
"testDataArr, testLabelArr = loadData('D:/Jupyter/mnist_test.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"def calcDist(x1, x2):\n",
" '''\n",
" 计算两个样本点向量之间的距离\n",
" 使用的是欧氏距离,即 样本点每个元素相减的平方 再求和 再开方\n",
" :param x1:向量1\n",
" :param x2:向量2\n",
" :return:向量之间的欧式距离\n",
" '''\n",
" return np.sqrt(np.sum(np.square(x1 - x2)))\n",
"\n",
" #马哈顿距离计算公式\n",
" # return np.sum(x1 - x2)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def getClosest(trainDataMat, trainLabelMat, x, topK):\n",
" '''\n",
" 预测样本x的标记。\n",
" 获取方式通过找到与样本x最近的topK个点,并查看它们的标签。\n",
" 查找里面占某类标签最多的那类标签\n",
" (书中3.1 3.2节)\n",
" :param trainDataMat:训练集数据集\n",
" :param trainLabelMat:训练集标签集\n",
" :param x:要预测的样本x\n",
" :param topK:选择参考最邻近样本的数目(样本数目的选择关系到正确率,详看3.2.3 K值的选择)\n",
" :return:预测的标记\n",
" '''\n",
" distList = [0] * len(trainLabelMat) # [0]*5=[0 0 0 0 0]\n",
"\n",
" for i in range(len(trainDataMat)): #获取训练集中当前样本的向量\n",
"\n",
" x1 = trainDataMat[i] #计算向量x与训练集样本x的距离\n",
" curDist = calcDist(x1, x) #将距离放入对应的列表位置中\n",
" distList[i] = curDist\n",
" \n",
" topKList = np.argsort(np.array(distList))[:topK] #前K个升序排序 \n",
" '''排序的只是索引index!!!!!'''\n",
" \n",
" labelList = [0] * 10 \n",
" for index in topKList:\n",
" '''遍历的是array里的元素!!!!'''\n",
" \n",
" #trainLabelMat[index]:在训练集标签中寻找topK元素索引对应的标记\n",
" #int(trainLabelMat[index]):将标记转换为int(实际上已经是int了,但是不int的话,报错)\n",
" #labelList[int(trainLabelMat[index])]:找到标记在labelList中对应的位置\n",
" #最后加1,表示投了一票\n",
" \n",
" labelList[int(trainLabelMat[index])] += 1\n",
" \n",
" #max(labelList):找到选票箱中票数最多的票数值\n",
" #labelList.index(max(labelList)):再根据最大值在列表中找到该值对应的索引,等同于预测的标记\n",
" return labelList.index(max(labelList)) "
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"start read file\n"
]
}
],
"source": [
"trainDataMat = np.mat(dataArr)\n",
"trainLabelMat = np.mat(labelArr).T\n",
"testDataMat = np.mat(testDataArr)\n",
"testLabelMat = np.mat(testLabelArr).T"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"test 0 predict:7 true:7 \n",
"test 1 predict:2 true:2 \n",
"test 2 predict:1 true:1 \n",
"test 3 predict:0 true:0 \n",
"test 4 predict:4 true:4 \n",
"test 5 predict:1 true:1 \n",
"test 6 predict:4 true:4 \n",
"test 7 predict:9 true:9 \n",
"test 8 predict:5 true:5 \n",
"test 9 predict:9 true:9 \n",
"test 10 predict:0 true:0 \n",
"test 11 predict:6 true:6 \n",
"test 12 predict:9 true:9 \n",
"test 13 predict:0 true:0 \n",
"test 14 predict:1 true:1 \n",
"test 15 predict:5 true:5 \n",
"test 16 predict:9 true:9 \n",
"test 17 predict:7 true:7 \n",
"test 18 predict:3 true:3 \n",
"test 19 predict:4 true:4 \n",
"test 20 predict:9 true:9 \n",
"test 21 predict:6 true:6 \n",
"test 22 predict:6 true:6 \n",
"test 23 predict:5 true:5 \n",
"test 24 predict:4 true:4 \n",
"test 25 predict:0 true:0 \n",
"test 26 predict:7 true:7 \n",
"test 27 predict:4 true:4 \n",
"test 28 predict:0 true:0 \n",
"test 29 predict:1 true:1 \n",
"test 30 predict:3 true:3 \n",
"test 31 predict:1 true:1 \n",
"test 32 predict:3 true:3 \n",
"test 33 predict:4 true:4 \n",
"test 34 predict:7 true:7 \n",
"test 35 predict:2 true:2 \n",
"test 36 predict:7 true:7 \n",
"test 37 predict:1 true:1 \n",
"test 38 predict:2 true:2 \n",
"test 39 predict:1 true:1 \n",
"test 40 predict:1 true:1 \n",
"test 41 predict:7 true:7 \n",
"test 42 predict:4 true:4 \n",
"test 43 predict:1 true:2 \n",
"test 44 predict:3 true:3 \n",
"test 45 predict:5 true:5 \n",
"test 46 predict:1 true:1 \n",
"test 47 predict:2 true:2 \n",
"test 48 predict:4 true:4 \n",
"test 49 predict:4 true:4 \n",
"accuracy:0 \n"
]
}
],
"source": [
"errorCnt = 0\n",
"topK = 25\n",
"n = 50\n",
"for i in range(n):\n",
" x = testDataMat[i] #读取测试集当前测试样本的向量\n",
" y = getClosest(trainDataMat, trainLabelMat, x, topK) #获取预测的标记\n",
" print('test %d predict:%d true:%d ' % (i,y,testLabelMat[i]))\n",
" if y != testLabelMat[i]: \n",
" errorCnt += 1 #如果预测标记与实际标记不符,错误值计数加1\n",
" \n",
"accuracy = 1 - (errorCnt / n)\n",
"print(accuracy)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"oldHeight": 210,
"position": {
"height": "40px",
"left": "705px",
"right": "20px",
"top": "133px",
"width": "481px"
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"varInspector_section_display": "none",
"window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 05f4b62

Please sign in to comment.