Python Tkinter 基操

Posted by 韩同学的笔记本 on March 24, 2020

基于降维+聚类 + Tkinter库可视化

要求:

  1. 使用Tkinter来可视化二维聚类效果(散点图)
  2. 【选做】探究准确率和维数的相关性。(曲线图,维数范围自己取合适即可)
  3. 降维的方法可选:T-Sne, PCA和SVD,聚类方法:Kmeans,实现一种降维方法,并用Kmeans完成聚类
  4. 基本要求是完成1种降维方式的二维可视化聚类效果

进阶一:完成2-3种降维方式的二维可视化聚类效果

进阶二:完成1种降维方式的可视化 + 【要求2】

进阶三:完成2-3种降维方式的可视化 + 三种方式的【要求2】

注意:

  1. 聚类标签的分配参照上次作业的二部图匹配算法,在训练数据上确定聚类标签和真实标签的对应情况,在测试数据上计算准确率。
  2. 给出了基本框架,因为作业各类要求的不同,大家可以根据自行需要增减函数
  3. 数据使用上次作业MNIST的数据
  4. 自行添加下拉菜单,选择几种降维方式,则下拉菜单有几种相应的选项

demo

1
2
3
4
5
6
7
8
9
10
11
12
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import tkinter as tk
from tkinter import *
from sklearn.cluster import KMeans
from tkinter import ttk
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def eva(X, tY, P, kmeans):
    """Return the fraction of samples whose mapped cluster label is correct.

    Args:
        X: Feature matrix handed to ``kmeans.predict``.
        tY: Ground-truth labels, aligned with the rows of ``X``.
        P: Array indexed by cluster id that yields the class label assigned
            to that cluster.
        kmeans: Fitted estimator exposing ``predict``.

    Returns:
        Number of matching (true, predicted) pairs divided by ``len(tY)``.
    """
    # Translate raw cluster ids into class labels through the mapping P.
    predicted = P[kmeans.predict(X)]
    hits = sum(1 for truth, guess in zip(tY, predicted) if truth == guess)
    return hits / len(tY)


def reduction(arr, k, method):
    """Reduce ``arr`` to ``k`` dimensions with the requested method.

    Args:
        arr: 2-D array with one observation per row.
        k: Number of output dimensions.
        method: One of ``'SVD'``, ``'TSNE'`` or ``'PCA'``.

    Returns:
        A ``(reduced, projector)`` pair. ``projector`` depends on the method:
        for ``'SVD'`` it is the matrix ``U`` such that ``reduced = arr @ U``;
        for ``'TSNE'`` it is a ``k x k`` identity matrix (a placeholder, so
        that ``embedding @ projector`` leaves an embedding unchanged);
        for ``'PCA'`` it is the fitted ``PCA`` object.

    Raises:
        ValueError: If ``method`` is not one of the supported names. The
            previous behaviour was to fall through and return ``None``, which
            surfaced later as an unpacking error in the caller.
    """
    if method == 'SVD':
        # np.cov expects variables along the first axis and observations
        # along the second, hence the transpose.
        C = np.cov(arr.transpose())
        u, _, _ = np.linalg.svd(C)
        U = u[:, :k]
        return arr @ U, U
    if method == 'TSNE':
        embedded = TSNE(n_components=k).fit_transform(arr)
        return embedded, np.eye(k)
    if method == 'PCA':
        pca = PCA(n_components=k)
        return pca.fit_transform(arr), pca
    raise ValueError(
        "unknown reduction method %r; expected 'SVD', 'TSNE' or 'PCA'" % (method,)
    )


def match_label(res):
    """Assign one class label to each cluster via bipartite matching.

    Args:
        res: Square count matrix where ``res[i][j]`` is the number of samples
            in cluster ``i`` whose true label is ``j``.

    Returns:
        Integer array ``P`` of length ``len(res)`` where ``P[i]`` is the
        label matched to cluster ``i``.
    """
    n = len(res)
    G = nx.Graph()
    # Nodes 0..n-1 are clusters, nodes n..2n-1 are labels.
    G.add_nodes_from(range(2 * n))

    for i in range(n):
        for j in range(n):
            G.add_edge(i, j + n, weight=res[i][j])

    # maxcardinality=True forces a perfect matching on this complete
    # bipartite graph; without it, zero-weight edges may be left unmatched
    # and those clusters would silently keep the default label 0.
    m = nx.algorithms.matching.max_weight_matching(G, maxcardinality=True)
    # Builtin int: the np.int alias was removed in NumPy 1.24.
    P = np.zeros(n, dtype=int)
    for i, j in m:
        # Matched pairs come back in arbitrary order; the smaller node id is
        # always the cluster side.
        a, b = min(i, j), max(i, j)
        P[a] = b - n
    return P


def read_data(train_name='./mnist/train6000DataSet.csv', test_name='./mnist/test1000DataSet.csv'):
    """Load the training and test CSV files into NumPy arrays.

    Each CSV is read with its first column as the index. It must contain a
    ``label`` column, and its feature columns are taken as everything from
    the column named ``'0'`` to the last column.

    The row and column counts are taken from the files themselves instead of
    the previously hard-coded 6000/1000 rows and 784 columns. Rows are
    returned in file order.

    Args:
        train_name: Path of the training CSV.
        test_name: Path of the test CSV.

    Returns:
        Tuple ``(trainImage, trainLabel, testImage, testLabel)``; the image
        arrays are float, the label arrays are int.
    """
    def _load(path):
        # One vectorised slice replaces the per-row .loc loop.
        frame = pd.read_csv(path, index_col=0)
        images = frame.loc[:, '0':].to_numpy(dtype=float)
        # Builtin int: the np.int alias was removed in NumPy 1.24.
        labels = frame['label'].to_numpy(dtype=int)
        return images, labels

    trainImage, trainLabel = _load(train_name)
    testImage, testLabel = _load(test_name)
    return (trainImage, trainLabel, testImage, testLabel)


def cluster(X, tY, n_clusters):
    """Fit K-means on ``X`` and match each cluster to a class label.

    Args:
        X: Feature matrix, one sample per row.
        tY: True integer labels aligned with the rows of ``X``.
        n_clusters: Number of clusters passed to ``KMeans``.

    Returns:
        Tuple ``(kmeans, P, pY)``: the fitted estimator, the cluster-to-label
        mapping from ``match_label``, and the cluster id predicted for each
        row of ``X``.
    """
    kmeans = KMeans(n_clusters=n_clusters).fit(X)
    pY = kmeans.predict(X)
    # res[c][l] counts samples of true label l that landed in cluster c.
    # The table stays 10x10, so cluster ids and labels must be below 10.
    # Builtin int: the np.int alias was removed in NumPy 1.24.
    res = np.zeros((10, 10), dtype=int)
    for i, j in zip(pY, tY):
        res[i][j] += 1
    P = match_label(res)
    return (kmeans, P, pY)


class TkMnist:
    """Tk window showing a 2-D cluster scatter plot and an accuracy curve.

    A read-only combobox selects the dimensionality-reduction method; each
    selection reruns the pipeline and redraws the embedded matplotlib figure.
    """

    def work(self, trainImage, trainLabel, testImage, testLabel, method):
        """Sweep target dimensions, cluster, and pick the best run to plot.

        For every dimension the training data is reduced and clustered, and
        the accuracy of the resulting cluster-to-label mapping is measured.
        For 'SVD' and 'PCA' the accuracy is measured on the projected test
        data. For 'TSNE' it is measured on the training embedding itself,
        because this code has no way to project unseen samples into a t-SNE
        embedding.

        Args:
            trainImage: Training feature matrix.
            trainLabel: Labels for ``trainImage``.
            testImage: Test feature matrix.
            testLabel: Labels for ``testImage``.
            method: 'SVD', 'PCA' or 'TSNE'.

        Returns:
            Tuple ``(to_plot, colors, dims, accu)``: 2-D points for the
            scatter plot, one predicted label per point, the dimensions
            tried, and the accuracy obtained at each dimension.

        Raises:
            ValueError: If ``method`` is not supported.
        """
        if method in ('SVD', 'PCA'):
            dims = range(10, 90, 5)
        elif method == 'TSNE':
            dims = [2, 3]  # t-SNE is only run for low target dimensions here
        else:
            raise ValueError("unsupported reduction method: %r" % (method,))

        accu = []
        best_acc = -1.0
        to_plot = None
        colors = None
        for d in dims:
            red, U = reduction(trainImage, d, method)
            kmeans, tmpP, _ = cluster(red, trainLabel, 10)

            # Choose the features to score, and the labels that belong to them.
            if method == 'TSNE':
                features, labels = red, trainLabel
            elif method == 'PCA':
                features, labels = U.transform(testImage), testLabel
            else:
                features, labels = testImage @ U, testLabel

            acc = eva(features, labels, tmpP, kmeans)
            accu.append(acc)

            if best_acc < acc:
                best_acc = acc
                # For t-SNE the 2-D picture is built from the embedding that
                # was scored; otherwise from the raw test images.
                plot_source = features if method == 'TSNE' else testImage
                to_plot, _ = reduction(plot_source, 2, method)
                # Colour each plotted point by its own predicted label, so the
                # colour array always has one entry per plotted point.
                colors = tmpP[kmeans.predict(features)]

        return to_plot, colors, dims, accu

    def _go(self, *args):
        """Handle a combobox selection: clear the figure or run and redraw."""
        # Import the submodule explicitly: `import tkinter` alone does not
        # guarantee that `tkinter.messagebox` is loaded.
        from tkinter import messagebox

        sel = self.comboxlist.get()
        messagebox.showinfo(title="Info", message=sel+" was selected!")
        if sel == '<clear>':
            self.fig.clf()
            self.canvas.draw()
            return

        if sel == 'TSNE':
            messagebox.showwarning(title='Warning', message='TSNE 不支持高维,n_components <= 3')

        # NOTE(review): the test split is passed as both the training and the
        # test data, so self.trainImage / self.trainLabel are never used --
        # confirm whether this is intentional (e.g. to keep runs short).
        to_plot, col, dims, accu = self.work(
            self.testImage, self.testLabel, self.testImage, self.testLabel, sel)

        self.fig.clf()
        self.fig.add_subplot(121).scatter(x=to_plot[:, 0], y=to_plot[:, 1], c=col)
        self.fig.add_subplot(122).plot(dims, accu, 'b')
        self.canvas.draw()

    def __init__(self):
        """Load the data, then build the window, the canvas and the combobox."""
        self.trainImage, self.trainLabel, self.testImage, self.testLabel = read_data()
        self.root = tk.Tk()
        self.root.wm_title("Embedding in Tk")
        self.fig = Figure(figsize=(5, 4), dpi=100)
        self.canvas = FigureCanvasTkAgg(self.fig, master=self.root)  # A tk.DrawingArea.
        self.canvas.draw()
        self.canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

        comvalue = ['<clear>', 'SVD', 'TSNE', 'PCA']
        # Read-only drop-down listing the available actions.
        self.comboxlist = ttk.Combobox(self.root, values=comvalue, state="readonly")
        self.comboxlist.current(0)  # start on '<clear>'
        # Run _go whenever the user picks an entry from the drop-down.
        self.comboxlist.bind("<<ComboboxSelected>>", self._go)
        self.comboxlist.pack(side=tk.BOTTOM)

    def loop(self):
        """Enter the Tk main loop; returns when the window is closed."""
        self.root.mainloop()
1
2
3
if __name__ == "__main__":
    # Build the window (this also loads the data) and hand control to Tk.
    TkMnist().loop()