-
Notifications
You must be signed in to change notification settings - Fork 3
/
USENC.m
172 lines (145 loc) · 4.6 KB
/
USENC.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is the code for the U-SPEC algorithm, which is proposed in %
% the following paper: %
% %
% D. Huang, C.-D. Wang, J.-S. Wu, J.-H. Lai, and C.-K. Kwoh. %
% "Ultra-Scalable Spectral Clustering and Ensemble Clustering." %
% IEEE Transactions on Knowledge and Data Engineering, 2020. %
% DOI: https://doi.org/10.1109/TKDE.2019.2903410 %
% %
% The code has been tested in Matlab R2016a and Matlab R2016b. %
% Website: https://www.researchgate.net/publication/330760669 %
% Written by Huang Dong. (huangdonghere@gmail.com) %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function L = USENC(fea, k, M, distance, p, Knn, bcsLowK, bcsUpK)
if nargin < 8
bcsUpK = 60;
end
if nargin < 7
bcsLowK = 20;
end
if nargin < 6
Knn = 5;
end
if nargin < 5
p = 1000;
end
if nargin < 4
distance = 'euclidean';
end
if nargin < 3
M = 20;
end
if bcsUpK < bcsLowK
bcsUpK = bcsLowK;
end
disp('.');
disp(['Generating an ensemble of ',num2str(M),' base clusterings...']);
disp('.');
baseCls = USENC_EnsembleGeneration(fea, M, distance, p, Knn, bcsLowK, bcsUpK);
clear fea
disp('.');
disp('Performing the consensus function...');
disp('.');
tic1 = tic;
L = USENC_ConsensusFunction(baseCls,k);
toc(tic1);
disp('.');
function members = USENC_EnsembleGeneration(fea, M, distance, p, Knn, lowK, upK)
% Huang Dong. Mar. 20, 2019.
% Generate M base cluserings.
% The number of clusters in each base clustering is randomly selected in
% the range of [lowK, upK].
if nargin < 7
upK = 60;
end
if nargin < 6
lowK = 20;
end
if nargin < 5
Knn = 5;
end
if nargin < 4
p = 1000;
end
if nargin < 3
distance = 'euclidean';
end
if nargin < 2
M = 20;
end
N = size(fea,1);
if p>N
p = N;
end
members = zeros(N,M);
rand('state',sum(100*clock)*rand(1)); % Reset the clock before generating random numbers
Ks = randsample(upK-lowK+1,M,true)+lowK-1;
warning('off');
% In ensemble generation, the iteration number in the kmeans discretization
% of each base cluserer can be set to small values, so as to improve
% diversity of base clusterings and reduce the iteration time costs.
tcutKmIters = 5;
tcutKmRps = 1;
for i = 1:M
% Generating the i-th base clustering by U-SPEC.
tic1 = tic;
members(:,i) = USPEC(fea, Ks(i), distance, p, Knn, tcutKmIters, tcutKmRps);
toc(tic1);
end
function labels = USENC_ConsensusFunction(baseCls,k,maxTcutKmIters,cntTcutKmReps)
% Huang Dong. Mar. 20, 2019.
% Combine the M base clusterings in baseCls to obtain the final clustering
% result (with k clusters).
if nargin < 4
cntTcutKmReps = 3;
end
if nargin < 3
maxTcutKmIters = 100; % maxTcutKmIters and cntTcutKmReps are used to limit the iterations of the k-means discretization in Tcut.
end
[N,M] = size(baseCls);
maxCls = max(baseCls);
for i = 1:numel(maxCls)-1
maxCls(i+1) = maxCls(i+1)+maxCls(i);
end
cntCls = maxCls(end);
baseCls(:,2:end) = baseCls(:,2:end) + repmat(maxCls(1:end-1),N,1); clear maxCls
% Build the bipartite graph.
B=sparse(repmat([1:N]',1,M),baseCls(:),1,N,cntCls); clear baseCls
colB = sum(B);
B(:,colB==0) = [];
% Cut the bipartite graph.
labels = Tcut_for_bipartite_graph(B,k,maxTcutKmIters,cntTcutKmReps);
function labels = Tcut_for_bipartite_graph(B,Nseg,maxKmIters,cntReps)
% B - |X|-by-|Y|, cross-affinity-matrix
if nargin < 4
cntReps = 3;
end
if nargin < 3
maxKmIters = 100;
end
[Nx,Ny] = size(B);
if Ny < Nseg
error('Need more columns!');
end
dx = sum(B,2);
dx(dx==0) = 1e-10; % Just to make 1./dx feasible.
Dx = sparse(1:Nx,1:Nx,1./dx); clear dx
Wy = B'*Dx*B;
%%% compute Ncut eigenvectors
% normalized affinity matrix
d = sum(Wy,2);
D = sparse(1:Ny,1:Ny,1./sqrt(d)); clear d
nWy = D*Wy*D; clear Wy
nWy = (nWy+nWy')/2;
% computer eigenvectors
[evec,eval] = eig(full(nWy)); clear nWy
[~,idx] = sort(diag(eval),'descend');
Ncut_evec = D*evec(:,idx(1:Nseg)); clear D
%%% compute the Ncut eigenvectors on the entire bipartite graph (transfer!)
evec = Dx * B * Ncut_evec; clear B Dx Ncut_evec
% normalize each row to unit norm
evec = bsxfun( @rdivide, evec, sqrt(sum(evec.*evec,2)) + 1e-10 );
% k-means
% labels = litekmeans(evec,Nseg);
labels = kmeans(evec,Nseg,'MaxIter',maxKmIters,'Replicates',cntReps);