% KNNCLUST clustering % function [labs] = knnclust(s,a,k,conrate,D) % The string s sets the density type: % 'n': Normal, Gaussian % 't' : Triangular % a: the data set with size of N x d (N, the number of pixels, d dimension) % k: the k nearest neighbor % D (optional) Nxkx2 K nearest distance matrix for every objects (N) and its neighbor indices % conrate (optional) the convergence rate (e.g. 0.95 = 95%) - default 100% % Developed by % Thanh N. Tran % Dept. of Chemometrics % Radboud University of Nijmegen, The Netherlands % http://www-cac.sci.kun.nl/people/tnthanh/index.html % References: % Thanh N. Tran, Ron Wehrens, Lutgarde M.C. Buydens*, KNN-KERNEL DENSITY-BASED CLUSTERING FOR HIGH DIMENSIONAL MULTISPECTRAL IMAGES % The software is provided "as is" with no warranty whatsoever. % Please acknowledge contributions based on this software in your publications. function [labs] = knnclust(s,a,k,CONRATE,D) global labs; MAXSIZE_IN_MEM = 10000; if nargin < 4, CONRATE = 1; end N=size(a,1); D_in_file = 0; if nargin < 5 % for a small data set if N < MAXSIZE_IN_MEM d=pdist(a); d=squareform(d); [d,I]=sort(d'); d=d'; I=I'; D=zeros(N,k,2); D(:,:,1)=d(:,1:k); D(:,:,2)=I(:,1:k); % for a bigger data set else, D = sortdist(a,k); end else % In practice, a distance table needs to be calculated and store in disk % for a large data set. [Ndump,kdump,dump]=size(D); % check if knn matrix is in file? if Ndump == 1 D_in_file = 1; filename = D; else if Ndump ~= N | kdump ~= k | dump ~= 2, error('Size of D mismatch!'); end end end % Update knn distance table which is suitable for specific data type switch s case {'n','t'} , if ~D_in_file for i=1:N if D(i,k,1) == 0, D(i,:,1) = inf; else, D(i,:,1) = D(i,:,1)/D(i,k,1); end end end otherwise, error('No such data type'); end %%%%%%%%%%%%% START THE ALGORITHM - KNNCLUST %%%%%%%%%%%%%%%%%%%%%%%%%%%%% labs = [1:N]'; n=1; nmerged = 1; while nmerged % Display the number of ilteration fprintf('Iteration %d\n',n); % Generate N random for indexing purpose %rand_i = unique(ceil(N*rand(1,N))); %rand_i=[rand_i setdiff([1:N],rand_i)]; nmerged = 0; curindex = 1; if D_in_file, currecord = 1; maxrow_ = 0; end for i=1:N % Randomly get sample from the sample set %curindex = rand_i(i); % This is the NOT RANDOMLY case curindex = i; % This only for a big data set if D_in_file & (curindex > maxrow_) display (['load' 'mem/' filename num2str(currecord)]) load(['mem/' filename num2str(currecord)],'D'); D = D(:,1:k,:); currecord = currecord + 1; curindex = 1; [maxrow_ dump dump] = size(D); switch s case {'n','t'} , for ii=1:maxrow_ if D(ii,k,1) == 0, D(ii,:,1) = inf; else D(ii,:,1) = D(ii,:,1)/(2*D(ii,k,1)); end end otherwise, error('No such data type'); end end %%%%%%%%%% Calculate knn-kernel and perform the knn-kernel class-condition merged = 0; % get k neighbor objects knnindex = D(curindex,2:k,2); % for each object in knindex [Cset, I, J] = unique(labs(knnindex)); %% A(I)=Cset % Consider point i has to be merged to 'closest' c in Cset Csetsize = size(Cset,1); d_pdf=zeros(1,Csetsize); for c=1:Csetsize % knn points in c indexci = find(labs(knnindex)==Cset(c)); nci = size(indexci,1); indexc = [knnindex(indexci)'; curindex]; %%%%%%%% Choice between difference kernels switch s case 'n', d_pdf(c) = sum(exp(-0.5*D(curindex,indexci+1,1))); % Gaussian case 't', d_pdf(c) = sum(ones(1,nci)-D(curindex,indexci+1,1)); % Triangular case 'b', d_pdf(c) = sum(1-D(curindex,indexci+1,1)/5); case 'r', d_pdf(c) = size(indexc,1); end end %%%%%% perform the knn-kernel class-condition [dmax,di]=max(d_pdf); switch s case 'r' isame = find(d_pdf==dmax); if size(isame,2) > 1 % choose cluster that closest to % get the index of point closest to x [di,dump]=min(I(isame)); labs(curindex)=labs(knnindex(di+1)); merged = 1; else if labs(curindex) ~= Cset(di), labs(curindex)=Cset(di); merged = 1; end end otherwise, if labs(curindex) ~= Cset(di), labs(curindex)=Cset(di); merged = 1; end end %%% End one iteration nmerged = nmerged + merged; curindex = curindex +1; end %%%%%%%%%%%%%% Converge ??? End ??? (1-nmerged/N) if (1-nmerged/N) >= CONRATE, break; end n=n+1; end %%%% re-index labs (clustering result) newlabs=labs; cset = unique(labs); nclus = size(cset,1); for i=1:nclus index = find(labs==cset(i)); if ~isempty(index) newlabs(index) = i; end end labs = newlabs;