388 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			388 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
| using UnityEngine;
 | |
| using System.Collections;
 | |
| using System.Collections.Generic;
 | |
| using System.Text;
 | |
| using System;
 | |
| 
 | |
| namespace DigitalOpus.MB.Core
 | |
| {
 | |
|     [Serializable]
 | |
|     public class MB3_AgglomerativeClustering
 | |
|     {
 | |
| 
 | |
|         public List<item_s> items = new List<item_s>();
 | |
| 
 | |
|         public ClusterNode[] clusters;
 | |
| 
 | |
|         public bool wasCanceled;
 | |
| 
 | |
// A node of the cluster tree. A leaf node wraps one input item; a merged node
// records the two child nodes it was built from.
[Serializable]
public class ClusterNode
{
    // Input item wrapped by a leaf node (assigned only by the leaf constructor).
    public item_s leaf;
    // The two children of a merged node (assigned only by the merge constructor).
    public ClusterNode cha;
    public ClusterNode chb;
    // Height of node from the bottom; leaf nodes are at 0.
    public int height;
    // Distance passed in when this node was created by a merge.
    public float distToMergedCentroid;
    // Centroid of this cluster.
    public Vector3 centroid;
    // Indexes (into the clusters array) of the leaf nodes contained in this cluster.
    public int[] leafs;
    // Index of this node in the clusters array.
    public int idx;
    // True until this node has itself been merged into another cluster.
    public bool isUnclustered = true;

    // Creates a leaf node for a single input item stored at position `index`.
    public ClusterNode(item_s ii, int index)
    {
        idx = index;
        leaf = ii;
        height = 0;
        centroid = ii.coord;
        leafs = new int[] { index };
    }

    // Creates the node that results from merging `a` and `b`.
    // `clusters` is used to look up the centroids of every contained leaf so the
    // new centroid is the plain average of the leaf centroids.
    public ClusterNode(ClusterNode a, ClusterNode b, int index, int h, float dist, ClusterNode[] clusters)
    {
        idx = index;
        height = h;
        distToMergedCentroid = dist;
        cha = a;
        chb = b;

        // Leaf list is a's leaves followed by b's leaves.
        int countA = a.leafs.Length;
        int countB = b.leafs.Length;
        leafs = new int[countA + countB];
        a.leafs.CopyTo(leafs, 0);
        b.leafs.CopyTo(leafs, countA);

        Vector3 sum = Vector3.zero;
        foreach (int leafIndex in leafs)
        {
            sum += clusters[leafIndex].centroid;
        }
        centroid = sum / leafs.Length;
    }
}
 | |
| 
 | |
| 
 | |
// One input data point for the clustering: a GameObject together with the
// coordinate that represents it.
[Serializable]
public class item_s
{
    // GameObject this data point belongs to.
    public GameObject go;

    // Coordinate of the input data point.
    public Vector3 coord;
}
 | |
| 
 | |
// Straight-line distance between points a and b.
float euclidean_distance(Vector3 a, Vector3 b)
{
    float separation = Vector3.Distance(a, b);
    return separation;
}
 | |
| 
 | |
// Builds the cluster tree for `items` by repeatedly merging the two closest
// unclustered nodes until a single node remains.
//
// progFunc: optional progress callback; may be null. Its return value is stored
//           in wasCanceled, and a true value stops the clustering.
// Returns true when clustering ran to completion; false when there were fewer
// than two items, when the run was canceled, or when no mergeable pair could be
// found. On return `clusters` holds the leaf nodes followed by the merged nodes
// created so far (empty array when there were fewer than two items).
public bool agglomerate(ProgressUpdateCancelableDelegate progFunc)
{
    // A run is only canceled if the callback says so. Without a callback nothing can cancel it.
    wasCanceled = false;
    if (progFunc != null) wasCanceled = progFunc("Filling Priority Queue:", 0);
    if (items.Count <= 1)
    {
        clusters = new ClusterNode[0];
        return false;
    }

    // Hand the refill step a callable delegate even when the caller supplied none.
    ProgressUpdateCancelableDelegate refillProgress = progFunc;
    if (refillProgress == null) refillProgress = delegate { return false; };

    // n leaves produce at most n - 1 merged nodes.
    clusters = new ClusterNode[items.Count * 2 - 1];
    List<ClusterNode> unclustered = new List<ClusterNode>(items.Count);
    for (int i = 0; i < items.Count; i++)
    {
        clusters[i] = new ClusterNode(items[i], i);
        clusters[i].isUnclustered = true;
        unclustered.Add(clusters[i]);
    }

    int numClusters = items.Count;
    int height = 0;
    float largestDistInQ = 0;
    PriorityQueue<float, ClusterDistance> pq = new PriorityQueue<float, ClusterDistance>();

    while (unclustered.Count > 1 && !wasCanceled)
    {
        height++;

        //find closest pair
        if (pq.Count == 0)
        {
            _ReportClusteringProgress(progFunc, "Refilling Q:", unclustered.Count, pq.Count);
            largestDistInQ = _RefillPriorityQWithSome(pq, unclustered, clusters, refillProgress);
            // A canceled refill leaves the queue partially filled, so its head is not
            // necessarily the closest pair. Stop instead of merging it.
            if (pq.Count == 0 || wasCanceled) break;
        }
        KeyValuePair<float, ClusterDistance> closestPair = pq.Dequeue();

        // Should only consider unclustered pairs. It is more efficient to discard nodes that have
        // already been clustered as they are popped off the Q than to remove them from the Q when
        // they are clustered.
        bool foundLivePair = true;
        while (!closestPair.Value.a.isUnclustered || !closestPair.Value.b.isUnclustered)
        {
            if (pq.Count == 0)
            {
                _ReportClusteringProgress(progFunc, "Creating clusters:", unclustered.Count, pq.Count);
                largestDistInQ = _RefillPriorityQWithSome(pq, unclustered, clusters, refillProgress);
                if (pq.Count == 0 || wasCanceled)
                {
                    // closestPair still refers to an already-merged node; it must not be merged again.
                    foundLivePair = false;
                    break;
                }
            }
            closestPair = pq.Dequeue();
        }
        if (!foundLivePair) break;

        ClusterNode childA = closestPair.Value.a;
        ClusterNode childB = closestPair.Value.b;

        //make a new cluster with pair as children, set merge height
        int newIdx = numClusters;
        if (newIdx >= clusters.Length)
        {
            // Should be impossible: every merge removes two nodes and adds one.
            Debug.LogError("how did this happen");
            return false;
        }
        numClusters++;
        ClusterNode cn = new ClusterNode(childA, childB, newIdx, height, closestPair.Key, clusters);

        //remove children from unclustered
        unclustered.Remove(childA);
        unclustered.Remove(childB);

        // Queue entries that still mention the children are not searched for and removed (slow).
        // They stay in the queue and are discarded when popped, based on this flag.
        childA.isUnclustered = false;
        childB.isUnclustered = false;

        //add new cluster to unclustered
        clusters[newIdx] = cn;
        unclustered.Add(cn);
        cn.isUnclustered = true;

        // Queue the new cluster's distance to every other unclustered node (cn itself is the last
        // entry and is skipped). Distances at or beyond the refill threshold are left out to avoid
        // cluttering the queue.
        for (int i = 0; i < unclustered.Count - 1; i++)
        {
            float dist = euclidean_distance(cn.centroid, unclustered[i].centroid);
            if (dist < largestDistInQ)
            {
                pq.Add(new KeyValuePair<float, ClusterDistance>(dist, new ClusterDistance(cn, unclustered[i])));
            }
        }

        _ReportClusteringProgress(progFunc, "Creating clusters:", unclustered.Count, pq.Count);
    }

    // Only report completion for a run that was not canceled, so the final callback cannot
    // overwrite an earlier cancel.
    // NOTE(review): every other call passes a 0..1 fraction; confirm that 100 is intended here.
    if (!wasCanceled && progFunc != null) wasCanceled = progFunc("Finished clustering:", 100);

    return !wasCanceled;
}

// Sends a progress message of the form
// "<label><percent> unclustered:<n> inQ:<n> usedMem:<MB>" to progFunc and stores the
// callback's answer in wasCanceled. Does nothing when progFunc is null.
void _ReportClusteringProgress(ProgressUpdateCancelableDelegate progFunc, string label, int numUnclustered, int numInQueue)
{
    if (progFunc == null) return;
    long usedMemory = GC.GetTotalMemory(false) / 1000000;
    float percentDone = ((float)(items.Count - numUnclustered) * 100) / items.Count;
    float fractionDone = ((float)(items.Count - numUnclustered)) / items.Count;
    wasCanceled = progFunc(label + percentDone + " unclustered:" + numUnclustered + " inQ:" + numInQueue + " usedMem:" + usedMemory,
        fractionDone);
}
 | |
| 
 | |
|         const int MAX_PRIORITY_Q_SIZE = 2048;
 | |
// Loads pq with the closest pairs of unclustered nodes.
//
// Pass A collects the centroid distance of every pair, then picks the distance at
// rank MAX_PRIORITY_Q_SIZE (0-based, clamped to the number of pairs) as a threshold.
// Pass B adds every pair whose distance is less than or equal to that threshold.
//
// progFunc: optional progress callback; may be null. When supplied it is called once
//           per outer-loop row in both passes and its answer is stored in wasCanceled.
// Returns the threshold distance. Returns 10e10f when there are no pairs, and 10f
// when the callback requested a cancel (pq may then be empty or partially filled).
float _RefillPriorityQWithSome(PriorityQueue<float, ClusterDistance> pq, List<ClusterNode> unclustered, ClusterNode[] clusters, ProgressUpdateCancelableDelegate progFunc)
{
    // Pass A: find nthSmallest point of distances between pairs
    List<float> allDist = new List<float>(2048);
    for (int i = 0; i < unclustered.Count; i++)
    {
        for (int j = i + 1; j < unclustered.Count; j++)
        {
            allDist.Add(euclidean_distance(unclustered[i].centroid, unclustered[j].centroid));
        }
        if (progFunc != null)
        {
            wasCanceled = progFunc("Refilling Queue Part A:", i / (unclustered.Count * 2f));
            if (wasCanceled) return 10f;
        }
    }

    if (allDist.Count == 0)
    {
        return 10e10f;
    }
    float nthSmallest = NthSmallestElement(allDist, MAX_PRIORITY_Q_SIZE);

    // Pass B: load up Q with up to nthSmallest distance pairs
    for (int i = 0; i < unclustered.Count; i++)
    {
        for (int j = i + 1; j < unclustered.Count; j++)
        {
            int idxa = unclustered[i].idx;
            int idxb = unclustered[j].idx;
            float newDist = euclidean_distance(unclustered[i].centroid, unclustered[j].centroid);
            if (newDist <= nthSmallest)
            {
                pq.Add(new KeyValuePair<float, ClusterDistance>(newDist, new ClusterDistance(clusters[idxa], clusters[idxb])));
            }
        }
        if (progFunc != null)
        {
            wasCanceled = progFunc("Refilling Queue Part B:", (unclustered.Count + i) / (unclustered.Count * 2f));
            if (wasCanceled) return 10f;
        }
    }
    return nthSmallest;
}
 | |
| 
 | |
// Replaces `items` with one entry per GameObject in `gos` (using each object's
// transform position as its coordinate) and, when at least one entry exists,
// runs agglomerate without a progress callback. Always returns 0.
public int TestRun(List<GameObject> gos)
{
    List<item_s> built = new List<item_s>();
    foreach (GameObject source in gos)
    {
        built.Add(new item_s { go = source, coord = source.transform.position });
    }
    items = built;

    if (items.Count == 0)
    {
        return 0;
    }

    agglomerate(null);
    return 0;
}
 | |
| 
 | |
| 
 | |
|         //------
 | |
|         //    Unclustered
 | |
|         //need to be able to find the smallest distance between unclustered pairs quickly
 | |
|         //Do this by maintaining a PriorityQueue that each refill loads with roughly the MAX_PRIORITY_Q_SIZE smallest pair distances
 | |
|         //   Q stores min distances between cluster pairs
 | |
|         //   unclustered stores the list of clusters that have not yet been merged
 | |
|         //GetMin
 | |
|         //    if Q is empty
 | |
|         //          build Q from unclustered O(n2)
 | |
|         //          track the largestDistanceInQ
 | |
|         //          if unclustered is empty we are done
 | |
|         //    else
 | |
|         //          q.DeQueue O(1)
 | |
|         //
 | |
|         //    when creating new merged cluster, calc dist to all other unclustered add these distances to priority Q if less than largestDistanceInQ O(N)
 | |
|         //
 | |
| 
 | |
// A pair of cluster nodes. Instances are stored in the priority queue as the
// value of an entry whose key is the distance between the two nodes.
public class ClusterDistance
{
    public ClusterNode a;
    public ClusterNode b;

    // Stores the two nodes of the pair.
    public ClusterDistance(ClusterNode aa, ClusterNode bb)
    {
        this.a = aa;
        this.b = bb;
    }
}
 | |
| 
 | |
| 
 | |
| 
 | |
// Ad-hoc check of NthSmallestElement: logs a banner and then the smallest value
// (rank 0) of a fixed ten-element sample. Despite the banner text, the selection
// is performed once.
public static void Main()
{
    List<float> samples = new List<float> { 19, 18, 17, 16, 15, 10, 11, 12, 13, 14 };

    Debug.Log("Loop quick select 10 times.");

    float smallest = NthSmallestElement(samples, 0);
    Debug.Log(smallest);
}
 | |
| 
 | |
// Returns the element that would be at index n (0-based) if `array` were sorted
// in ascending order.
//
// n is clamped into [0, array.Count - 1].
// The list is reordered in place as a side effect; pass a copy if the original
// order matters.
// Throws ArgumentNullException when array is null and ArgumentException when it
// is empty.
public static T NthSmallestElement<T>(List<T> array, int n) where T : IComparable<T>
{
    if (array == null)
        throw new ArgumentNullException("array");
    if (array.Count == 0)
        throw new ArgumentException("Array is empty.", "array");
    if (array.Count == 1)
        return array[0];

    if (n < 0)
        n = 0;
    if (n > array.Count - 1)
        n = array.Count - 1;

    return QuickSelectSmallest(array, n)[n];
}

// Quickselect: reorders `input` in place until the element of rank n sits at
// index n, then returns the same list instance. Expects 0 <= n < input.Count.
private static List<T> QuickSelectSmallest<T>(List<T> input, int n) where T : IComparable<T>
{
    // Search window; it always contains index n.
    var startIndex = 0;
    var endIndex = input.Count - 1;

    // First pivot is the element currently at n - a direct hit if the list is already sorted.
    var pivotIndex = n;

    var r = new System.Random();
    while (endIndex > startIndex)
    {
        int equalStart;
        int equalEnd;
        QuickSelectPartition(input, startIndex, endIndex, pivotIndex, out equalStart, out equalEnd);

        if (n < equalStart)
            // Rank n lies among the elements smaller than the pivot.
            endIndex = equalStart - 1;
        else if (n > equalEnd)
            // Rank n lies among the elements larger than the pivot.
            startIndex = equalEnd + 1;
        else
            // Index n falls inside the run of pivot-equal values: found.
            break;

        // Random pivot anywhere in the remaining window (the upper bound of Next is exclusive).
        pivotIndex = r.Next(startIndex, endIndex + 1);
    }
    return input;
}

// Three-way partition of array[startIndex..endIndex] around the value at pivotIndex.
// Afterwards the window is laid out as: values smaller than the pivot, then every
// value equal to the pivot at [equalStart..equalEnd], then values larger than it.
// Grouping the equal values lets the caller skip a whole run of duplicates in one
// step instead of shrinking the window by one element per pass.
private static void QuickSelectPartition<T>(List<T> array, int startIndex, int endIndex, int pivotIndex, out int equalStart, out int equalEnd) where T : IComparable<T>
{
    var pivotValue = array[pivotIndex];
    var smallerEnd = startIndex; // first index not known to be smaller than the pivot
    var largerStart = endIndex;  // last index not known to be larger than the pivot
    var i = startIndex;

    while (i <= largerStart)
    {
        var comparison = array[i].CompareTo(pivotValue);
        if (comparison < 0)
        {
            Swap(array, smallerEnd, i);
            smallerEnd++;
            i++;
        }
        else if (comparison > 0)
        {
            // The element swapped in from the right has not been examined yet, so i stays put.
            Swap(array, i, largerStart);
            largerStart--;
        }
        else
        {
            i++;
        }
    }

    equalStart = smallerEnd;
    equalEnd = largerStart;
}

// Exchanges the elements at index1 and index2; does nothing when they are the same index.
private static void Swap<T>(List<T> array, int index1, int index2)
{
    if (index1 == index2)
        return;

    var temp = array[index1];
    array[index1] = array[index2];
    array[index2] = temp;
}
 | |
|        
 | |
|     }
 | |
| }
 |