
PDSA - Week 6Union-Find data structureImproved Kruskal's algorithm using Union-findPriority QueueHeapImprove Dijkstra's algorithm using min heapHeap SortBinary Search Tree(BST)
A set S partitioned into components {C1, C2, . . . , Ck }
Support the following operations
Visualization
Naïve Implementation of Union-Find
1class MakeUnionFind:2 def __init__(self):3 self.components = {}4 self.size = 05 def make_union_find(self,vertices):6 self.size = vertices7 for vertex in range(vertices):8 self.components[vertex] = vertex9 def find(self,vertex):10 return self.components[vertex]11 def union(self,u,v):12 c_old = self.components[u]13 c_new = self.components[v]14 for k in range(self.size):15 if self.components[k] == c_old:16 self.components[k] = c_newComplexity
Improved Implementation of Union-Find
xxxxxxxxxx261class MakeUnionFind:2 def __init__(self):3 self.components = {}4 self.members = {}5 self.size = {}6 def make_union_find(self,vertices):7 for vertex in range(vertices):8 self.components[vertex] = vertex9 self.members[vertex] = [vertex]10 self.size[vertex] = 111 def find(self,vertex):12 return self.components[vertex]13 def union(self,u,v):14 c_old = self.components[u]15 c_new = self.components[v]16 # Always add member in components which have greater size17 if self.size[c_new] >= self.size[c_old]: 18 for x in self.members[c_old]:19 self.components[x] = c_new20 self.members[c_new].append(x)21 self.size[c_new] += 122 else:23 for x in self.members[c_new]:24 self.components[x] = c_old25 self.members[c_old].append(x)26 self.size[c_old] += 1Complexity
x
1class MakeUnionFind:2 def __init__(self):3 self.components = {}4 self.members = {}5 self.size = {}6 def make_union_find(self,vertices):7 for vertex in range(vertices):8 self.components[vertex] = vertex9 self.members[vertex] = [vertex]10 self.size[vertex] = 111 def find(self,vertex):12 return self.components[vertex]13 def union(self,u,v):14 c_old = self.components[u]15 c_new = self.components[v]16 # Always add member in components which have greater size17 if self.size[c_new] >= self.size[c_old]: 18 for x in self.members[c_old]:19 self.components[x] = c_new20 self.members[c_new].append(x)21 self.size[c_new] += 122 else:23 for x in self.members[c_new]:24 self.components[x] = c_old25 self.members[c_old].append(x)26 self.size[c_old] += 127
28
29def kruskal(WList):30 (edges,TE) = ([],[])31 for u in WList.keys():32 edges.extend([(d,u,v) for (v,d) in WList[u]])33 edges.sort()34 mf = MakeUnionFind()35 mf.make_union_find(len(WList))36 for (d,u,v) in edges:37 if mf.components[u] != mf.components[v]:38 mf.union(u,v)39 TE.append((u,v,d))40 # We can stop the process if the size becomes equal to the total number of vertices41 # Which represent that a spanning tree is completed42 if mf.size[mf.components[u]]>= mf.size[mf.components[v]]:43 if mf.size[mf.components[u]] == len(WList):44 break45 else:46 if mf.size[mf.components[v]] == len(WList):47 break48 return(TE)49
50# Testcase51
52edge = [(0,1,10),(0,2,18),(0,3,6),(0,4,20),(0,5,13),(1,2,10),(1,3,10),(1,4,5),(1,5,7),(2,3,2),(2,4,14),(2,5,15),(3,4,17),(3,5,12),(4,5,10)]53
54size = 655WL = {}56for i in range(size):57 WL[i] = []58for (i,j,d) in edge:59 WL[i].append((j,d))60print(kruskal(WL))Output
xxxxxxxxxx11[(2, 3, 2), (1, 4, 5), (0, 3, 6), (1, 5, 7), (0, 1, 10)]Complexity
Tree has
Sorting E takes
Overall time,
Need to maintain a collection of items with priorities to optimize the following operations
delete max()
insert()
Binary tree
A binary tree is a tree data structure in which each node can contain at most 2 children, which are referred to as the left child and the right child.
Heap
Heap is a binary tree, filled level by level, left to right. There are two types of the heap:
We can represent heap using array(list in python)

H = [h0, h1, h2, h3, h4, h5, h6, h7, h8, h9]
left child of H[i] = H[2 * i + 1]
Right child of H[i] = H[2 * i + 2]
Parent of H[i] = H[(i-1) // 2], for i > 0
Visualization
Implementation of Maxheap
xxxxxxxxxx541class maxheap:2 def __init__(self):3 self.A = []4 def max_heapify(self,k):5 l = 2 * k + 16 r = 2 * k + 27 largest = k8 if l < len(self.A) and self.A[l] > self.A[largest]:9 largest = l10 if r < len(self.A) and self.A[r] > self.A[largest]:11 largest = r12 if largest != k:13 self.A[k], self.A[largest] = self.A[largest], self.A[k]14 self.max_heapify(largest)15
16 def build_max_heap(self,L):17 self.A = []18 for i in L:19 self.A.append(i)20 n = int((len(self.A)//2)-1)21 for k in range(n, -1, -1):22 self.max_heapify(k)23
24 25 def delete_max(self):26 item = None27 if self.A != []:28 self.A[0],self.A[-1] = self.A[-1],self.A[0]29 item = self.A.pop()30 self.max_heapify(0)31 return item32
33
34 def insert_in_maxheap(self,d):35 self.A.append(d)36 index = len(self.A)-137 while index > 0:38 parent = (index-1)//239 if self.A[index] > self.A[parent]:40 self.A[index],self.A[parent] = self.A[parent],self.A[index]41 index = parent42 else:43 break44
45heap = maxheap()46heap.build_max_heap([1,2,3,4,5,6])47print(heap.A)48heap.insert_in_maxheap(7)49print(heap.A)50heap.insert_in_maxheap(8)51print(heap.A)52print(heap.delete_max())53print(heap.delete_max())54print(heap.A)Output
xxxxxxxxxx61[6, 5, 3, 4, 2, 1]2[7, 5, 6, 4, 2, 1, 3]3[8, 7, 6, 5, 2, 1, 3, 4]48576[6, 5, 3, 4, 2, 1]
Implementation of Minheap
xxxxxxxxxx541class minheap:2 def __init__(self):3 self.A = []4 def min_heapify(self,k):5 l = 2 * k + 16 r = 2 * k + 27 smallest = k8 if l < len(self.A) and self.A[l] < self.A[smallest]:9 smallest = l10 if r < len(self.A) and self.A[r] < self.A[smallest]:11 smallest = r12 if smallest != k:13 self.A[k], self.A[smallest] = self.A[smallest], self.A[k]14 self.min_heapify(smallest)15
16 def build_min_heap(self,L):17 self.A = []18 for i in L:19 self.A.append(i)20 n = int((len(self.A)//2)-1)21 for k in range(n, -1, -1):22 self.min_heapify(k)23
24 25 def delete_min(self):26 item = None27 if self.A != []:28 self.A[0],self.A[-1] = self.A[-1],self.A[0]29 item = self.A.pop()30 self.min_heapify(0)31 return item32
33
34 def insert_in_minheap(self,d):35 self.A.append(d)36 index = len(self.A)-137 while index > 0:38 parent = (index-1)//239 if self.A[index] < self.A[parent]:40 self.A[index],self.A[parent] = self.A[parent],self.A[index]41 index = parent42 else:43 break44
45heap = minheap()46heap.build_min_heap([6,5,4,3,2])47print(heap.A)48heap.insert_in_minheap(1)49print(heap.A)50heap.insert_in_minheap(8)51print(heap.A)52print(heap.delete_min())53print(heap.delete_min())54print(heap.A)Output
xxxxxxxxxx61[2, 3, 4, 6, 5]2[1, 3, 2, 6, 5, 4]3[1, 3, 2, 6, 5, 4, 8]41526[3, 5, 4, 6, 8]
Complexity
Heaps are a tree implementation of priority queues
Old implementation for adjacency matrix
xxxxxxxxxx371def dijkstra(WMat,s):2 (rows,cols,x) = WMat.shape3 infinity = np.max(WMat)*rows+14 (visited,distance) = ({},{})5 for v in range(rows):6 (visited[v],distance[v]) = (False,infinity)7 8 distance[s] = 09 10 for u in range(rows):11 nextd = min([distance[v] for v in range(rows)12 if not visited[v]])13 nextvlist = [v for v in range(rows)14 if (not visited[v]) and15 distance[v] == nextd]16 if nextvlist == []:17 break18 nextv = min(nextvlist)19 20 visited[nextv] = True 21 for v in range(cols):22 if WMat[nextv,v,0] == 1 and (not visited[v]):23 distance[v] = min(distance[v],distance[nextv]24 +WMat[nextv,v,1])25 return(distance)26
27
28dedges = [(0,1,10),(0,2,80),(1,2,6),(1,4,20),(2,3,70),(4,5,50),(4,6,5),(5,6,10)]29#edges = dedges + [(j,i,w) for (i,j,w) in dedges]30size = 731import numpy as np32W = np.zeros(shape=(size,size,2))33for (i,j,w) in dedges:34 W[i,j,0] = 135 W[i,j,1] = w36s = 037print(dijkstra(W,s))Output
xxxxxxxxxx11{0: 0, 1: 10.0, 2: 16.0, 3: 86.0, 4: 30.0, 5: 80.0, 6: 35.0}
Updated Implementation for adjacency matrix using min heap
xxxxxxxxxx791# considering dictionary as a heap for given code2def min_heapify(i,size):3 lchild = 2*i + 14 rchild = 2*i + 25 small = i6 if lchild < size-1 and HtoV[lchild][1] < HtoV[small][1]: 7 small = lchild8 if rchild < size-1 and HtoV[rchild][1] < HtoV[small][1]: 9 small = rchild10 if small != i:11 VtoH[HtoV[small][0]] = i12 VtoH[HtoV[i][0]] = small13 (HtoV[small],HtoV[i]) = (HtoV[i], HtoV[small])14 min_heapify(small,size)15
16def create_minheap(size):17 for x in range((size//2)-1,-1,-1):18 min_heapify(x,size)19
20def minheap_update(i,size):21 if i!= 0:22 while i > 0:23 parent = (i-1)//224 if HtoV[parent][1] > HtoV[i][1]:25 VtoH[HtoV[parent][0]] = i26 VtoH[HtoV[i][0]] = parent27 (HtoV[parent],HtoV[i]) = (HtoV[i], HtoV[parent])28 else:29 break30 i = parent31
32def delete_min(hsize):33 VtoH[HtoV[0][0]] = hsize-134 VtoH[HtoV[hsize-1][0]] = 035 HtoV[hsize-1],HtoV[0] = HtoV[0],HtoV[hsize-1] 36 node,dist = HtoV[hsize-1]37 hsize = hsize - 138 min_heapify(0,hsize) 39 return node,dist,hsize40
41#global HtoV map heap index to (vertex,distance from source)42#global VtoH map vertex to heap index43HtoV, VtoH = {},{}44def dijkstra(WMat,s):45 (rows,cols,x) = WMat.shape46 infinity = float('inf')47 visited = {}48 heapsize = rows49 for v in range(rows):50 VtoH[v]=v51 HtoV[v]=[v,infinity]52 visited[v] = False53 HtoV[s]= [s,0]54 create_minheap(heapsize)55 56 for u in range(rows):57 nextd,ds,heapsize = delete_min(heapsize) 58 visited[nextd] = True 59 for v in range(cols):60 if WMat[nextd,v,0] == 1 and (not visited[v]):61 # update distance of adjacent of v if it is less than to previous one62 HtoV[VtoH[v]][1] = min(HtoV[VtoH[v]][1],ds+WMat[nextd,v,1])63 minheap_update(VtoH[v],heapsize)64
65
66dedges = [(0,1,10),(0,2,80),(1,2,6),(1,4,20),(2,3,70),(4,5,50),(4,6,5),(5,6,10)]67#edges = dedges + [(j,i,w) for (i,j,w) in dedges]68size = 769import numpy as np70W = np.zeros(shape=(size,size,2))71for (i,j,w) in dedges:72 W[i,j,0] = 173 W[i,j,1] = w74s = 075dijkstra(W,s)76#print(HtoV)77#print(VtoH)78for i in range(size):79 print('Shortest distance from {0} to {1} = {2}'.format(s,i,HtoV[VtoH[i]][1]))Output
xxxxxxxxxx71Shortest distance from 0 to 0 = 02Shortest distance from 0 to 1 = 10.03Shortest distance from 0 to 2 = 16.04Shortest distance from 0 to 3 = 86.05Shortest distance from 0 to 4 = 30.06Shortest distance from 0 to 5 = 80.07Shortest distance from 0 to 6 = 35.0
Updated Implementation for adjacency list using min heap
xxxxxxxxxx781def min_heapify(i,size):2 lchild = 2*i + 13 rchild = 2*i + 24 small = i5 if lchild < size-1 and HtoV[lchild][1] < HtoV[small][1]: 6 small = lchild7 if rchild < size-1 and HtoV[rchild][1] < HtoV[small][1]: 8 small = rchild9 if small != i:10 VtoH[HtoV[small][0]] = i11 VtoH[HtoV[i][0]] = small12 (HtoV[small],HtoV[i]) = (HtoV[i], HtoV[small])13 min_heapify(small,size)14
15def create_minheap(size):16 for x in range((size//2)-1,-1,-1):17 min_heapify(x,size)18
19def minheap_update(i,size):20 if i!= 0:21 while i > 0:22 parent = (i-1)//223 if HtoV[parent][1] > HtoV[i][1]:24 VtoH[HtoV[parent][0]] = i25 VtoH[HtoV[i][0]] = parent26 (HtoV[parent],HtoV[i]) = (HtoV[i], HtoV[parent])27 else:28 break29 i = parent30
31def delete_min(hsize):32 VtoH[HtoV[0][0]] = hsize-133 VtoH[HtoV[hsize-1][0]] = 034 HtoV[hsize-1],HtoV[0] = HtoV[0],HtoV[hsize-1] 35 node,dist = HtoV[hsize-1]36 hsize = hsize - 137 min_heapify(0,hsize) 38 return node,dist,hsize39
40
41HtoV, VtoH = {},{}42#global HtoV map heap index to (vertex,distance from source)43#global VtoH map vertex to heap index44def dijkstralist(WList,s):45 infinity = float('inf')46 visited = {}47 heapsize = len(WList)48 for v in WList.keys():49 VtoH[v]=v50 HtoV[v]=[v,infinity]51 visited[v] = False52 HtoV[s]= [s,0]53 create_minheap(heapsize)54 55 for u in WList.keys():56 nextd,ds,heapsize = delete_min(heapsize) 57 visited[nextd] = True 58 for v,d in WList[nextd]:59 if not visited[v]:60 HtoV[VtoH[v]][1] = min(HtoV[VtoH[v]][1],ds+d)61 minheap_update(VtoH[v],heapsize)62
63
64dedges = [(0,1,10),(0,2,80),(1,2,6),(1,4,20),(2,3,70),(4,5,50),(4,6,5),(5,6,10)]65#edges = dedges + [(j,i,w) for (i,j,w) in dedges]66size = 767WL = {}68for i in range(size):69 WL[i] = []70for (i,j,d) in dedges:71 WL[i].append((j,d))72s = 073dijkstralist(WL,s)74#print(HtoV)75#print(VtoH)76for i in range(size):77 print('Shortest distance from {0} to {1} = {2}'.format(s,i,HtoV[VtoH[i]][1]))78
Output
xxxxxxxxxx71Shortest distance from 0 to 0 = 02Shortest distance from 0 to 1 = 103Shortest distance from 0 to 2 = 164Shortest distance from 0 to 3 = 865Shortest distance from 0 to 4 = 306Shortest distance from 0 to 5 = 807Shortest distance from 0 to 6 = 35Complexity
Using min-heaps:-
Cumulatively:-
Implementation
xxxxxxxxxx281def max_heapify(A,size,k):2 l = 2 * k + 13 r = 2 * k + 24 largest = k5 if l < size and A[l] > A[largest]:6 largest = l7 if r < size and A[r] > A[largest]:8 largest = r9 if largest != k:10 (A[k], A[largest]) = (A[largest], A[k])11 max_heapify(A,size,largest)12
13def build_max_heap(A):14 n = (len(A)//2)-115 for i in range(n, -1, -1):16 max_heapify(A,len(A),i)17
18def heapsort(A):19 build_max_heap(A)20 n = len(A)21 for i in range(n-1,-1,-1):22 A[0],A[i] = A[i],A[0]23 max_heapify(A,i,0)24 25
26A = [8,6,9,3,4,5,61,6666]27heapsort(A)28print(A)Output
xxxxxxxxxx11[3, 4, 5, 6, 8, 9, 61, 6666]
Complexity
For dynamic stored data
How can we improve Insert/delete time? - using tree structure?
A binary search tree is a binary tree that is either empty or satisfies the following conditions:
For each node V in the Tree
Visualization
Implementation
xxxxxxxxxx1151class Tree:2# Constructor:3 def __init__(self,initval=None):4 self.value = initval5 if self.value:6 self.left = Tree()7 self.right = Tree()8 else:9 self.left = None10 self.right = None11 return12 # Only empty node has value None13 def isempty(self):14 return (self.value == None)15 # Leaf nodes have both children empty16 def isleaf(self):17 return (self.value != None and self.left.isempty() and self.right.isempty())18 # Inorder traversal19 def inorder(self):20 if self.isempty():21 return([])22 else:23 return(self.left.inorder()+[self.value]+self.right.inorder())24 # Display Tree as a string25 def __str__(self):26 return(str(self.inorder()))27 # Check if value v occurs in tree28 def find(self,v):29 if self.isempty():30 return(False)31 if self.value == v:32 return(True)33 if v < self.value:34 return(self.left.find(v))35 if v > self.value:36 return(self.right.find(v))37 # return minimum value for tree rooted on self - Minimum is left most node in the tree38 def minval(self):39 if self.left.isempty():40 return(self.value)41 else:42 return(self.left.minval())43 # return max value for tree rooted on self - Maximum is right most node in the tree44 def maxval(self):45 if self.right.isempty():46 return(self.value)47 else:48 return(self.right.maxval())49 # insert new element in binary search tree50 def insert(self,v):51 if self.isempty():52 self.value = v53 self.left = Tree()54 self.right = Tree()55 if self.value == v:56 return57 if v < self.value:58 self.left.insert(v)59 return60 if v > self.value:61 self.right.insert(v)62 return63 # delete element from binary search tree64 def delete(self,v):65 if self.isempty():66 return67 if v < self.value:68 self.left.delete(v)69 return70 if v > self.value:71 self.right.delete(v)72 return73 if v == self.value:74 if self.isleaf():75 self.makeempty()76 elif self.left.isempty():77 self.copyright()78 elif self.right.isempty():79 self.copyleft()80 else:81 self.value = self.left.maxval()82 self.left.delete(self.left.maxval())83 return84 # Convert leaf node to empty node85 def makeempty(self):86 self.value = None87 self.left = None88 self.right = None89 return90 # Promote left child91 def copyleft(self):92 self.value = self.left.value93 self.right = self.left.right94 self.left = self.left.left95 return96 # Promote right child97 def copyright(self):98 self.value = self.right.value99 self.left = self.right.left100 self.right = self.right.right101 return102
103 104
105T = Tree()106bst = [9,8,7,6,5,4,3,2,1]107k = 4108for i in bst:109 T.insert(i)110print('Element in BST are:= ',T.inorder())111print('Maximum element in BST are:= ',T.maxval())112print('Minimum element in BST are:= ',T.minval())113print(k,'is present or not = ',T.find(k))114T.delete(3)115print('Element in BST after delete 3:= ',T.inorder())Output
xxxxxxxxxx51Element in BST are:= [1, 2, 3, 4, 5, 6, 7, 8, 9]2Maximum element in BST are:= 93Minimum element in BST are:= 144 is present or not = True5Element in BST after delete 3:= [1, 2, 4, 5, 6, 7, 8, 9]
Complexity