PDSA - Week 6Union-Find data structureImproved Kruskal's algorithm using Union-findPriority QueueHeapImprove Dijkstra's algorithm using min heapHeap SortBinary Search Tree(BST)
A set S partitioned into components {C1, C2, . . . , Ck }
Support the following operations
Visualization
Naïve Implementation of Union-Find
1class MakeUnionFind:
2 def __init__(self):
3 self.components = {}
4 self.size = 0
5 def make_union_find(self,vertices):
6 self.size = vertices
7 for vertex in range(vertices):
8 self.components[vertex] = vertex
9 def find(self,vertex):
10 return self.components[vertex]
11 def union(self,u,v):
12 c_old = self.components[u]
13 c_new = self.components[v]
14 for k in range(self.size):
15 if self.components[k] == c_old:
16 self.components[k] = c_new
Complexity
Improved Implementation of Union-Find
xxxxxxxxxx
261class MakeUnionFind:
2 def __init__(self):
3 self.components = {}
4 self.members = {}
5 self.size = {}
6 def make_union_find(self,vertices):
7 for vertex in range(vertices):
8 self.components[vertex] = vertex
9 self.members[vertex] = [vertex]
10 self.size[vertex] = 1
11 def find(self,vertex):
12 return self.components[vertex]
13 def union(self,u,v):
14 c_old = self.components[u]
15 c_new = self.components[v]
16 # Always add member in components which have greater size
17 if self.size[c_new] >= self.size[c_old]:
18 for x in self.members[c_old]:
19 self.components[x] = c_new
20 self.members[c_new].append(x)
21 self.size[c_new] += 1
22 else:
23 for x in self.members[c_new]:
24 self.components[x] = c_old
25 self.members[c_old].append(x)
26 self.size[c_old] += 1
Complexity
x
1class MakeUnionFind:
2 def __init__(self):
3 self.components = {}
4 self.members = {}
5 self.size = {}
6 def make_union_find(self,vertices):
7 for vertex in range(vertices):
8 self.components[vertex] = vertex
9 self.members[vertex] = [vertex]
10 self.size[vertex] = 1
11 def find(self,vertex):
12 return self.components[vertex]
13 def union(self,u,v):
14 c_old = self.components[u]
15 c_new = self.components[v]
16 # Always add member in components which have greater size
17 if self.size[c_new] >= self.size[c_old]:
18 for x in self.members[c_old]:
19 self.components[x] = c_new
20 self.members[c_new].append(x)
21 self.size[c_new] += 1
22 else:
23 for x in self.members[c_new]:
24 self.components[x] = c_old
25 self.members[c_old].append(x)
26 self.size[c_old] += 1
27
28
29def kruskal(WList):
30 (edges,TE) = ([],[])
31 for u in WList.keys():
32 edges.extend([(d,u,v) for (v,d) in WList[u]])
33 edges.sort()
34 mf = MakeUnionFind()
35 mf.make_union_find(len(WList))
36 for (d,u,v) in edges:
37 if mf.components[u] != mf.components[v]:
38 mf.union(u,v)
39 TE.append((u,v,d))
40 # We can stop the process if the size becomes equal to the total number of vertices
41 # Which represent that a spanning tree is completed
42 if mf.size[mf.components[u]]>= mf.size[mf.components[v]]:
43 if mf.size[mf.components[u]] == len(WList):
44 break
45 else:
46 if mf.size[mf.components[v]] == len(WList):
47 break
48 return(TE)
49
50# Testcase
51
52edge = [(0,1,10),(0,2,18),(0,3,6),(0,4,20),(0,5,13),(1,2,10),(1,3,10),(1,4,5),(1,5,7),(2,3,2),(2,4,14),(2,5,15),(3,4,17),(3,5,12),(4,5,10)]
53
54size = 6
55WL = {}
56for i in range(size):
57 WL[i] = []
58for (i,j,d) in edge:
59 WL[i].append((j,d))
60print(kruskal(WL))
Output
xxxxxxxxxx
11[(2, 3, 2), (1, 4, 5), (0, 3, 6), (1, 5, 7), (0, 1, 10)]
Complexity
Tree has
Sorting E takes
Overall time,
Need to maintain a collection of items with priorities to optimize the following operations
delete max()
insert()
Binary tree
A binary tree is a tree data structure in which each node can contain at most 2 children, which are referred to as the left child and the right child.
Heap
Heap is a binary tree, filled level by level, left to right. There are two types of the heap:
We can represent heap using array(list in python)
H = [h0, h1, h2, h3, h4, h5, h6, h7, h8, h9]
left child of H[i] = H[2 * i + 1]
Right child of H[i] = H[2 * i + 2]
Parent of H[i] = H[(i-1) // 2], for i > 0
Visualization
Implementation of Maxheap
xxxxxxxxxx
541class maxheap:
2 def __init__(self):
3 self.A = []
4 def max_heapify(self,k):
5 l = 2 * k + 1
6 r = 2 * k + 2
7 largest = k
8 if l < len(self.A) and self.A[l] > self.A[largest]:
9 largest = l
10 if r < len(self.A) and self.A[r] > self.A[largest]:
11 largest = r
12 if largest != k:
13 self.A[k], self.A[largest] = self.A[largest], self.A[k]
14 self.max_heapify(largest)
15
16 def build_max_heap(self,L):
17 self.A = []
18 for i in L:
19 self.A.append(i)
20 n = int((len(self.A)//2)-1)
21 for k in range(n, -1, -1):
22 self.max_heapify(k)
23
24
25 def delete_max(self):
26 item = None
27 if self.A != []:
28 self.A[0],self.A[-1] = self.A[-1],self.A[0]
29 item = self.A.pop()
30 self.max_heapify(0)
31 return item
32
33
34 def insert_in_maxheap(self,d):
35 self.A.append(d)
36 index = len(self.A)-1
37 while index > 0:
38 parent = (index-1)//2
39 if self.A[index] > self.A[parent]:
40 self.A[index],self.A[parent] = self.A[parent],self.A[index]
41 index = parent
42 else:
43 break
44
45heap = maxheap()
46heap.build_max_heap([1,2,3,4,5,6])
47print(heap.A)
48heap.insert_in_maxheap(7)
49print(heap.A)
50heap.insert_in_maxheap(8)
51print(heap.A)
52print(heap.delete_max())
53print(heap.delete_max())
54print(heap.A)
Output
xxxxxxxxxx
61[6, 5, 3, 4, 2, 1]
2[7, 5, 6, 4, 2, 1, 3]
3[8, 7, 6, 5, 2, 1, 3, 4]
48
57
6[6, 5, 3, 4, 2, 1]
Implementation of Minheap
xxxxxxxxxx
541class minheap:
2 def __init__(self):
3 self.A = []
4 def min_heapify(self,k):
5 l = 2 * k + 1
6 r = 2 * k + 2
7 smallest = k
8 if l < len(self.A) and self.A[l] < self.A[smallest]:
9 smallest = l
10 if r < len(self.A) and self.A[r] < self.A[smallest]:
11 smallest = r
12 if smallest != k:
13 self.A[k], self.A[smallest] = self.A[smallest], self.A[k]
14 self.min_heapify(smallest)
15
16 def build_min_heap(self,L):
17 self.A = []
18 for i in L:
19 self.A.append(i)
20 n = int((len(self.A)//2)-1)
21 for k in range(n, -1, -1):
22 self.min_heapify(k)
23
24
25 def delete_min(self):
26 item = None
27 if self.A != []:
28 self.A[0],self.A[-1] = self.A[-1],self.A[0]
29 item = self.A.pop()
30 self.min_heapify(0)
31 return item
32
33
34 def insert_in_minheap(self,d):
35 self.A.append(d)
36 index = len(self.A)-1
37 while index > 0:
38 parent = (index-1)//2
39 if self.A[index] < self.A[parent]:
40 self.A[index],self.A[parent] = self.A[parent],self.A[index]
41 index = parent
42 else:
43 break
44
45heap = minheap()
46heap.build_min_heap([6,5,4,3,2])
47print(heap.A)
48heap.insert_in_minheap(1)
49print(heap.A)
50heap.insert_in_minheap(8)
51print(heap.A)
52print(heap.delete_min())
53print(heap.delete_min())
54print(heap.A)
Output
xxxxxxxxxx
61[2, 3, 4, 6, 5]
2[1, 3, 2, 6, 5, 4]
3[1, 3, 2, 6, 5, 4, 8]
41
52
6[3, 5, 4, 6, 8]
Complexity
Heaps are a tree implementation of priority queues
Old implementation for adjacency matrix
xxxxxxxxxx
371def dijkstra(WMat,s):
2 (rows,cols,x) = WMat.shape
3 infinity = np.max(WMat)*rows+1
4 (visited,distance) = ({},{})
5 for v in range(rows):
6 (visited[v],distance[v]) = (False,infinity)
7
8 distance[s] = 0
9
10 for u in range(rows):
11 nextd = min([distance[v] for v in range(rows)
12 if not visited[v]])
13 nextvlist = [v for v in range(rows)
14 if (not visited[v]) and
15 distance[v] == nextd]
16 if nextvlist == []:
17 break
18 nextv = min(nextvlist)
19
20 visited[nextv] = True
21 for v in range(cols):
22 if WMat[nextv,v,0] == 1 and (not visited[v]):
23 distance[v] = min(distance[v],distance[nextv]
24 +WMat[nextv,v,1])
25 return(distance)
26
27
28dedges = [(0,1,10),(0,2,80),(1,2,6),(1,4,20),(2,3,70),(4,5,50),(4,6,5),(5,6,10)]
29#edges = dedges + [(j,i,w) for (i,j,w) in dedges]
30size = 7
31import numpy as np
32W = np.zeros(shape=(size,size,2))
33for (i,j,w) in dedges:
34 W[i,j,0] = 1
35 W[i,j,1] = w
36s = 0
37print(dijkstra(W,s))
Output
xxxxxxxxxx
11{0: 0, 1: 10.0, 2: 16.0, 3: 86.0, 4: 30.0, 5: 80.0, 6: 35.0}
Updated Implementation for adjacency matrix using min heap
xxxxxxxxxx
791# considering dictionary as a heap for given code
2def min_heapify(i,size):
3 lchild = 2*i + 1
4 rchild = 2*i + 2
5 small = i
6 if lchild < size-1 and HtoV[lchild][1] < HtoV[small][1]:
7 small = lchild
8 if rchild < size-1 and HtoV[rchild][1] < HtoV[small][1]:
9 small = rchild
10 if small != i:
11 VtoH[HtoV[small][0]] = i
12 VtoH[HtoV[i][0]] = small
13 (HtoV[small],HtoV[i]) = (HtoV[i], HtoV[small])
14 min_heapify(small,size)
15
16def create_minheap(size):
17 for x in range((size//2)-1,-1,-1):
18 min_heapify(x,size)
19
20def minheap_update(i,size):
21 if i!= 0:
22 while i > 0:
23 parent = (i-1)//2
24 if HtoV[parent][1] > HtoV[i][1]:
25 VtoH[HtoV[parent][0]] = i
26 VtoH[HtoV[i][0]] = parent
27 (HtoV[parent],HtoV[i]) = (HtoV[i], HtoV[parent])
28 else:
29 break
30 i = parent
31
32def delete_min(hsize):
33 VtoH[HtoV[0][0]] = hsize-1
34 VtoH[HtoV[hsize-1][0]] = 0
35 HtoV[hsize-1],HtoV[0] = HtoV[0],HtoV[hsize-1]
36 node,dist = HtoV[hsize-1]
37 hsize = hsize - 1
38 min_heapify(0,hsize)
39 return node,dist,hsize
40
41#global HtoV map heap index to (vertex,distance from source)
42#global VtoH map vertex to heap index
43HtoV, VtoH = {},{}
44def dijkstra(WMat,s):
45 (rows,cols,x) = WMat.shape
46 infinity = float('inf')
47 visited = {}
48 heapsize = rows
49 for v in range(rows):
50 VtoH[v]=v
51 HtoV[v]=[v,infinity]
52 visited[v] = False
53 HtoV[s]= [s,0]
54 create_minheap(heapsize)
55
56 for u in range(rows):
57 nextd,ds,heapsize = delete_min(heapsize)
58 visited[nextd] = True
59 for v in range(cols):
60 if WMat[nextd,v,0] == 1 and (not visited[v]):
61 # update distance of adjacent of v if it is less than to previous one
62 HtoV[VtoH[v]][1] = min(HtoV[VtoH[v]][1],ds+WMat[nextd,v,1])
63 minheap_update(VtoH[v],heapsize)
64
65
66dedges = [(0,1,10),(0,2,80),(1,2,6),(1,4,20),(2,3,70),(4,5,50),(4,6,5),(5,6,10)]
67#edges = dedges + [(j,i,w) for (i,j,w) in dedges]
68size = 7
69import numpy as np
70W = np.zeros(shape=(size,size,2))
71for (i,j,w) in dedges:
72 W[i,j,0] = 1
73 W[i,j,1] = w
74s = 0
75dijkstra(W,s)
76#print(HtoV)
77#print(VtoH)
78for i in range(size):
79 print('Shortest distance from {0} to {1} = {2}'.format(s,i,HtoV[VtoH[i]][1]))
Output
xxxxxxxxxx
71Shortest distance from 0 to 0 = 0
2Shortest distance from 0 to 1 = 10.0
3Shortest distance from 0 to 2 = 16.0
4Shortest distance from 0 to 3 = 86.0
5Shortest distance from 0 to 4 = 30.0
6Shortest distance from 0 to 5 = 80.0
7Shortest distance from 0 to 6 = 35.0
Updated Implementation for adjacency list using min heap
xxxxxxxxxx
781def min_heapify(i,size):
2 lchild = 2*i + 1
3 rchild = 2*i + 2
4 small = i
5 if lchild < size-1 and HtoV[lchild][1] < HtoV[small][1]:
6 small = lchild
7 if rchild < size-1 and HtoV[rchild][1] < HtoV[small][1]:
8 small = rchild
9 if small != i:
10 VtoH[HtoV[small][0]] = i
11 VtoH[HtoV[i][0]] = small
12 (HtoV[small],HtoV[i]) = (HtoV[i], HtoV[small])
13 min_heapify(small,size)
14
15def create_minheap(size):
16 for x in range((size//2)-1,-1,-1):
17 min_heapify(x,size)
18
19def minheap_update(i,size):
20 if i!= 0:
21 while i > 0:
22 parent = (i-1)//2
23 if HtoV[parent][1] > HtoV[i][1]:
24 VtoH[HtoV[parent][0]] = i
25 VtoH[HtoV[i][0]] = parent
26 (HtoV[parent],HtoV[i]) = (HtoV[i], HtoV[parent])
27 else:
28 break
29 i = parent
30
31def delete_min(hsize):
32 VtoH[HtoV[0][0]] = hsize-1
33 VtoH[HtoV[hsize-1][0]] = 0
34 HtoV[hsize-1],HtoV[0] = HtoV[0],HtoV[hsize-1]
35 node,dist = HtoV[hsize-1]
36 hsize = hsize - 1
37 min_heapify(0,hsize)
38 return node,dist,hsize
39
40
41HtoV, VtoH = {},{}
42#global HtoV map heap index to (vertex,distance from source)
43#global VtoH map vertex to heap index
44def dijkstralist(WList,s):
45 infinity = float('inf')
46 visited = {}
47 heapsize = len(WList)
48 for v in WList.keys():
49 VtoH[v]=v
50 HtoV[v]=[v,infinity]
51 visited[v] = False
52 HtoV[s]= [s,0]
53 create_minheap(heapsize)
54
55 for u in WList.keys():
56 nextd,ds,heapsize = delete_min(heapsize)
57 visited[nextd] = True
58 for v,d in WList[nextd]:
59 if not visited[v]:
60 HtoV[VtoH[v]][1] = min(HtoV[VtoH[v]][1],ds+d)
61 minheap_update(VtoH[v],heapsize)
62
63
64dedges = [(0,1,10),(0,2,80),(1,2,6),(1,4,20),(2,3,70),(4,5,50),(4,6,5),(5,6,10)]
65#edges = dedges + [(j,i,w) for (i,j,w) in dedges]
66size = 7
67WL = {}
68for i in range(size):
69 WL[i] = []
70for (i,j,d) in dedges:
71 WL[i].append((j,d))
72s = 0
73dijkstralist(WL,s)
74#print(HtoV)
75#print(VtoH)
76for i in range(size):
77 print('Shortest distance from {0} to {1} = {2}'.format(s,i,HtoV[VtoH[i]][1]))
78
Output
xxxxxxxxxx
71Shortest distance from 0 to 0 = 0
2Shortest distance from 0 to 1 = 10
3Shortest distance from 0 to 2 = 16
4Shortest distance from 0 to 3 = 86
5Shortest distance from 0 to 4 = 30
6Shortest distance from 0 to 5 = 80
7Shortest distance from 0 to 6 = 35
Complexity
Using min-heaps:-
Cumulatively:-
Implementation
xxxxxxxxxx
281def max_heapify(A,size,k):
2 l = 2 * k + 1
3 r = 2 * k + 2
4 largest = k
5 if l < size and A[l] > A[largest]:
6 largest = l
7 if r < size and A[r] > A[largest]:
8 largest = r
9 if largest != k:
10 (A[k], A[largest]) = (A[largest], A[k])
11 max_heapify(A,size,largest)
12
13def build_max_heap(A):
14 n = (len(A)//2)-1
15 for i in range(n, -1, -1):
16 max_heapify(A,len(A),i)
17
18def heapsort(A):
19 build_max_heap(A)
20 n = len(A)
21 for i in range(n-1,-1,-1):
22 A[0],A[i] = A[i],A[0]
23 max_heapify(A,i,0)
24
25
26A = [8,6,9,3,4,5,61,6666]
27heapsort(A)
28print(A)
Output
xxxxxxxxxx
11[3, 4, 5, 6, 8, 9, 61, 6666]
Complexity
For dynamic stored data
How can we improve Insert/delete time? - using tree structure?
A binary search tree is a binary tree that is either empty or satisfies the following conditions:
For each node V in the Tree
Visualization
Implementation
xxxxxxxxxx
1151class Tree:
2# Constructor:
3 def __init__(self,initval=None):
4 self.value = initval
5 if self.value:
6 self.left = Tree()
7 self.right = Tree()
8 else:
9 self.left = None
10 self.right = None
11 return
12 # Only empty node has value None
13 def isempty(self):
14 return (self.value == None)
15 # Leaf nodes have both children empty
16 def isleaf(self):
17 return (self.value != None and self.left.isempty() and self.right.isempty())
18 # Inorder traversal
19 def inorder(self):
20 if self.isempty():
21 return([])
22 else:
23 return(self.left.inorder()+[self.value]+self.right.inorder())
24 # Display Tree as a string
25 def __str__(self):
26 return(str(self.inorder()))
27 # Check if value v occurs in tree
28 def find(self,v):
29 if self.isempty():
30 return(False)
31 if self.value == v:
32 return(True)
33 if v < self.value:
34 return(self.left.find(v))
35 if v > self.value:
36 return(self.right.find(v))
37 # return minimum value for tree rooted on self - Minimum is left most node in the tree
38 def minval(self):
39 if self.left.isempty():
40 return(self.value)
41 else:
42 return(self.left.minval())
43 # return max value for tree rooted on self - Maximum is right most node in the tree
44 def maxval(self):
45 if self.right.isempty():
46 return(self.value)
47 else:
48 return(self.right.maxval())
49 # insert new element in binary search tree
50 def insert(self,v):
51 if self.isempty():
52 self.value = v
53 self.left = Tree()
54 self.right = Tree()
55 if self.value == v:
56 return
57 if v < self.value:
58 self.left.insert(v)
59 return
60 if v > self.value:
61 self.right.insert(v)
62 return
63 # delete element from binary search tree
64 def delete(self,v):
65 if self.isempty():
66 return
67 if v < self.value:
68 self.left.delete(v)
69 return
70 if v > self.value:
71 self.right.delete(v)
72 return
73 if v == self.value:
74 if self.isleaf():
75 self.makeempty()
76 elif self.left.isempty():
77 self.copyright()
78 elif self.right.isempty():
79 self.copyleft()
80 else:
81 self.value = self.left.maxval()
82 self.left.delete(self.left.maxval())
83 return
84 # Convert leaf node to empty node
85 def makeempty(self):
86 self.value = None
87 self.left = None
88 self.right = None
89 return
90 # Promote left child
91 def copyleft(self):
92 self.value = self.left.value
93 self.right = self.left.right
94 self.left = self.left.left
95 return
96 # Promote right child
97 def copyright(self):
98 self.value = self.right.value
99 self.left = self.right.left
100 self.right = self.right.right
101 return
102
103
104
105T = Tree()
106bst = [9,8,7,6,5,4,3,2,1]
107k = 4
108for i in bst:
109 T.insert(i)
110print('Element in BST are:= ',T.inorder())
111print('Maximum element in BST are:= ',T.maxval())
112print('Minimum element in BST are:= ',T.minval())
113print(k,'is present or not = ',T.find(k))
114T.delete(3)
115print('Element in BST after delete 3:= ',T.inorder())
Output
xxxxxxxxxx
51Element in BST are:= [1, 2, 3, 4, 5, 6, 7, 8, 9]
2Maximum element in BST are:= 9
3Minimum element in BST are:= 1
44 is present or not = True
5Element in BST after delete 3:= [1, 2, 4, 5, 6, 7, 8, 9]
Complexity