|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding:UTF-8 |
| 3 | +from ssj.lib import Stack |
| 4 | + |
| 5 | +__author__ = 'shenshijun' |
| 6 | + |
| 7 | + |
class Node(object):
    """A single B-tree node: a sorted key list plus its child pointers.

    Attributes:
        keys: the node's keys, kept in ascending order.
        childs: child pointers; a node created without explicit children
            gets ``len(keys) + 1`` ``None`` slots.
        is_leaf: flag supplied by the caller marking the node as a leaf.
        parent: the parent node, or ``None``.
    """

    def __init__(self, is_leaf, keys, childs=None, parent=None):
        """
        Build a node from ``keys`` (sorted on construction) and ``childs``.

        A production implementation would also carry an array of pointers to
        the actual records; this node stores keys only.

        :param is_leaf: whether the node is a leaf
        :param keys: iterable of keys; a sorted copy is stored
        :param childs: list of children, stored as-is (not copied); when
            omitted, ``len(keys) + 1`` empty (``None``) slots are created
        :param parent: parent node, or None
        """
        self.keys = sorted(keys)
        self.is_leaf = is_leaf
        if childs is None:
            # n keys separate n + 1 child slots.
            self.childs = [None] * (len(self.keys) + 1)
        else:
            self.childs = childs
        self.parent = parent

    def __str__(self):
        return "".join(['Node(keys=', ",".join(str(key) for key in self.keys),
                        ',leaf' if self.is_leaf else ',not leaf',
                        ',childs num=', str(len(self.childs)), ')\n'])

    def __len__(self):
        """Return the number of keys stored in this node."""
        # Derived from the key list so it can never drift out of sync.
        return len(self.keys)

    def _first_greater(self, key):
        """Return the index of the first stored key strictly greater than
        ``key``, or ``len(self.keys)`` when there is none."""
        for index, existing in enumerate(self.keys):
            if existing > key:
                return index
        return len(self.keys)

    def append(self, key):
        """
        Insert ``key`` at its sorted position and return that position.

        An empty (``None``) child slot is added alongside the key: at the same
        index when the key lands before an existing key, or at the very end of
        ``childs`` when the key becomes the largest one.

        :param key: the key to insert
        :return: index of the new key inside ``self.keys``
        """
        index = self._first_greater(key)
        if index == len(self.keys):
            self.keys.append(key)
            self.childs.append(None)
        else:
            self.keys.insert(index, key)
            self.childs.insert(index, None)
        return index

    def search_child(self, instance):
        """
        Return the child to descend into when looking for ``instance``.

        This is the child stored at the index of the first key strictly
        greater than ``instance``; when no key is greater, it is the child at
        index ``len(self)``.

        :param instance: the key being looked up
        :return: the selected entry of ``self.childs`` (may be None)
        """
        return self.childs[self._first_greater(instance)]
| 56 | + |
class BTree(object):
    """
    B-tree (a multi-way search tree, not a binary tree).

    ``load_factor`` plays the role of the minimum degree: a node counts as
    full once it holds ``2 * load_factor - 1`` keys, and full nodes are split
    on the way down during insertion.
    """

    def __init__(self, load_factor=4, *vargs):
        """
        Create a tree and insert every key given in ``vargs``.

        :param load_factor: minimum degree of the tree, must be >= 2
        :param vargs: initial keys, inserted in the order given
        :raises ValueError: if ``load_factor`` is smaller than 2
        """
        if load_factor < 2:
            # With a smaller value a split would produce empty halves.
            raise ValueError("load_factor must be at least 2")
        self.__root = None
        self.__load_factor = load_factor
        self.__size = 0
        # Explicit loop: map() is lazy on Python 3 and would insert nothing.
        for key in vargs:
            self.insert(key)

    def insert(self, key):
        """
        Insert ``key`` into the tree.

        Every full node met on the way down is split before it is entered, so
        the leaf finally reached always has room and no second fullness check
        is needed after the key is added.

        :param key: the key to insert
        :return: None
        """
        self.__size += 1
        if self.__root is None:
            self.__root = Node(True, [key])
            return
        cur_node = self.__split_toward(self.__root, key)
        while not cur_node.is_leaf:
            cur_node = self.__split_toward(cur_node.search_child(key), key)
        cur_node.append(key)

    def __split_toward(self, node, key):
        """Split ``node`` if it is full and return the node that ``key``
        belongs in: ``node`` itself when no split happened, otherwise the
        matching half."""
        left_node, right_node = self.__split(node)
        if left_node is None or right_node is None:
            # (None, None) means the node was not full.
            return node
        # The old node object is left untouched by the split, so the promoted
        # separator can still be read from it.  Compare against the separator
        # (not the left half's largest key) so that a key lying between the
        # two still goes to the left half.
        separator = node.keys[self.__load_factor - 1]
        if key < separator:
            return left_node
        return right_node

    def __split(self, node):
        """
        Split ``node`` around its middle key when it is full.

        The middle key moves up into the parent (a new root is created when
        ``node`` had no parent) and the remaining keys and children are
        divided between two new nodes that replace ``node`` in the parent.

        :param node: the node to examine
        :return: ``(left_node, right_node)`` after a split, or
            ``(None, None)`` when ``node`` was not full
        """
        if not self.full(node):
            return None, None
        degree = self.__load_factor
        parent_node = node.parent
        middle_key = node.keys[degree - 1]
        if parent_node is None:
            # Splitting the root: grow the tree by one level.
            parent_node = Node(False, [])
            self.__root = parent_node
        parent_middle_index = parent_node.append(middle_key)
        left_node = Node(node.is_leaf, node.keys[:degree - 1],
                         node.childs[:degree], parent_node)
        right_node = Node(node.is_leaf, node.keys[degree:],
                          node.childs[degree:], parent_node)
        # Re-point the moved children at the half that now owns them.
        for half in (left_node, right_node):
            for child in half.childs:
                if child is not None:
                    child.parent = half
        # The two halves take the slots on either side of the promoted key,
        # overwriting the slot that used to reference ``node``.
        parent_node.childs[parent_middle_index] = left_node
        parent_node.childs[parent_middle_index + 1] = right_node
        return left_node, right_node

    def search(self, instance):
        """
        Look up ``instance`` in the tree.

        :param instance: the key to find
        :return: ``(node, index)`` such that ``node.keys[index] == instance``,
            or ``(None, None)`` when the key is absent or the tree is empty
        """
        return self.__search(self.__root, instance)

    def full(self, node):
        """Return True when ``node`` holds at least ``2 * load_factor - 1``
        keys."""
        return len(node) >= (self.__load_factor * 2 - 1)

    @classmethod
    def __search(cls, root, instance):
        """Search the subtree rooted at ``root``; see :meth:`search`."""
        cur_node = root
        while cur_node is not None:
            cur_len = len(cur_node)
            x = 0
            while x < cur_len and cur_node.keys[x] < instance:
                x += 1
            # x == cur_len means every key is smaller than instance, so there
            # is no key at index x to compare with.
            if x < cur_len and cur_node.keys[x] == instance:
                return cur_node, x
            if cur_node.is_leaf:
                return None, None
            cur_node = cur_node.childs[x]
        return None, None

    def min(self):
        """
        Return the smallest key: the first key of the leftmost leaf.

        :raises ValueError: if the tree is empty
        """
        if self.__root is None:
            raise ValueError("min() of an empty BTree")
        cur_node = self.__root
        while not cur_node.is_leaf:
            cur_node = cur_node.childs[0]
        return cur_node.keys[0]

    def max(self):
        """
        Return the largest key: the last key of the rightmost leaf.

        :raises ValueError: if the tree is empty
        """
        if self.__root is None:
            raise ValueError("max() of an empty BTree")
        cur_node = self.__root
        while not cur_node.is_leaf:
            cur_node = cur_node.childs[-1]
        return cur_node.keys[-1]

    def midorder(self, f):
        """
        In-order traversal of the B-tree.

        :param f: callable applied to every key, in ascending key order
        :return: list with the results of ``f``; empty for an empty tree
        """
        result = []
        if self.__root is None:
            return result
        # Each stack entry is (internal node, index of the next key to emit).
        stack = []
        cur_node = self.__root
        while True:
            if cur_node.is_leaf:
                # A leaf contributes all of its keys at once.
                result.extend([f(key) for key in cur_node.keys])
                if not stack:
                    return result
                # Emit the separator that follows the subtree just finished,
                # then continue with the subtree to its right.
                cur_node, i = stack.pop()
                result.append(f(cur_node.keys[i]))
                if i < len(cur_node) - 1:
                    stack.append((cur_node, i + 1))
                cur_node = cur_node.childs[i + 1]
            else:
                stack.append((cur_node, 0))
                cur_node = cur_node.childs[0]

    def __str__(self):
        return "\n".join(self.midorder(str))

    def test(self):
        """Debug helper: print a separator line, the root, and the root's
        first two children."""
        print("-" * 20)
        print(self.__root)
        if self.__root is not None:
            print(self.__root.childs[0])
            print(self.__root.childs[1])
| 190 | + |
| 191 | + |
def main():
    """Build a sample B-tree with load factor 3 from a run of letters, then
    print its keys in order followed by its largest and smallest key."""
    btree = BTree(3, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
                  'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U',
                  'V', 'X', 'Y', 'Z')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(btree)
    print(btree.max())
    print(btree.min())


if __name__ == "__main__":
    main()
0 commit comments