Unit 4: Advanced Trees

2-4 Trees, B Trees, Disk-based Operations

Duration: 8 Hours

Learning Objectives

1. 2-4 Trees

A 2-4 tree is a self-balancing tree where each internal node has 2, 3, or 4 children, and all leaves are at the same level.

Properties of 2-4 Trees:

  • Node Types: 2-nodes (1 key), 3-nodes (2 keys), 4-nodes (3 keys)
  • Balance: All leaves at the same level
  • Height: O(log n) guaranteed
  • Operations: Search, insert, delete in O(log n)

2-4 Tree Operations

Visualize insertion and balancing in 2-4 trees:

Insert values to build the 2-4 tree

2-4 Tree Implementation

#include 
#include 
#include 

class TwoFourNode {
public:
    std::vector keys;
    std::vector children;
    bool isLeaf;
    
    TwoFourNode(bool leaf = true) : isLeaf(leaf) {}
    
    ~TwoFourNode() {
        for (auto child : children) {
            delete child;
        }
    }
    
    bool isFull() const {
        return keys.size() == 3; // 4-node has 3 keys
    }
    
    void insertKey(int key) {
        auto pos = std::lower_bound(keys.begin(), keys.end(), key);
        keys.insert(pos, key);
    }
    
    void insertChild(int index, TwoFourNode* child) {
        children.insert(children.begin() + index, child);
    }
    
    int findChild(int key) {
        int i = 0;
        while (i < keys.size() && key > keys[i]) {
            i++;
        }
        return i;
    }
};

class TwoFourTree {
private:
    TwoFourNode* root;
    
public:
    TwoFourTree() {
        root = new TwoFourNode();
    }
    
    ~TwoFourTree() {
        delete root;
    }
    
    void insert(int key) {
        if (root->isFull()) {
            TwoFourNode* newRoot = new TwoFourNode(false);
            newRoot->children.push_back(root);
            splitChild(newRoot, 0);
            root = newRoot;
        }
        insertNonFull(root, key);
    }
    
private:
    void insertNonFull(TwoFourNode* node, int key) {
        if (node->isLeaf) {
            node->insertKey(key);
        } else {
            int childIndex = node->findChild(key);
            
            if (node->children[childIndex]->isFull()) {
                splitChild(node, childIndex);
                if (key > node->keys[childIndex]) {
                    childIndex++;
                }
            }
            
            insertNonFull(node->children[childIndex], key);
        }
    }
    
    void splitChild(TwoFourNode* parent, int childIndex) {
        TwoFourNode* fullChild = parent->children[childIndex];
        TwoFourNode* newChild = new TwoFourNode(fullChild->isLeaf);
        
        // Move last key and children to new node
        newChild->keys.push_back(fullChild->keys[2]);
        fullChild->keys.pop_back();
        
        if (!fullChild->isLeaf) {
            newChild->children.push_back(fullChild->children[2]);
            newChild->children.push_back(fullChild->children[3]);
            fullChild->children.pop_back();
            fullChild->children.pop_back();
        }
        
        // Move middle key up to parent
        int middleKey = fullChild->keys[1];
        fullChild->keys.pop_back();
        
        parent->keys.insert(parent->keys.begin() + childIndex, middleKey);
        parent->children.insert(parent->children.begin() + childIndex + 1, newChild);
    }
    
public:
    bool search(int key) {
        return searchHelper(root, key);
    }
    
private:
    bool searchHelper(TwoFourNode* node, int key) {
        int i = 0;
        while (i < node->keys.size() && key > node->keys[i]) {
            i++;
        }
        
        if (i < node->keys.size() && key == node->keys[i]) {
            return true;
        }
        
        if (node->isLeaf) {
            return false;
        }
        
        return searchHelper(node->children[i], key);
    }
    
public:
    void display() {
        displayHelper(root, 0);
    }
    
private:
    void displayHelper(TwoFourNode* node, int level) {
        if (node) {
            std::cout << std::string(level * 2, ' ') << "Keys: ";
            for (int key : node->keys) {
                std::cout << key << " ";
            }
            std::cout << std::endl;
            
            for (auto child : node->children) {
                displayHelper(child, level + 1);
            }
        }
    }
};

2. B-Trees

B-trees are a generalization of 2-4 trees designed for systems that read and write large blocks of data, particularly databases and file systems.

B-Tree Properties:

  • Order m: Each node has at most m-1 keys
  • Minimum Degree t: Non-root nodes have at least t-1 keys
  • Balanced: All leaves at the same level
  • Disk Optimization: Designed for block-based storage

B-Tree Operations

Visualize B-tree structure and operations:

B-Tree with minimum degree t = 3

B-Tree Implementation

#include 
#include 
#include 

class BTreeNode {
public:
    std::vector keys;
    std::vector children;
    bool isLeaf;
    int minDegree;
    
    BTreeNode(int degree, bool leaf = true) 
        : minDegree(degree), isLeaf(leaf) {}
    
    ~BTreeNode() {
        for (auto child : children) {
            delete child;
        }
    }
    
    bool isFull() const {
        return keys.size() == 2 * minDegree - 1;
    }
    
    void insertNonFull(int key) {
        int i = keys.size() - 1;
        
        if (isLeaf) {
            keys.push_back(0);
            while (i >= 0 && keys[i] > key) {
                keys[i + 1] = keys[i];
                i--;
            }
            keys[i + 1] = key;
        } else {
            while (i >= 0 && keys[i] > key) {
                i--;
            }
            i++;
            
            if (children[i]->isFull()) {
                splitChild(i);
                if (keys[i] < key) {
                    i++;
                }
            }
            children[i]->insertNonFull(key);
        }
    }
    
    void splitChild(int index) {
        BTreeNode* fullChild = children[index];
        BTreeNode* newChild = new BTreeNode(minDegree, fullChild->isLeaf);
        
        int mid = minDegree - 1;
        
        // Move second half of keys to new node
        for (int j = 0; j < minDegree - 1; j++) {
            newChild->keys.push_back(fullChild->keys[mid + 1 + j]);
        }
        
        // Move second half of children if not leaf
        if (!fullChild->isLeaf) {
            for (int j = 0; j < minDegree; j++) {
                newChild->children.push_back(fullChild->children[mid + 1 + j]);
            }
        }
        
        // Resize original child
        fullChild->keys.resize(mid);
        if (!fullChild->isLeaf) {
            fullChild->children.resize(mid + 1);
        }
        
        // Insert new child into this node
        children.insert(children.begin() + index + 1, newChild);
        keys.insert(keys.begin() + index, fullChild->keys[mid]);
    }
    
    bool search(int key) {
        int i = 0;
        while (i < keys.size() && key > keys[i]) {
            i++;
        }
        
        if (i < keys.size() && key == keys[i]) {
            return true;
        }
        
        if (isLeaf) {
            return false;
        }
        
        return children[i]->search(key);
    }
    
    void display(int level = 0) {
        std::cout << std::string(level * 2, ' ') << "[";
        for (size_t i = 0; i < keys.size(); i++) {
            std::cout << keys[i];
            if (i < keys.size() - 1) std::cout << ", ";
        }
        std::cout << "]" << std::endl;
        
        for (auto child : children) {
            child->display(level + 1);
        }
    }
};

class BTree {
private:
    BTreeNode* root;
    int minDegree;
    
public:
    BTree(int degree) : minDegree(degree) {
        root = new BTreeNode(degree);
    }
    
    ~BTree() {
        delete root;
    }
    
    void insert(int key) {
        if (root->isFull()) {
            BTreeNode* newRoot = new BTreeNode(minDegree, false);
            newRoot->children.push_back(root);
            newRoot->splitChild(0);
            root = newRoot;
        }
        root->insertNonFull(key);
    }
    
    bool search(int key) {
        return root->search(key);
    }
    
    void display() {
        std::cout << "B-Tree structure:" << std::endl;
        root->display();
    }
    
    int getHeight() {
        return getHeightHelper(root);
    }
    
private:
    int getHeightHelper(BTreeNode* node) {
        if (node->isLeaf) {
            return 1;
        }
        return 1 + getHeightHelper(node->children[0]);
    }
};

3. Disk-based Operations

B-trees are specifically designed for storage systems where data is accessed in blocks from disk.

Disk Access Considerations:

  • Block Size: Node size matches disk block size
  • I/O Complexity: Minimize disk reads/writes
  • Locality: Keep related data in same block
  • Buffering: Cache frequently accessed nodes

Disk-Optimized B-Tree

#include 
#include 
#include 
#include 

class DiskBTreeNode {
public:
    static const int BLOCK_SIZE = 4096; // 4KB block
    static const int MAX_KEYS = (BLOCK_SIZE - sizeof(bool) - sizeof(int)) / 
                               (sizeof(int) + sizeof(long)); // File offsets
    
    int keyCount;
    bool isLeaf;
    int keys[MAX_KEYS];
    long children[MAX_KEYS + 1]; // File offsets to child nodes
    
    DiskBTreeNode(bool leaf = true) : keyCount(0), isLeaf(leaf) {
        memset(keys, 0, sizeof(keys));
        memset(children, -1, sizeof(children));
    }
    
    bool isFull() const {
        return keyCount == MAX_KEYS;
    }
    
    void insertKey(int key) {
        int i = keyCount - 1;
        while (i >= 0 && keys[i] > key) {
            keys[i + 1] = keys[i];
            i--;
        }
        keys[i + 1] = key;
        keyCount++;
    }
    
    int findChild(int key) {
        int i = 0;
        while (i < keyCount && key > keys[i]) {
            i++;
        }
        return i;
    }
};

class DiskBTree {
private:
    std::fstream file;
    std::string filename;
    long rootOffset;
    int minDegree;
    long nextFreeOffset;
    
public:
    DiskBTree(const std::string& fname, int degree) 
        : filename(fname), minDegree(degree) {
        
        file.open(filename, std::ios::in | std::ios::out | std::ios::binary);
        
        if (!file.is_open()) {
            // Create new file
            file.open(filename, std::ios::out | std::ios::binary);
            file.close();
            file.open(filename, std::ios::in | std::ios::out | std::ios::binary);
            
            // Initialize with root node
            DiskBTreeNode root(true);
            rootOffset = 0;
            nextFreeOffset = sizeof(DiskBTreeNode);
            
            writeNode(rootOffset, root);
            writeHeader();
        } else {
            readHeader();
        }
    }
    
    ~DiskBTree() {
        if (file.is_open()) {
            writeHeader();
            file.close();
        }
    }
    
private:
    void writeHeader() {
        file.seekp(0, std::ios::end);
        file.write(reinterpret_cast(&rootOffset), sizeof(long));
        file.write(reinterpret_cast(&nextFreeOffset), sizeof(long));
        file.write(reinterpret_cast(&minDegree), sizeof(int));
    }
    
    void readHeader() {
        file.seekg(-sizeof(long) - sizeof(long) - sizeof(int), std::ios::end);
        file.read(reinterpret_cast(&rootOffset), sizeof(long));
        file.read(reinterpret_cast(&nextFreeOffset), sizeof(long));
        file.read(reinterpret_cast(&minDegree), sizeof(int));
    }
    
    void writeNode(long offset, const DiskBTreeNode& node) {
        file.seekp(offset);
        file.write(reinterpret_cast(&node), sizeof(DiskBTreeNode));
        file.flush();
    }
    
    DiskBTreeNode readNode(long offset) {
        DiskBTreeNode node;
        file.seekg(offset);
        file.read(reinterpret_cast(&node), sizeof(DiskBTreeNode));
        return node;
    }
    
    long allocateNode() {
        long offset = nextFreeOffset;
        nextFreeOffset += sizeof(DiskBTreeNode);
        return offset;
    }
    
public:
    void insert(int key) {
        DiskBTreeNode root = readNode(rootOffset);
        
        if (root.isFull()) {
            long newRootOffset = allocateNode();
            DiskBTreeNode newRoot(false);
            newRoot.children[0] = rootOffset;
            
            splitChild(newRootOffset, newRoot, 0);
            writeNode(newRootOffset, newRoot);
            rootOffset = newRootOffset;
        }
        
        insertNonFull(rootOffset, key);
    }
    
private:
    void insertNonFull(long nodeOffset, int key) {
        DiskBTreeNode node = readNode(nodeOffset);
        
        if (node.isLeaf) {
            node.insertKey(key);
            writeNode(nodeOffset, node);
        } else {
            int childIndex = node.findChild(key);
            DiskBTreeNode child = readNode(node.children[childIndex]);
            
            if (child.isFull()) {
                splitChild(nodeOffset, node, childIndex);
                node = readNode(nodeOffset); // Re-read after split
                if (key > node.keys[childIndex]) {
                    childIndex++;
                }
            }
            
            insertNonFull(node.children[childIndex], key);
        }
    }
    
    void splitChild(long parentOffset, DiskBTreeNode& parent, int childIndex) {
        long fullChildOffset = parent.children[childIndex];
        DiskBTreeNode fullChild = readNode(fullChildOffset);
        
        long newChildOffset = allocateNode();
        DiskBTreeNode newChild(fullChild.isLeaf);
        
        int mid = minDegree - 1;
        
        // Move second half of keys
        for (int j = 0; j < minDegree - 1; j++) {
            newChild.keys[j] = fullChild.keys[mid + 1 + j];
        }
        newChild.keyCount = minDegree - 1;
        
        // Move second half of children if not leaf
        if (!fullChild.isLeaf) {
            for (int j = 0; j < minDegree; j++) {
                newChild.children[j] = fullChild.children[mid + 1 + j];
            }
        }
        
        fullChild.keyCount = mid;
        
        // Insert new child reference in parent
        for (int j = parent.keyCount; j > childIndex; j--) {
            parent.children[j + 1] = parent.children[j];
        }
        parent.children[childIndex + 1] = newChildOffset;
        
        for (int j = parent.keyCount - 1; j >= childIndex; j--) {
            parent.keys[j + 1] = parent.keys[j];
        }
        parent.keys[childIndex] = fullChild.keys[mid];
        parent.keyCount++;
        
        // Write all modified nodes
        writeNode(fullChildOffset, fullChild);
        writeNode(newChildOffset, newChild);
        writeNode(parentOffset, parent);
    }
    
public:
    bool search(int key) {
        return searchHelper(rootOffset, key);
    }
    
private:
    bool searchHelper(long nodeOffset, int key) {
        DiskBTreeNode node = readNode(nodeOffset);
        
        int i = 0;
        while (i < node.keyCount && key > node.keys[i]) {
            i++;
        }
        
        if (i < node.keyCount && key == node.keys[i]) {
            return true;
        }
        
        if (node.isLeaf) {
            return false;
        }
        
        return searchHelper(node.children[i], key);
    }
    
public:
    void displayStats() {
        std::cout << "B-Tree Statistics:" << std::endl;
        std::cout << "File: " << filename << std::endl;
        std::cout << "Block size: " << DiskBTreeNode::BLOCK_SIZE << " bytes" << std::endl;
        std::cout << "Max keys per node: " << DiskBTreeNode::MAX_KEYS << std::endl;
        std::cout << "Minimum degree: " << minDegree << std::endl;
        std::cout << "Root offset: " << rootOffset << std::endl;
        std::cout << "Next free offset: " << nextFreeOffset << std::endl;
        
        // Calculate file size
        file.seekg(0, std::ios::end);
        long fileSize = file.tellg();
        std::cout << "File size: " << fileSize << " bytes" << std::endl;
        std::cout << "Number of nodes: " << nextFreeOffset / sizeof(DiskBTreeNode) << std::endl;
    }
};

4. Performance Comparison

Operation BST 2-4 Tree B-Tree
Search O(n) worst O(log n) O(log n)
Insert O(n) worst O(log n) O(log n)
Delete O(n) worst O(log n) O(log n)
Disk I/O Not optimized Not optimized Optimized
Height O(n) worst O(log n) O(log_t n)

5. Complete Example Program

#include 

int main() {
    std::cout << "=== Unit 4: Advanced Trees Demo ===" << std::endl;
    
    // 2-4 Tree Demo
    std::cout << "\n--- 2-4 Tree Demo ---" << std::endl;
    TwoFourTree twoFourTree;
    
    int values[] = {10, 20, 5, 6, 12, 30, 7, 17};
    for (int val : values) {
        std::cout << "Inserting " << val << std::endl;
        twoFourTree.insert(val);
    }
    
    twoFourTree.display();
    
    std::cout << "Search 12: " << (twoFourTree.search(12) ? "Found" : "Not found") << std::endl;
    std::cout << "Search 25: " << (twoFourTree.search(25) ? "Found" : "Not found") << std::endl;
    
    // B-Tree Demo
    std::cout << "\n--- B-Tree Demo ---" << std::endl;
    BTree bTree(3); // Minimum degree 3
    
    int bValues[] = {10, 20, 5, 6, 12, 30, 7, 17, 15, 25, 40, 50};
    for (int val : bValues) {
        std::cout << "Inserting " << val << std::endl;
        bTree.insert(val);
    }
    
    bTree.display();
    std::cout << "B-Tree height: " << bTree.getHeight() << std::endl;
    
    // Disk B-Tree Demo
    std::cout << "\n--- Disk B-Tree Demo ---" << std::endl;
    {
        DiskBTree diskTree("btree.dat", 3);
        
        for (int val : bValues) {
            diskTree.insert(val);
        }
        
        std::cout << "Search 15: " << (diskTree.search(15) ? "Found" : "Not found") << std::endl;
        diskTree.displayStats();
    } // File automatically closed
    
    return 0;
}