Sharding

Introduction

  • Database sharding is a database architecture strategy used to divide and distribute data across multiple database instances or servers

  • Sharding spreads the database load evenly across multiple servers, resulting in better query performance and responsiveness.

  • As data grows, new shards can be added, allowing for nearly unlimited scalability.

Strategy

Range-based

  • Range-based sharding involves partitioning data based on a specific range of values in the shard key.

Directory-based

  • Directory-based sharding maintains a central directory that maps shard keys to their corresponding shards. This directory helps route queries to the appropriate shards efficiently. However, it can introduce a single point of failure.

Routing Implemetation

const app = require("express")();
const {Client} = require("pg");
const HashRing = require("hashring");
const crypto = require("crypto");

// create consistent hashing instance and add 3 database ports
const hr = new HashRing();
hr.add("5432");
hr.add("5433");
hr.add("5434");

// create postgres client for 3 shards
const clients = {
    "5432": new Client({
        "host": "172.17.0.2",
        "port": "5432",
        "user": "postgres",
        "password": "postgres",
        "database": "postgres",
    }),

    "5433": new Client({
        "host": "172.17.0.3",
        "port": "5433",
        "user": "postgres",
        "password": "postgres",
        "database": "postgres",
    }),

    "5434": new Client({
        "host": "172.17.0.4",
        "port": "5434",
        "user": "postgres",
        "password": "postgres",
        "database": "postgres",
    })
};

// Now connect the three database servers

connect();

async function connect() {
    try {
        await clients["5432"].connect();
        await clients["5433"].connect();
        await clients["5434"].connect();
    } catch(e){

    }
}

// Write to sharded database 

app.post("/", async (req, res) => {
    const url = req.query.url;
    const hash = crypto.createHash("sha256").update(url).digest("base64");
    const urlId = hash.substring(0,5);
    const server = hr.get(urlId);

    console.log(server);

    await clients[server].query("INSERT INTO url_table (url, url_id) VALUES($1,$2)", [url, urlId]);

    res.send({
        "urlId": urlId,
        "url": url,
        "server": server,
    });
});

// Read from sharded database 

app.get("/:urlId", async (req, res) => {
    const urlId = req.params.urlId;
    const server = hr.get(urlId);
    const result = await clients[server].query("SELECT * FROM URL_TABLE WHERE URL_ID = $1", [urlId]);
    if(result.rowCount > 0) {
        res.send({
            "urlId": result.url_id,
            "url": result.url,
            "server": server,
        })
    } else {
        res.sendStatus(404);
    }
})

Partitioning

  • Some of the database (e.g: PostgresSQL) supports partitioning natively

// create overall 
CREATE TABLE sales (
sale_id SERIAL,
sale_date DATE NOT NULL,
product_id INT,
quantity INT,
amount NUMERIC,
PRIMARY KEY (sale_id, sale_date)
) PARTITION BY RANGE (sale_date);

// create tables for each partition
CREATE TABLE sales_january PARTITION OF sales
    FOR VALUES FROM ('2023-01-01') TO ('2023-02-01');

CREATE TABLE sales_february PARTITION OF sales
    FOR VALUES FROM ('2023-02-01') TO ('2023-03-01');

CREATE TABLE sales_march PARTITION OF sales
    FOR VALUES FROM ('2023-03-01') TO ('2023-04-01');
    

// insert    
INSERT INTO sales (sale_date, product_id, quantity, amount)
VALUES ('2023-01-15', 101, 5, 100.00);

INSERT INTO sales (sale_date, product_id, quantity, amount)
VALUES ('2023-02-20', 102, 10, 200.00);

INSERT INTO sales (sale_date, product_id, quantity, amount)
VALUES ('2023-03-10', 103, 8, 150.00);

Last updated

Was this helpful?