Sharding
Introduction

Database sharding is a database architecture strategy used to divide and distribute data across multiple database instances or servers
Sharding spreads the database load evenly across multiple servers, resulting in better query performance and responsiveness.
As data grows, new shards can be added, allowing for nearly unlimited scalability.
Strategy
Range-based
Range-based sharding involves partitioning data based on a specific range of values in the shard key.

Directory-based
Directory-based sharding maintains a central directory that maps shard keys to their corresponding shards. This directory helps route queries to the appropriate shards efficiently. However, it can introduce a single point of failure.

Routing Implemetation
const app = require("express")();
const {Client} = require("pg");
const HashRing = require("hashring");
const crypto = require("crypto");
// create consistent hashing instance and add 3 database ports
const hr = new HashRing();
hr.add("5432");
hr.add("5433");
hr.add("5434");
// create postgres client for 3 shards
const clients = {
"5432": new Client({
"host": "172.17.0.2",
"port": "5432",
"user": "postgres",
"password": "postgres",
"database": "postgres",
}),
"5433": new Client({
"host": "172.17.0.3",
"port": "5433",
"user": "postgres",
"password": "postgres",
"database": "postgres",
}),
"5434": new Client({
"host": "172.17.0.4",
"port": "5434",
"user": "postgres",
"password": "postgres",
"database": "postgres",
})
};
// Now connect the three database servers
connect();
async function connect() {
try {
await clients["5432"].connect();
await clients["5433"].connect();
await clients["5434"].connect();
} catch(e){
}
}
// Write to sharded database
app.post("/", async (req, res) => {
const url = req.query.url;
const hash = crypto.createHash("sha256").update(url).digest("base64");
const urlId = hash.substring(0,5);
const server = hr.get(urlId);
console.log(server);
await clients[server].query("INSERT INTO url_table (url, url_id) VALUES($1,$2)", [url, urlId]);
res.send({
"urlId": urlId,
"url": url,
"server": server,
});
});
// Read from sharded database
app.get("/:urlId", async (req, res) => {
const urlId = req.params.urlId;
const server = hr.get(urlId);
const result = await clients[server].query("SELECT * FROM URL_TABLE WHERE URL_ID = $1", [urlId]);
if(result.rowCount > 0) {
res.send({
"urlId": result.url_id,
"url": result.url,
"server": server,
})
} else {
res.sendStatus(404);
}
})
Partitioning
Some of the database (e.g: PostgresSQL) supports partitioning natively
// create overall
CREATE TABLE sales (
sale_id SERIAL,
sale_date DATE NOT NULL,
product_id INT,
quantity INT,
amount NUMERIC,
PRIMARY KEY (sale_id, sale_date)
) PARTITION BY RANGE (sale_date);
// create tables for each partition
CREATE TABLE sales_january PARTITION OF sales
FOR VALUES FROM ('2023-01-01') TO ('2023-02-01');
CREATE TABLE sales_february PARTITION OF sales
FOR VALUES FROM ('2023-02-01') TO ('2023-03-01');
CREATE TABLE sales_march PARTITION OF sales
FOR VALUES FROM ('2023-03-01') TO ('2023-04-01');
// insert
INSERT INTO sales (sale_date, product_id, quantity, amount)
VALUES ('2023-01-15', 101, 5, 100.00);
INSERT INTO sales (sale_date, product_id, quantity, amount)
VALUES ('2023-02-20', 102, 10, 200.00);
INSERT INTO sales (sale_date, product_id, quantity, amount)
VALUES ('2023-03-10', 103, 8, 150.00);
Last updated
Was this helpful?