Central Limit Theorem
Math
Statistics
Bar Chart
Code
= {
chart const margin = { top: 20, right: 10, bottom: 20, left: 10 };
const width = fullWidth - margin.left - margin.right,
= fullHeight - margin.top - margin.bottom;
height
const window = d3.create("svg")
.attr("width", "100%")
.attr("height", "100%")
.attr("viewBox", `0 0 ${fullWidth} ${fullHeight}`)
.attr("preserveAspectRatio", "xMidYMid meet");
const svg = window.append("g")
.attr("class", "clt")
.attr("transform", `translate(${margin.left},${margin.top})`);
let delay = defaults.delay,
= defaults.n;
n
const f = {
sample: Math.random,
mu: 1 / 2,
sigma: 1 / (2 * Math.sqrt(3))
;
}
function pdf(x) {
return Math.sqrt(n) / Math.sqrt(2 * Math.PI) / f.sigma
* Math.exp(-n * (x - f.mu) * (x - f.mu) / (2 * f.sigma * f.sigma));
;
}
const x = d3.scaleLinear()
.domain([0, 1])
.rangeRound([0, width]);
const y1 = height / 3,
= height / 2;
y2
const y = d3.scaleLinear()
.domain([0, pdf(f.mu)])
.range([0, height - y2]);
const histogram = d3.histogram()
.domain(x.domain())
.thresholds(x.ticks(20));
const area = d3.area()
.x(d => x(d[0]))
.y0(y2)
.y1(d => y2 + y(d[1]))
.curve(d3.curveBasis);
.append("path")
svg.attr("class", "line");
.append("g")
svg.attr("class", "bars");
const axis = svg.selectAll(".axis")
.data([
y: 0, label: "draw" },
{ y: y1, label: "average" },
{ y: y2, label: "count" },
{
]).enter().append("g")
.attr("class", "axis")
.attr("transform", d => `translate(0,${d.y})`);
.append("path")
axis.attr("d", `M0,0H${width}`)
.append("text")
axis.attr("dominant-baseline", "hanging")
.attr("dy", 5)
.text(d => d.label);
let counts = [];
function renderBars() {
if (counts.length < 1) {
return;
}
const data = histogram(counts)
.map(d => { d.y = d.length / counts.length; return d; })
.filter(d => d.x1 > d.x0);
const ymax = d3.max(data, d => d.y);
.domain([0, ymax / (1 / 20)]);
y
let bar = svg.select(".bars").selectAll(".bar")
.data(data);
const g = bar.enter().append("g")
.attr("class", "bar")
.attr("transform", d => `translate(${x(d.x0)},${y2})`);
.append("rect")
g.attr("width", d => (x(d.x1) - x(d.x0)) - 2);
.append("text")
g.attr("x", x(1 / 40))
.attr("dy", 10)
.attr("dominant-baseline", "hanging")
.attr("text-anchor", "middle");
= g.merge(bar);
bar
const t = d3.transition().duration(delay / 4);
.select("rect")
bar.transition(t)
.attr("height", d => y(d.y / (1 / 20)));
.select("text")
bar.text(d => d.y > 0 ? d3.format(".0%")(d.y) : "");
.select(".line")
svg.datum(d3.range(0, 1.05, 0.05).map(x => [x, pdf(x)]))
.transition(t)
.attr("d", area);
}
function renderBalls() {
const data = d3.range(n).map(f.sample);
const mean = d3.mean(data);
const ball = svg.append("g").selectAll(".ball")
.data(data);
let i = 0;
.enter().append("circle")
ball.attr("class", "ball")
.attr("cx", d => x(d))
.attr("cy", 0)
.attr("r", 5)
.transition().duration(delay).ease(d3.easeBounce)
.attr("cy", y1 - 5)
.on("end", function() {
.select(this)
d3.transition().duration(delay / 4)
.attr("cy", (y2 + y1) / 2)
.transition().duration(delay / 4)
.attr("cx", x(mean))
.transition().duration(delay / 4).ease(d3.easeBounce)
.attr("cy", y2 - 3)
.attr("r", 3)
.each(() => ++i)
.on("end", function() {
if (!--i) {
.push(mean);
countselse {
} .select(this).remove();
d3
};
});
})
}
function renderAll() {
renderBars();
renderBalls();
}
function start() {
return d3.interval(renderAll, delay);
}
let timer = start();
.then(() => {
invalidation.stop();
timer.selectAll("*").interrupt();
svg;
})
return Object.assign(window.node(), {
update(values) {
.stop();
timer
if (n !== values.n) {
.selectAll("*").interrupt();
svg.selectAll(".ball").remove();
svg.selectAll(".bar").remove();
svg.select(".line").attr("d", null);
svg
= [];
counts
}
= values.delay;
delay = values.n;
n
.select(".line")
svg.attr("opacity", n > 1 ? 1 : 0);
= start();
timer ,
};
}) }
Let \(x_1 \dots x_n\) be a set of independent, identically distributed random variables following a distribution \(f\) with mean \(\mu\) and finite variance \(\sigma^2\). In the limit of large \(n\), the arithmetic mean \[\bar{x}=\frac{1}{n}\sum_{i=1}^{n}x_i\] follows a normal distribution with mean \(\mu\) and variance \(\frac{\sigma^2}{n}\). This holds regardless of the specific form of \(f\). In this example, \(f\) is a uniform distribution (set \(n = 1\) to see this) and \(n = 4\) by default.