Central Limit Theorem

Math

Statistics

Bar Chart

d3 = require("d3@7");

import { fullWidth, fullHeight } from "./tools/Globals.js";

// Initial settings shared by the chart and the two sliders:
//   delay - milliseconds between animation ticks
//   n     - number of samples drawn (and averaged) per tick
defaults = ({ delay: 1000, n: 4 });

Code

// Observable cell that builds the animated Central Limit Theorem figure.
//
// On every timer tick it (1) redraws a histogram of all sample means recorded
// so far, with a normal-density curve overlaid, and (2) drops `n` freshly drawn
// samples as balls that bounce on the "average" line, slide together at their
// mean and land on the "count" line, at which point the mean is recorded.
//
// The cell's value is the root <svg> DOM node, extended with an
// `update({ delay, n })` method for changing the settings at runtime.
chart = {
  // Drawing area = full size (from ./tools/Globals.js) minus the margins.
  const margin = { top: 20, right: 10, bottom: 20, left: 10 };
  const width = fullWidth - margin.left - margin.right,
        height = fullHeight - margin.top - margin.bottom;

  // Root <svg>; it fills its container and scales through the viewBox.
  // Note: this local shadows the global `window` for the rest of the cell.
  const window = d3.create("svg")
   .attr("width", "100%")
   .attr("height", "100%")
   .attr("viewBox", `0 0 ${fullWidth} ${fullHeight}`)
   .attr("preserveAspectRatio", "xMidYMid meet");

  // Inner group shifted by the margins; everything below is drawn into it.
  const svg = window.append("g")
   .attr("class", "clt")
   .attr("transform", `translate(${margin.left},${margin.top})`);

  // Mutable settings; replaced by update().
  let delay = defaults.delay,
      n = defaults.n;

  // The sampled distribution: Math.random is uniform on [0, 1), which has
  // mean 1/2 and standard deviation 1 / (2 * sqrt(3)).
  const f = {
    sample: Math.random,
    mu: 1 / 2,
    sigma: 1 / (2 * Math.sqrt(3))
  };

  // Normal density with mean f.mu and standard deviation f.sigma / sqrt(n),
  // i.e. the limiting distribution of the mean of `n` draws from `f`.
  // Reads the current value of `n` on every call.
  function pdf(x) {
    return Math.sqrt(n) / Math.sqrt(2 * Math.PI) / f.sigma
      * Math.exp(-n * (x - f.mu) * (x - f.mu) / (2 * f.sigma * f.sigma));
  };

  // Horizontal scale: sample value in [0, 1] -> rounded pixel in [0, width].
  const x = d3.scaleLinear()
    .domain([0, 1])
    .rangeRound([0, width]);

  // Vertical positions of the "average" (y1) and "count" (y2) guide lines.
  const y1 = height / 3,
        y2 = height / 2;

  // Vertical scale: density -> pixel extent below the "count" line.
  // The domain set here is only the initial one; renderBars() replaces it.
  const y = d3.scaleLinear()
    .domain([0, pdf(f.mu)])
    .range([0, height - y2]);

  // Bins the recorded means over [0, 1], using the x scale's ticks as
  // thresholds. NOTE(review): the hard-coded 1/20 and 1/40 further down
  // presumably rely on x.ticks(20) producing 0.05-wide bins - confirm.
  const histogram = d3.histogram()
    .domain(x.domain())
    .thresholds(x.ticks(20));

  // Filled shape between the "count" line and the density curve, smoothed
  // with a basis spline. Input points are [x value, density] pairs.
  const area = d3.area()
    .x(d => x(d[0]))
    .y0(y2)
    .y1(d => y2 + y(d[1]))
    .curve(d3.curveBasis);

  // Placeholder for the density curve (filled in by renderBars).
  svg.append("path")
    .attr("class", "line");

  // Container for the histogram bars.
  svg.append("g")
    .attr("class", "bars");

  // Three labelled horizontal guide lines, one per stage of the animation.
  const axis = svg.selectAll(".axis")
    .data([
      { y: 0, label: "draw" },
      { y: y1, label: "average" },
      { y: y2, label: "count" },
    ])
    .enter().append("g")
      .attr("class", "axis")
      .attr("transform", d => `translate(0,${d.y})`);

  // Horizontal line spanning the full drawing width.
  axis.append("path")
    .attr("d", `M0,0H${width}`)

  axis.append("text")
    .attr("dominant-baseline", "hanging")
    .attr("dy", 5)
    .text(d => d.label);

  // Sample means recorded so far; one value is pushed per finished batch of
  // balls, and the array is emptied by update() when `n` changes.
  let counts = [];

  // Redraws the histogram of `counts` and the density curve.
  // Does nothing until at least one mean has been recorded.
  function renderBars() {
    if (counts.length < 1) {
      return;
    }

    // d.y = share of all recorded means that fall into the bin.
    // Bins with zero width are dropped.
    const data = histogram(counts)
      .map(d => { d.y = d.length / counts.length; return d; })
      .filter(d => d.x1 > d.x0);

    // Dividing a share by 1/20 turns it into the same units as pdf()
    // (assuming 0.05-wide bins), so bars and curve share the y scale.
    // The tallest bar always spans the full y range.
    const ymax = d3.max(data, d => d.y);
    y.domain([0, ymax / (1 / 20)]);

    // Data join by index: one <g class="bar"> per bin.
    let bar = svg.select(".bars").selectAll(".bar")
      .data(data);

    // Bars are positioned once, when they first enter.
    const g = bar.enter().append("g")
      .attr("class", "bar")
      .attr("transform", d => `translate(${x(d.x0)},${y2})`);

    // Bin width minus a 2px gap between neighbouring bars.
    g.append("rect")
      .attr("width", d => (x(d.x1) - x(d.x0)) - 2);

    // Percentage label, anchored at x(1 / 40) (half of a 0.05-wide bin).
    g.append("text")
      .attr("x", x(1 / 40))
      .attr("dy", 10)
      .attr("dominant-baseline", "hanging")
      .attr("text-anchor", "middle");

    // From here on, operate on new and existing bars alike.
    bar = g.merge(bar);

    // Shared transition so bars and curve animate together.
    const t = d3.transition().duration(delay / 4);

    bar.select("rect")
      .transition(t)
      .attr("height", d => y(d.y / (1 / 20)));

    // Empty bins get no label.
    bar.select("text")
      .text(d => d.y > 0 ? d3.format(".0%")(d.y) : "");

    // Density curve sampled from 0 in steps of 0.05 (range end 1.05 is
    // exclusive). The callback parameter `x` shadows the x scale here.
    svg.select(".line")
      .datum(d3.range(0, 1.05, 0.05).map(x => [x, pdf(x)]))
      .transition(t)
      .attr("d", area);
  }

  // Draws `n` samples and animates them as balls:
  //   1. fall from the "draw" line and bounce onto the "average" line,
  //   2. drop halfway towards the "count" line,
  //   3. slide horizontally to the sample mean,
  //   4. shrink and bounce onto the "count" line.
  // When the last ball of the batch lands, the mean is pushed onto `counts`.
  function renderBalls() {
    const data = d3.range(n).map(f.sample);
    const mean = d3.mean(data);

    // A fresh, empty <g> is appended on every call, so the join below has no
    // existing elements and every sample ends up in the enter selection.
    const ball = svg.append("g").selectAll(".ball")
      .data(data);

    // Number of balls of this batch whose final chain has been set up but has
    // not ended yet.
    let i = 0;
    ball.enter().append("circle")
      .attr("class", "ball")
      .attr("cx", d => x(d))
      .attr("cy", 0)
      .attr("r", 5)
      .transition().duration(delay).ease(d3.easeBounce)
      .attr("cy", y1 - 5)
      .on("end", function() {
        // Stages 2-4, chained on the individual ball.
        d3.select(this)
          .transition().duration(delay / 4)
          .attr("cy", (y2 + y1) / 2)
          .transition().duration(delay / 4)
          .attr("cx", x(mean))
          .transition().duration(delay / 4).ease(d3.easeBounce)
          .attr("cy", y2 - 3)
          .attr("r", 3)
          .each(() => ++i)
          .on("end", function() {
            // The ball that brings the counter back to zero records the mean
            // and is left in place on the "count" line; all others are
            // removed. NOTE(review): this relies on every ball of the batch
            // having been counted in before the first one lands - confirm.
            if (!--i) {
              counts.push(mean);
            } else {
              d3.select(this).remove();
            }
          });
      });
  }

  // One animation tick: refresh the histogram, then launch a new batch.
  function renderAll() {
    renderBars();
    renderBalls();
  }

  // Starts the repeating tick using the current `delay`; returns the d3 timer.
  function start() {
    return d3.interval(renderAll, delay);
  }

  let timer = start();

  // Clean-up: stop ticking and interrupt transitions on the chart's elements.
  // `invalidation` is presumably the promise Observable resolves when this
  // cell is re-evaluated or disposed - TODO confirm.
  invalidation.then(() => {
    timer.stop();
    svg.selectAll("*").interrupt();
  });

  // The cell's value: the <svg> node plus an update() method.
  return Object.assign(window.node(), {
    // Applies new settings. `values` must provide `delay` and `n`.
    // Changing `n` discards all recorded means and clears balls, bars and the
    // curve; changing only `delay` keeps the accumulated histogram.
    // Returns nothing.
    update(values) {
      timer.stop();

      if (n !== values.n) {
        svg.selectAll("*").interrupt();
        svg.selectAll(".ball").remove();
        svg.selectAll(".bar").remove();
        svg.select(".line").attr("d", null);

        counts = [];
      }

      delay = values.delay;
      n = values.n;

      // The density curve is hidden when n is 1.
      svg.select(".line")
        .attr("opacity", n > 1 ? 1 : 0);

      // Restart so the interval picks up the new delay.
      timer = start();
    },
  });
}

// Forwards the current slider values to the chart. The cell references both
// `_delay` and `_n`, so it passes them on together.
updateResult = chart.update({
  delay: _delay,
  n: _n,
});

viewof _delay = Inputs.range([100, 1000], {
  value: defaults.delay,
  step: 10,
  label: "Delay",
});
viewof _n = Inputs.range([1, 16], {
  value: defaults.n,
  step: 1,
  label: "Sample Size",
});

Let \(x_1, \dots, x_n\) be independent, identically distributed random variables drawn from a distribution \(f\) with mean \(\mu\) and finite variance \(\sigma^2\). In the limit of large \(n\), the arithmetic mean \[\bar{x}=\frac{1}{n}\sum_{i=1}^{n}x_i\] approximately follows a normal distribution with mean \(\mu\) and variance \(\frac{\sigma^2}{n}\). This holds regardless of the specific form of \(f\). In this example, \(f\) is the uniform distribution on \([0, 1)\) (set \(n = 1\) to see this) and \(n = 4\) by default.

Resources

Central Limit Theorem (Wikipedia)