estimating pi with random numbers
Estimating pi with a random number generator
Recently, a friend asked me if I knew about Monte Carlo and told me that the number PI can be estimated with random numbers.
I then decided to see if I could do that without looking up the solution.
Here it goes…
Import stuff
%pip install "matplotlib<4" "numpy<2"
import numpy as np
import matplotlib.pyplot as plt
# Set the global grid property
plt.rcParams['axes.grid'] = True
# Initialize random number generator
rng = np.random.default_rng(seed=42)
Requirement already satisfied: matplotlib<4 in ./venv/lib/python3.11/site-packages (3.8.2)
Requirement already satisfied: numpy<2 in ./venv/lib/python3.11/site-packages (1.26.2)
Requirement already satisfied: contourpy>=1.0.1 in ./venv/lib/python3.11/site-packages (from matplotlib<4) (1.2.0)
Requirement already satisfied: cycler>=0.10 in ./venv/lib/python3.11/site-packages (from matplotlib<4) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in ./venv/lib/python3.11/site-packages (from matplotlib<4) (4.44.3)
Requirement already satisfied: kiwisolver>=1.3.1 in ./venv/lib/python3.11/site-packages (from matplotlib<4) (1.4.5)
Requirement already satisfied: packaging>=20.0 in ./venv/lib/python3.11/site-packages (from matplotlib<4) (23.2)
Requirement already satisfied: pillow>=8 in ./venv/lib/python3.11/site-packages (from matplotlib<4) (10.1.0)
Requirement already satisfied: pyparsing>=2.3.1 in ./venv/lib/python3.11/site-packages (from matplotlib<4) (3.1.1)
Requirement already satisfied: python-dateutil>=2.7 in ./venv/lib/python3.11/site-packages (from matplotlib<4) (2.8.2)
Requirement already satisfied: six>=1.5 in ./venv/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib<4) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
Generate random points (x, y pairs) uniformly distributed in the unit square
number_of_pairs = 1_000
# Generate both axes
xy = rng.uniform(0, 1, number_of_pairs * 2)
# Split into x and y
x, y = np.array_split(xy, 2)
def show_scatter(x, y):
plt.figure(figsize=(6, 6))
plt.scatter(x, y)
plt.grid(True)
plt.show()
show_scatter(x, y)
Remove points outside of the unit circle
mask = x**2 + y**2 <= 1
x = x[mask]
y = y[mask]
show_scatter(x, y)
Map all points on the circle border
distances_from_origin = np.sqrt(x**2 + y**2)
x = np.divide(x, distances_from_origin)
y = np.divide(y, distances_from_origin)
show_scatter(x, y)
Observe the distribution of x
def show_histogram(x, bins: int = 30):
plt.figure(figsize=(6, 6))
plt.hist(x, bins=bins, edgecolor='black')
plt.show()
show_histogram(x)
As we get from x=0 to x=1, the circle border gets more perpendicular to the x axis, and the x values of the random numbers are more condensed.
Get all the distances between x values
def get_counterpart(x_or_y):
"""
Given an x value between 0 and 1,
find the y value that maps
that x value on the unit circle.
Works x->y and y->x.
"""
return np.sqrt(np.ones(x_or_y.shape) - x_or_y**2)
def get_euclidian_distances(x1, x2):
"""
Given two x matrices,
find the euclidian distance between their
corresponding points on the unit circle.
It is an element-wise operation.
"""
y1 = get_counterpart(x1)
y2 = get_counterpart(x2)
return np.sqrt((x1 - x2)**2 + (y1 -y2)**2)
x.sort()
# Obtain each distance between neighboring points on the unit circle
distances = get_euclidian_distances(x[:-1], x[1:])
Estimate pi by summing the distances (obtaining the length of the quarter circle)
quarter_circle_circumference = np.sum(distances)
# pi is half a complete circle
pi = quarter_circle_circumference * 2
print(pi)
3.1375185348374206
Grid search various numbers of points on the circle to observe convergence
import math
from time import perf_counter
def estimate_pi(points_inside_unit_square: int) -> float:
"""
Given an amount of number of random points, estimate pi.
"""
# Generate points
xy = rng.uniform(0, 1, points_inside_unit_square * 2)
x, y = np.array_split(xy, 2)
# Remove points outside of circle
mask = x**2 + y**2 <= 1
x = x[mask]
y = y[mask]
# Carry x values to the circle border
distances_from_origin = np.sqrt(x**2 + y**2)
x = np.divide(x, distances_from_origin)
# Sum over the distances between points on circle
x.sort()
distances = get_euclidian_distances(x[:-1], x[1:])
return np.sum(distances) * 2
def get_error(pi_estimation: float) -> float:
"""
Get the relative error of the pi estimation.
"""
return abs(pi_estimation - math.pi) / math.pi
numbers_of_points_inside_unit_square = [
10_000, 100_000, 1_000_000,
10_000_000, 100_000_000
]
errors = []
start_time = perf_counter()
for number_of_points_inside_unit_square in numbers_of_points_inside_unit_square:
pi = estimate_pi(number_of_points_inside_unit_square)
error = get_error(pi)
errors.append(error)
print(f"{number_of_points_inside_unit_square=:>15}, {error=:.10f}")
end_time = perf_counter()
print(f"\nCircumference method took: {end_time - start_time} seconds.")
number_of_points_inside_unit_square= 10000, error=0.0000264447
number_of_points_inside_unit_square= 100000, error=0.0000401923
number_of_points_inside_unit_square= 1000000, error=0.0000037544
number_of_points_inside_unit_square= 10000000, error=0.0000001447
number_of_points_inside_unit_square= 100000000, error=0.0000000194
Circumference method took: 14.25504916600039 seconds.
plt.figure(figsize=(6, 6))
plt.plot(numbers_of_points_inside_unit_square, errors)
plt.xscale("log")
plt.yscale("log")
plt.xlabel('random points on circle')
plt.ylabel('error in PI estimation')
plt.show()
UPDATE
I showed this to my friend and found out that I did not have to make this as complicated as it is.
Here is a version that uses the numbers of points before and after cleaning the points inside the square, outside the circle.
def estimate_pi_area(number_of_points_inside_unit_square) -> float:
xy = rng.uniform(0, 1, number_of_points_inside_unit_square * 2)
x, y = np.array_split(xy, 2)
mask = x**2 + y**2 <= 1
number_of_points_inside_unit_circle = np.sum(mask)
# pi * r^2 (r=1) = pi (our quarter circle is pi / 4)
entire_circle_area = number_of_points_inside_unit_circle * 4
return entire_circle_area / number_of_points_inside_unit_square
errors_area = []
start_time = perf_counter()
for number_of_points_inside_unit_square in numbers_of_points_inside_unit_square:
pi = estimate_pi_area(number_of_points_inside_unit_square)
error_area = get_error(pi)
errors_area.append(error_area)
print(f"{number_of_points_inside_unit_square=:>15}, {error_area=:.10f}")
end_time = perf_counter()
print(f"\Area method took: {end_time - start_time} seconds.")
number_of_points_inside_unit_square= 10000, error_area=0.0077644228
number_of_points_inside_unit_square= 100000, error_area=0.0013137752
number_of_points_inside_unit_square= 1000000, error_area=0.0000978314
number_of_points_inside_unit_square= 10000000, error_area=0.0000083568
number_of_points_inside_unit_square= 100000000, error_area=0.0000315297
\Area method took: 1.3466923750020214 seconds.
Let’s see if we can get closer sparing some of the gained speed.
start_time = perf_counter()
largest_amount_in_list = numbers_of_points_inside_unit_square[-1]
# 4 times the largest example in circumference method
larger_amount_test_area = largest_amount_in_list * 4
pi = estimate_pi_area(larger_amount_test_area)
error_area = get_error(pi)
errors_area.append(error_area)
print(f"{number_of_points_inside_unit_square=:>15}, {error_area=:.10f}")
end_time = perf_counter()
print(f"\Area method with {larger_amount_test_area:_} took: {end_time - start_time} seconds.")
number_of_points_inside_unit_square= 100000000, error_area=0.0000055947
\Area method with 400_000_000 took: 32.012539082999865 seconds.
plt.figure(figsize=(6, 6))
plt.plot(numbers_of_points_inside_unit_square + [larger_amount_test_area],
errors_area, label="area method")
plt.plot(numbers_of_points_inside_unit_square, errors, label="circumference method")
plt.xscale("log")
plt.yscale("log")
plt.xlabel('random points on circle')
plt.ylabel('error in PI estimation')
plt.legend()
plt.show()
The area method is much much simpler. After learning about it, I was a bit ashamed of having added all that complexity, but it turns out it was improving performance. Let’s drop one final plot to better compare runtimes and call it a day.
def estimate_pi_circumference_with_time(
number_of_points_inside_unit_square
) -> tuple:
"""
Estimate pi with circumference method.
Returns a tuple with error in pi estimation, and the runtime.
"""
start_time = perf_counter()
pi = estimate_pi(number_of_points_inside_unit_square)
error = get_error(pi)
print(f"{number_of_points_inside_unit_square=:>15}, {error=:.10f}")
end_time = perf_counter()
return (error, end_time - start_time)
def estimate_pi_area_with_time(
number_of_points_inside_unit_square
) -> tuple:
"""
Estimate pi with area method.
Returns a tuple with error in pi estimation, and the runtime.
"""
start_time = perf_counter()
pi = estimate_pi_area(number_of_points_inside_unit_square)
error_area = get_error(pi)
print(f"{number_of_points_inside_unit_square=:>15}, {error_area=:.10f}")
end_time = perf_counter()
return (error_area, end_time - start_time)
numbers_of_points_inside_unit_square = [
10_000, 100_000, 1_000_000,
10_000_000, 100_000_000
]
errors_circumference = []
runtimes_circumference = []
errors_area = []
runtimes_area = []
for number_of_points_inside_unit_square in numbers_of_points_inside_unit_square:
error_circumference, runtime_circumference = (
estimate_pi_circumference_with_time(number_of_points_inside_unit_square)
)
errors_circumference.append(error_circumference)
runtimes_circumference.append(runtime_circumference)
error_area, runtime_area = (
estimate_pi_area_with_time(number_of_points_inside_unit_square)
)
errors_area.append(error_area)
runtimes_area.append(runtime_area)
larger_amount_test_area = numbers_of_points_inside_unit_square[-1] * 5
error_area, runtime_area = estimate_pi_area_with_time(larger_amount_test_area)
errors_area.append(error_area)
runtimes_area.append(runtime_area)
number_of_points_inside_unit_square= 10000, error=0.0006619905
number_of_points_inside_unit_square= 10000, error_area=0.0038220571
number_of_points_inside_unit_square= 100000, error=0.0000117467
number_of_points_inside_unit_square= 100000, error_area=0.0001423948
number_of_points_inside_unit_square= 1000000, error=0.0000034482
number_of_points_inside_unit_square= 1000000, error_area=0.0004785300
number_of_points_inside_unit_square= 10000000, error=0.0000003295
number_of_points_inside_unit_square= 10000000, error_area=0.0001714588
number_of_points_inside_unit_square= 100000000, error=0.0000000743
number_of_points_inside_unit_square= 100000000, error_area=0.0001217046
number_of_points_inside_unit_square= 500000000, error_area=0.0000182473
plt.figure(figsize=(6, 6))
plt.plot(runtimes_area, errors_area, label="area method")
plt.plot(runtimes_circumference, errors_circumference,
label="circumference method")
plt.xscale("log")
plt.yscale("log")
plt.xlabel('time (s)')
plt.ylabel('error in PI estimation')
plt.legend()
plt.show()