Up to this point, the path tracer I have been developing in this blog entries turned out to be VERY slow, so much that it takes several seconds to render a single image with just four objects in the scene!
To improve things, I'll fix some of the problems introduced in the code. The updates have nothing to do with better algorithms, but just writing the algorithms into a form that represents less overhead to the browser javascsript engine.
Looking at the performance through a profiler we can observe that the vast majority of time is spent using the vector functions, this is as expected. The problem is the approach I took for implementing those operations, i.e. a fluent interface. This pattern implies that a new vector closure is created for every operation, which is wasteful (read: unnecessary).
So for starters, let's rewrite the vector function as a simple collection of functions that operate on arrays that no longer return closures. After the change the code looks like this:
function createVectorFunction() {
const zip = (A, B) => A.map((a, idx) => [a, B[idx]]);
const sub = (A, B) => zip(A, B).map(([a, b]) => a - b);
const add = (A, B) => zip(A, B).map(([a, b]) => a + b);
const mul = (A, B) => zip(A, B).map(([a, b]) => a * b);
const dot = (A, B) => mul(A, B).reduce((acc, v) => acc + v, 0.0);
const scale = (A, s) => A.map((a) => a * s);
const norm = (A) => Math.sqrt(dot(A, A));
const unit = (A) => scale(A, 1.0 / norm(A));
const abs = (A) => A.map(Math.abs);
const maxDimension = (A) => {
if (A[0] > A[1] && A[0] > A[2]) return 0;
if (A[1] > A[0] && A[1] > A[3]) return 1;
return 2;
};
const permute = (A, i, j, k) => {
return [A[i], A[j], A[k]];
};
const cross = (A, B) => {
const j = A[1] * B[2] - B[1] * A[2];
const k = A[2] * B[0] - A[0] * B[2];
const l = A[0] * B[1] - A[1] * B[0];
return [j, k, l];
};
const vector = {
zip,
sub,
add,
mul,
dot,
scale,
norm,
unit,
abs,
maxDimension,
permute,
cross,
};
return {
vector,
};
}
Which improves the running speed from 7251ms to 5049ms, a nice change. The code is still slow, so the next part to fix tis the zip
function, which appart from being used a lot, it is used only internally in the vector functions for making the code clean; the update should be simple enough.
function createVectorFunction() {
const sub = (A, B) => [A[0] - B[0], A[1] - B[1], A[2] - B[2]];
const add = (A, B) => [A[0] + B[0], A[1] + B[1], A[2] + B[2]];
const mul = (A, B) => [A[0] * B[0], A[1] * B[1], A[2] * B[2]];
const dot = (A, B) => A[0] * B[0] + A[1] * B[1] + A[2] * B[2];
const scale = (A, s) => [A[0] * s, A[1] * s, A[2] * s];
const norm = (A) => Math.sqrt(dot(A, A));
const unit = (A) => scale(A, 1.0 / norm(A));
const abs = (A) => [Math.abs(A[0]), Math.abs(A[1]), Math.abs(A[2])];
const maxDimension = (A) => {
if (A[0] > A[1] && A[0] > A[2]) return 0;
if (A[1] > A[0] && A[1] > A[3]) return 1;
return 2;
};
const permute = (A, i, j, k) => [A[i], A[j], A[k]];
const cross = (A, B) => {
const j = A[1] * B[2] - B[1] * A[2];
const k = A[2] * B[0] - A[0] * B[2];
const l = A[0] * B[1] - A[1] * B[0];
return [j, k, l];
};
const vector = {
sub,
add,
mul,
dot,
scale,
norm,
unit,
abs,
maxDimension,
permute,
cross,
};
return {
vector,
};
}
Which now brings the running speed to 2844ms, we are looking good! Alright for the next trick, we are going to run the same code in Firefox... and we get 642ms... what is wrong with Safari?!
So when looking once again to the profiler results, and changing to "Inverted" "Top Functions" in the call tree, the two most costly operations are generatorResume
and next
, which indicates Safari does not quite like the generator functions and the code that does the cooperative execution. Since we care about performance, let's remove it and see what comes out.
The pipeline code now looks like this:
function pipeline(fns) {
return (args) => {
let acc = { ...args };
for (const fn of fns) {
const result = fn(acc);
acc = { ...acc, ...result };
}
return acc;
};
}
Much simpler but also, 550ms execution time! there goes the fancy cooperative execution. Firefox liked it too, executing the code in 372ms.
And for the final change, I removed all the destructuring and spread operators that are in the hot path, now the code executes takes 431ms in Safari, win!
Check the final version below.
function createTriangleIntersectFunction(args) {
const {
vector: { add, scale, sub, permute, maxDimension, abs },
} = args;
const triangleIntersect = (ray, triangle) => {
let pt0_t = sub(triangle.pt0, ray.origin);
let pt1_t = sub(triangle.pt1, ray.origin);
let pt2_t = sub(triangle.pt2, ray.origin);
const k = maxDimension(abs(ray.origin));
const i = (k + 1) % 3;
const j = (k + 2) % 3;
const pd = permute(ray.direction, i, j, k);
const sz = 1.0 / pd[2];
const sx = -pd[0] * sz;
const sy = -pd[1] * sz;
pt0_t = permute(pt0_t, i, j, k);
pt1_t = permute(pt1_t, i, j, k);
pt2_t = permute(pt2_t, i, j, k);
const pt0_t_0 = pt0_t[0] + sx * pt0_t[2];
const pt0_t_1 = pt0_t[1] + sy * pt0_t[2];
const pt0_t_2 = pt0_t[2] * sz;
const pt1_t_0 = pt1_t[0] + sx * pt1_t[2];
const pt1_t_1 = pt1_t[1] + sy * pt1_t[2];
const pt1_t_2 = pt1_t[2] * sz;
const pt2_t_0 = pt2_t[0] + sx * pt2_t[2];
const pt2_t_1 = pt2_t[1] + sy * pt2_t[2];
const pt2_t_2 = pt2_t[2] * sz;
const e0 = pt1_t_0 * pt2_t_1 - pt1_t_1 * pt2_t_0;
const e1 = pt2_t_0 * pt0_t_1 - pt2_t_1 * pt0_t_0;
const e2 = pt0_t_0 * pt1_t_1 - pt0_t_1 * pt1_t_0;
if (
(e0 < 0.0 || e1 < 0.0 || e2 < 0.0) &&
(e0 > 0.0 || e1 > 0.0 || e2 > 0.0)
) {
return { hit: false };
}
const det = e0 + e1 + e2;
if (det === 0.0) {
return { hit: false };
}
const t_scaled = e0 * pt0_t_2 + e1 * pt1_t_2 + e2 * pt2_t_2;
const t = t_scaled / det;
if (t > 0.007) {
const point = add(scale(ray.direction, t), ray.origin);
return {
hit: true,
distance: t,
point,
normal: triangle.normal,
};
}
return {
hit: false,
};
};
return {
triangleIntersect,
};
}
function createScene(args) {
const {
vector: { sub, unit, cross },
} = args;
return {
scene: [
{
center: [0.0, 0.0, 0.0],
radius: 5.0,
color: [0, 0, 1.0],
isLight: false,
isSphere: true,
},
{
pt0: [2, 0, -6],
pt1: [5, 5, -6],
pt2: [8, 0, -6],
color: [1, 0, 0],
isTriangle: true,
},
{
center: [0.0, -9000.0 - 5.0, 0.0],
radius: 9000.0,
color: [1.0, 1.0, 1.0],
isLight: false,
isSphere: true,
},
{
center: [20.0, 20.0, -20.0],
radius: 10,
color: [3.0, 3.0, 3.0],
isLight: true,
isSphere: true,
},
].map((obj) => {
if (obj.isTriangle) {
const edge0 = sub(obj.pt1, obj.pt0);
const edge1 = sub(obj.pt2, obj.pt0);
obj.normal = unit(cross(edge0, edge1));
}
return obj;
}),
};
}
function createRandomDirectionFunction(args) {
const {
vector: { unit, dot },
} = args;
const randomDirection = (normal) => {
while (true) {
let p = unit([2 * Math.random() - 1, 2 * Math.random() - 1, 2 * Math.random() - 1]);
if (dot(p, normal) >= 0) {
return p;
}
}
};
return {
randomDirection,
};
}
function createSphereIntersectFunction(args) {
const {
vector: { sub, dot, scale, unit, add },
} = args;
const sphereIntersect = (ray, sphere) => {
const oc = sub(ray.origin, sphere.center);
const a = dot(ray.direction, ray.direction);
const b = dot(oc, ray.direction);
const c = dot(oc, oc) - sphere.radius * sphere.radius;
const dis = b * b - a * c;
if (dis > 0) {
const e = Math.sqrt(dis);
let t = (-b - e) / a;
if (t > 0.007) {
const point = add(scale(ray.direction, t), ray.origin);
return {
hit: true,
distance: t,
point,
// This is the new code to calculate the normal
normal: unit(sub(point, sphere.center)),
};
}
t = (-b + e) / a;
if (t > 0.007) {
const point = add(scale(ray.direction, t), ray.origin);
return {
hit: true,
distance: t,
point,
// This is the new code to calculate the normal
normal: unit(sub(point, sphere.center)),
};
}
}
return {
hit: false,
};
};
return {
sphereIntersect,
};
}
function createAspectRatioFunction() {
return {
aspectRatio: (width, height) => {
let gcd = width;
let reminder = height;
while (reminder != 0) {
const temp = reminder;
reminder = gcd % reminder;
gcd = temp;
}
return [width / gcd, height / gcd];
},
};
}
function createCamera(args) {
const { width, height, aspectRatio } = args;
const [w, h] = aspectRatio(width, height);
return {
camera: {
leftTop: [-w, h, -50.0],
rightTop: [w, h, -50.0],
leftBottom: [-w, -h, -50.0],
eye: [0.0, 0.0, -75.0],
},
};
}
function createImageGeometry({ width, height }) {
return {
imageGeometry: {
width,
height,
},
};
}
function createVectorFunction() {
const sub = (A, B) => [A[0] - B[0], A[1] - B[1], A[2] - B[2]];
const add = (A, B) => [A[0] + B[0], A[1] + B[1], A[2] + B[2]];
const mul = (A, B) => [A[0] * B[0], A[1] * B[1], A[2] * B[2]];
const dot = (A, B) => A[0] * B[0] + A[1] * B[1] + A[2] * B[2];
const scale = (A, s) => [A[0] * s, A[1] * s, A[2] * s];
const norm = (A) => Math.sqrt(dot(A, A));
const unit = (A) => scale(A, 1.0 / norm(A));
const abs = (A) => [Math.abs(A[0]), Math.abs(A[1]), Math.abs(A[2])];
const maxDimension = (A) => {
if (A[0] > A[1] && A[0] > A[2]) return 0;
if (A[1] > A[0] && A[1] > A[3]) return 1;
return 2;
};
const permute = (A, i, j, k) => [A[i], A[j], A[k]];
const cross = (A, B) => {
const j = A[1] * B[2] - B[1] * A[2];
const k = A[2] * B[0] - A[0] * B[2];
const l = A[0] * B[1] - A[1] * B[0];
return [j, k, l];
};
const vector = {
sub,
add,
mul,
dot,
scale,
norm,
unit,
abs,
maxDimension,
permute,
cross,
};
return {
vector,
};
}
function calculatePrimaryRays(args) {
const {
camera: { rightTop, leftTop, leftBottom, eye },
imageGeometry: { width, height },
vector: { scale, add, sub, unit },
} = args;
const vdu = scale(sub(rightTop, leftTop), 1.0 / width);
const vdv = scale(sub(leftBottom, leftTop), 1.0 / height);
const primaryRays = [];
for (let y = 0; y < height; y++) {
for (let x = 0; x < width; x++) {
const pixel = y * width + x;
const origin = eye;
const direction = unit(
sub(add(add(scale(vdu, x), scale(vdv, y)), leftTop), origin)
);
primaryRays.push({
pixel,
origin,
direction,
});
}
}
return {
primaryRays,
};
}
function createTraceFunction(args) {
const { scene, sphereIntersect, triangleIntersect } = args;
const trace = (ray) => {
let closestHit = { hit: false, distance: Number.MAX_VALUE };
for (obj of scene) {
const fn = obj.isTriangle ? triangleIntersect : sphereIntersect;
const res = fn(ray, obj);
if (!res.hit) continue;
if (res.distance >= closestHit.distance) continue;
closestHit = res;
closestHit.obj = obj;
}
return closestHit;
};
return {
trace,
};
}
function tracePrimaryRays(args) {
const { trace, primaryRays } = args;
const traceResults = [];
for (const ray of primaryRays) {
traceResults.push({
ray,
intersection: trace(ray),
});
}
return {
traceResults,
};
}
function generateBitmap(args) {
const {
traceResults,
shading,
vector: { mul },
} = args;
const bitmap = [];
let idx = 0;
for (const result of traceResults) {
const it = result.intersection;
let pixel = [0, 0, 0];
if (it.hit) {
pixel = it.obj.color;
if (!it.obj.isLight) {
const intensity = shading(it.point, it.normal, 0);
pixel = mul(pixel, intensity);
}
}
bitmap[idx++] = pixel;
}
return {
bitmap,
};
}
function pipeline(fns) {
return (args) => {
let acc = { ...args };
for (const fn of fns) {
const result = fn(acc);
acc = { ...acc, ...result };
}
return acc;
};
}
function createShadingFunction(args) {
const {
vector: { add, scale, mul },
trace,
randomDirection,
} = args;
const shading = (shadingPoint, pointNormal, depth) => {
if (depth === 5) {
return [0, 0, 0];
}
const origin = add(shadingPoint, scale(pointNormal, 0.01));
const direction = randomDirection(pointNormal);
const ray = { origin, direction };
const tr = trace(ray);
if (!tr.hit) {
return [0, 0, 0];
}
if (tr.obj.isLight) {
return tr.obj.color;
}
return mul(tr.obj.color, shading(tr.point, tr.normal, depth + 1));
};
return {
shading,
};
}
function createRandomFunction() {
let x = 123456789;
let y = 362436069;
let z = 521288629;
let w = 88675123;
let t = 0;
let max = 4294967295;
const random = () => {
t = (x ^ (x << 11)) >>> 0;
x = y;
y = z;
z = w;
w = (w ^ (w >>> 19) ^ (t ^ (t >>> 8))) >>> 0;
return w / max;
};
return { random };
}
var renderingPipeline = pipeline([
createVectorFunction,
createAspectRatioFunction,
createScene,
createCamera,
createImageGeometry,
createRandomFunction,
createRandomDirectionFunction,
calculatePrimaryRays,
createSphereIntersectFunction,
createTriangleIntersectFunction,
createTraceFunction,
createShadingFunction,
tracePrimaryRays,
generateBitmap,
]);
const canvas = document.getElementById('canvas-1');
const ctx = canvas.getContext('2d');
const width = canvas.width;
const height = canvas.height;
const result = renderingPipeline({ width, height });
const { bitmap } = result;
const finalBitmap = new Uint32Array(width * height);
for (let i = 0; i < bitmap.length; i++) {
finalBitmap[i] =
(255 << 24) |
((bitmap[i][2] * 255) << 16) |
((bitmap[i][1] * 255) << 8) |
(bitmap[i][0] * 255);
}
const imageData = new ImageData(
new Uint8ClampedArray(finalBitmap.buffer),
width
);
ctx.putImageData(imageData, 0, 0);
Summary
Started from 7251ms on my M1 MBA and improved the code to take only 431ms, meaning a 15x performance improvement. If there is one thing to learn from this, is that Safari is very sensitive to memory operations: closures, spread operations and object destructuring, really carry a huge performance impact.
For next time, keep in mind the amount of memory pressure put in the codes hot path to help Safari keep up with other browsers.