This tutorial shows how to create as much geometry as possible in a finite amount of RAM in a procedural.

Procedurally generated geometry for rendering in Arnold goes through 3 phases of RAM consumption. The first phase is the generation of data, next is the filling of Arnold's data structures, and last is the ray acceleration data structure creation (the Bounding Volume Hierarchy, or BVH, is usually the largest). When this is all done, the actual rendering begins, which does not use much more RAM. Directly building arrays of data into Arnold's data structures is the most efficient method, if it is possible. Users do not have control over RAM consumption of the BVH.

The Mandelbulb

In our example, we are generating a "Mandelbulb" 3D version of the Mandelbrot and Julia sets. This algorithm involves iterating a function (Z^n+C) and seeing if it exits a sphere of radius 2; if it stays inside for a set number of steps, it is considered a "prisoner point" and a small sphere is put there. Rendering a Mandelbulb as a dense grid of spheres is neither the most elegant nor the most efficient way to display this mathematical entity; we are just using this as a method to create large amounts of data for the purposes of this tutorial.

 

This animation shows the sphere size in a closeup on a Mandelbulb made from approximately 1/4 of a billion spheres. It was rendered on a laptop computer in 8 GB of RAM:

Breaking Generation into Chunks

It is not possible to know how many spheres we will have when we are done, so we cannot fill Arnold's arrays directly; instead, we fill a linked list and then use that to fill the arrays in a second pass. If we fill RAM with a giant linked list of all spheres, we would then need to allocate an array for Arnold to copy the data into; we would only be able to use half of the RAM in a system using this method. Instead, in our example, we break the task into smaller chunks, with each chunk filling a linked list, allocating an array, and then deleting the linked list. This allows us to fill our RAM to the top with renderable geometry. For our example, we simply broke the Mandelbulb up into slabs on the X-axis.

Multi-Threading

Running the math that generates the points takes CPU power, and many modern systems have access to multiple CPUs on a single system. In order to fill the RAM as efficiently as possible, we take each of our chunks and break it into sub-chunks, allowing a single CPU to fill each of those in parallel, using Arnold's AiThreadCreate(). There are numerous other accelerations that can be incorporated into this, such as SIMD sin/cos functions or possibly offloading computations to a GPU or other methods, but we leave these types of optimizations out of our example.

Source Code

Below is the source code for the procedural:

mandelbulb.cpp
#include <ai.h>
#include <cstring>
#include <cstdlib>
#include <vector>
 
using namespace std;
 
// procedural parameters
struct Mandelbulb
{
   int      grid_size;       // sample grid resolution
   int      max_iterations;  // max Z^n+C iterations to try
   float    power;           // exponent, the "n" in Z^n+C
   float    sphere_scale;    // scales the spheres
   float    orbit_threshold; // clears out a hollow center in the set
   int      chunks;          // number of "chunks" for RAM management
   int      threads;         // number of threads to use
   bool     julia;           // mandelbrot/julia set switch
   AtVector julia_C;         // C value for julia sets (unused for mandelbrot)
   int counter;
};
 
// returns Z^n + C
// for more info: http://www.skytopia.com/project/fractal/2mandelbulb.html#formula
// this function is called often and is not very fast, 
// this would be an obvious place to add optimizations, 
// such as SIMD sin() functions or whatnot
static AtVector iterate(AtVector Z, float n, AtVector C)
{
   AtVector Zsquared;
   float r2 = AiV3Dot(Z, Z);
   float theta = atan2f(sqrtf(Z.x * Z.x + Z.y * Z.y), Z.z);
   float phi = atan2f(Z.y, Z.x);
   float r_n;
   if (n == 8)
      r_n = r2 * r2;
   else
      r_n = powf(r2,n*.5f);
   const float sin_thetan = sinf(theta * n);
   
   Zsquared.x = r_n * sin_thetan * cosf(phi * n);
   Zsquared.y = r_n * sin_thetan * sinf(phi * n);
   Zsquared.z = r_n * cosf(theta * n);
   return Zsquared + C;
}
 
AI_PROCEDURAL_NODE_EXPORT_METHODS(MandelbulbMtd);
 
node_parameters
{
   AiParameterInt("grid_size"      , 1000);
   AiParameterInt("max_iterations" , 10);
   AiParameterFlt("power"          , 8);
   AiParameterFlt("sphere_scale"   , 1);
   AiParameterFlt("orbit_threshold", 0.05);
   AiParameterInt("chunks"         , 30);
   AiParameterInt("threads"        , 4);
   AiParameterBool("julia"         , false);
   AiParameterVec("julia_C"        , 0, 0, 0);
}
 
// we read the UI parameters into their global vars
procedural_init
{
   Mandelbulb *bulb = new Mandelbulb();
   *user_ptr = bulb;
 
   bulb->grid_size = AiNodeGetInt(node, "grid_size");
   bulb->max_iterations = AiNodeGetInt(node, "max_iterations");
   bulb->power = AiNodeGetFlt(node, "power");
   bulb->sphere_scale = AiNodeGetFlt(node, "sphere_scale");
   bulb->orbit_threshold = AiSqr(AiNodeGetFlt(node, "orbit_threshold"));
   bulb->chunks = AiNodeGetInt(node, "chunks");
   bulb->threads = AiClamp(AiNodeGetInt(node, "threads"), 1, AI_MAX_THREADS);
   bulb->julia = AiNodeGetBool(node, "julia");
   bulb->julia_C = AiNodeGetVec(node, "julia_C");
   bulb->counter = 0;
 
   return true;
}
 
procedural_cleanup
{
   Mandelbulb *bulb = (Mandelbulb*)user_ptr;
   delete bulb;
   return true;
}
 
// we will create one node per chunk as set in the UI
procedural_num_nodes
{
   Mandelbulb *bulb = (Mandelbulb*)user_ptr;
   return bulb->chunks;
}
 
// this is the function that gets run on each thread
// the function (Z^n+C) is sampled at all points in a regular grid
// prisoner points with an orbit greater than bulb->orbit_threshold are added
// the total grid is broken into bulb->chnks number of slabs on the X axis
// each slab is broken into bulb->threads number of sub slabs
// and the start and end value in X is passed in and that section is sampled
void fillList(Mandelbulb *bulb, int start, int end, int chunknum, vector<AtVector>& list)
{
   float inv_grid_size = 1.0f / bulb->grid_size;
   // these vars are for a crude counter for percent completed
   unsigned int modder = static_cast<unsigned int>(float(end-start) / 5);
   int levelcount = 0;
   int percent = 0;
   int localcounter = 0;
 
   // only samples X in the range "start" to "end"
   for (int X = start; X < end; X++) {
      levelcount++;
      // echo out some completion info
      if (modder > 0) {
         if ((levelcount%modder) ==0 ) {
            percent += 10;
            AiMsgInfo("[mandelbulb] %d percent of chunk %d", percent, chunknum);
         }
      }
 
      // samples all points in Y and Z
      for (int Y = 0; Y < bulb->grid_size; Y++) {
         for (int Z = 0; Z < bulb->grid_size; Z++) {
            AtVector sample;
            sample.x = (X * inv_grid_size - 0.5f) * 2.5f;
            sample.y = (Y * inv_grid_size - 0.5f) * 2.5f;
            sample.z = (Z * inv_grid_size - 0.5f) * 2.5f;
            // init the iterator
            AtVector iterator = sample;
            // now iterate the Z^n+C function bulb->max_iterations number of times
            for (int iter = 0; iter < bulb->max_iterations; iter++) {
               if (AiV3Dot(iterator,iterator) > 4)
                  break; //orbit has left the max radius of 2....
               if (bulb->julia) {
                  // each UI value of C creates a full Julia set
                  iterator = iterate(iterator,bulb->power,bulb->julia_C);
               } else {
                  // Mandelbrot set is the centerpoints of all Julia sets
                  iterator = iterate(iterator,bulb->power,sample);
               }
            }
 
            // tiny orbits are usually inside the set, disallow them.
            bool allowit = AiV3Dist2(sample,iterator) >= bulb->orbit_threshold;
 
            // if the orbit is inside radius 2
            // and its endpoint travelled greater than orbittthresh
            if (AiV3Dot(iterator, iterator) < 4 && allowit) {
               bulb->counter++; // increment global counter
               localcounter++; // increment local counter
               // this is a prisoner point, add it to the set
               list.push_back(sample);
            }
         }
      }
   }
 
   AiMsgInfo("[mandelbulb] finished 1 thread of chunk %d, new total new points %d", chunknum, localcounter);
}
 
// this builds the "points" node in Arnold and sets
// the point locations, radius, and sets it to sphere mode
static AtNode *build_node(const AtNode *parent, Mandelbulb *bulb, vector<AtVector>& list)
{
   AtArray *pointarr = AiArrayConvert(list.size(), 1, AI_TYPE_VECTOR, &list[0]);
   vector<AtVector>().swap(list); // clear data used by points vector.
   AtNode *currentInstance = AiNode("points", "mandelbulb", parent); // initialize node as child of procedural
   AiNodeSetArray(currentInstance, "points", pointarr);
   AiNodeSetFlt(currentInstance, "radius", (2.0f/bulb->grid_size) * bulb->sphere_scale);
   AiNodeSetInt(currentInstance, "mode", 1);
   return currentInstance;
}
 
// a data structure to hold the arguments for the thread
// corresponds to the arguments to the fillList() function
struct ThreadArgs {
   Mandelbulb *bulb;
   int start;
   int end;
   int i;
   vector<AtVector> list;
};
 
// a function to be passed for the thread to execute
// basically a wrapper to the fillList() function
unsigned int threadloop(void *pointer)
{
   ThreadArgs *thread_args = (ThreadArgs*) pointer;
   fillList(thread_args->bulb, thread_args->start, thread_args->end, thread_args->i, thread_args->list);
   return 0;
}
 
// this is the function that Arnold calls to request the nodes
// that this procedural creates.
procedural_get_node
{
   Mandelbulb *bulb = (Mandelbulb*)user_ptr;
 
   // determine the start and end point of this chunk
   float chunksize = float(bulb->grid_size) / float(bulb->chunks);
   int start = static_cast<int>(i*chunksize);
   int end = static_cast<int>((i+1)*chunksize);
   if (end>bulb->grid_size)
      end = bulb->grid_size;
   float range = end - start;
 
   // make an array of arguments for the threads
   vector<ThreadArgs> thread_args(bulb->threads);
   vector<void*> threads(bulb->threads);
 
   // now loop through and launch the threads
   for (int tnum = 0; tnum < bulb->threads; tnum++) {
      // figure out the threads start and end points for the sub-chunks
      int tstart = start + static_cast<int>((range/bulb->threads)*tnum);
      int tend = start + static_cast<int>((range/bulb->threads)*(tnum+1));
      thread_args[tnum].start = tstart;
      thread_args[tnum].end = tend;
      thread_args[tnum].i = i;
      thread_args[tnum].bulb = bulb;
      threads[tnum] = AiThreadCreate(threadloop, &thread_args[tnum], 0);
   }
 
   // using AiThreadWait, wait 'til the threads finish
   size_t listlength = 0;
   for (int tnum = 0; tnum < bulb->threads; tnum++) {
      AiThreadWait(threads[tnum]);
      // sum up the length of all threads lists
      listlength += thread_args[tnum].list.size();
   }
 
   // a vector to hold all the point data
   vector<AtVector> allpoints;
   allpoints.reserve(listlength);
 
   // concatenate all the vectors returned by the threads
   for (int tnum = 0; tnum < bulb->threads; tnum++) {
      allpoints.insert(allpoints.end(), thread_args[tnum].list.begin(), thread_args[tnum].list.end());
      vector<AtVector>().swap(thread_args[tnum].list); // clear data
   }
 
   for (int k = 0; k < bulb->threads; k++)
      AiThreadClose(threads[k]);
 
   AiMsgInfo("[mandelbulb] total sphere count: %d", bulb->counter);
 
   // if it's empty, return a null and Arnold handles it well.
   // passing a node with no geometry causes errors.
   if (listlength == 0)
      return NULL;
 
   // build the AtNode
   return build_node(node, bulb, allpoints);
}
 
node_loader
{
   if (i>0)
      return false;
   node->methods      = MandelbulbMtd;
   node->output_type  = AI_TYPE_NONE;
   node->name         = "mandelbulb";
   node->node_type    = AI_NODE_SHAPE_PROCEDURAL;
   strcpy(node->version, AI_VERSION);
   return true;
}

Example Scene

The following .ass file generates an image similar to the one above:

mandelbulb.ass
options
{
 AA_samples 9
 outputs "RGBA RGBA /out/arnold1:gaussian_filter /out/arnold1:jpeg"
 xres 960
 yres 540
 GI_diffuse_depth 1
 GI_specular_depth 1
 GI_diffuse_samples 3
}
 
driver_jpeg
{
 name /out/arnold1:jpeg
 filename "mandelbulb.jpg"
}
 
gaussian_filter
{
 name /out/arnold1:gaussian_filter
}
 
persp_camera
{
 name /obj/cam1
 fov 54.512329
 focus_distance 1 2 FLOAT 3.0099871 3.01
 aperture_size 0.004
 aperture_blades 5
 matrix 1 2 MATRIX
  0.9011746 0.010277364 0.43333444 0
  0.15803696 0.92311317 -0.35055155 0
  -0.4036195 0.38439101 0.83026195 0
  -1.38442 1.3010246 2.6316857 1
 
  0.90098524 0.010277364 0.43372801 0
  0.15819006 0.92311317 -0.35048249 0
  -0.40398207 0.38439101 0.83008558 0
  -1.386554 1.3010246 2.6282051 1
 shutter_start 0.25
 shutter_end 0.75
}
 
skydome_light
{
 name mysky
 intensity 1
}
 
utility
{
 name /shop/utility1
 shade_mode "lambert"
 color 0.5 0.5 0.5
}
 
plane
{
 name /obj/FLOOR:/shop/utility1:plane_0
 point 0.0 -0.5 0.0
 normal 0.0 1.0 0.0
 shader "/shop/utility1"
}
 
standard_surface
{
 name /shop/standard1
 base 0.9
 base_color 0.7 0.7 0.7
 specular_roughness 0.167138
}
 
mandelbulb
{
 name /obj/MandelJulia_Procedural1
 shader "/shop/standard1"
 
 grid_size 1600
 max_iterations 10
 power 8
 sphere_scale 1
 orbit_threshold 0.05
 chunks 30
 threads 16
 julia off
 julia_C -0.161224 1.04 0.183673
}
 
point_light
{
 name /obj/arnold_light2
 radius 5
 matrix
  1 0 0 0
  0 1 0 0
  0 0 1 0
  -10 10 0 1
 color 1 0.5996 0.076
 intensity 300
 samples 2
}

 

 

Here is a high-resolution, 2.5 billion sphere render by Thiago Ize and some close-ups rendered by Lee Griggs.

 

 

  • No labels