diff --git a/3rdparty/meshoptimizer/src/clusterizer.cpp b/3rdparty/meshoptimizer/src/clusterizer.cpp index b01342781..26d2fb11c 100644 --- a/3rdparty/meshoptimizer/src/clusterizer.cpp +++ b/3rdparty/meshoptimizer/src/clusterizer.cpp @@ -13,12 +13,16 @@ namespace meshopt { -// This must be <= 255 since index 0xff is used internally to indice a vertex that doesn't belong to a meshlet -const size_t kMeshletMaxVertices = 255; +// This must be <= 256 since meshlet indices are stored as bytes +const size_t kMeshletMaxVertices = 256; // A reasonable limit is around 2*max_vertices or less const size_t kMeshletMaxTriangles = 512; +// We keep a limited number of seed triangles and add a few triangles per finished meshlet +const size_t kMeshletMaxSeeds = 256; +const size_t kMeshletAddSeeds = 4; + struct TriangleAdjacency2 { unsigned int* counts; @@ -97,10 +101,9 @@ static void buildTriangleAdjacencySparse(TriangleAdjacency2& adjacency, const un for (size_t i = 0; i < index_count; ++i) adjacency.counts[indices[i]]++; - // fill offset table + // fill offset table; uses sparse_seen bit to tag visited vertices unsigned int offset = 0; - // when using sparse mode this pass uses sparse_seen bit to tag visited vertices for (size_t i = 0; i < index_count; ++i) { unsigned int v = indices[i]; @@ -126,7 +129,7 @@ static void buildTriangleAdjacencySparse(TriangleAdjacency2& adjacency, const un } // fix offsets that have been disturbed by the previous pass - // when using sparse mode this pass also fixes counts (that were marked with sparse_seen) + // also fix counts (that were marked with sparse_seen by the first pass) for (size_t i = 0; i < index_count; ++i) { unsigned int v = indices[i]; @@ -141,67 +144,82 @@ static void buildTriangleAdjacencySparse(TriangleAdjacency2& adjacency, const un } } -static void computeBoundingSphere(float result[4], const float points[][3], size_t count) +static void computeBoundingSphere(float result[4], const float* points, size_t count, size_t 
points_stride, const float* radii, size_t radii_stride) { assert(count > 0); + size_t points_stride_float = points_stride / sizeof(float); + size_t radii_stride_float = radii_stride / sizeof(float); + // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates size_t pmin[3] = {0, 0, 0}; size_t pmax[3] = {0, 0, 0}; for (size_t i = 0; i < count; ++i) { - const float* p = points[i]; + const float* p = points + i * points_stride_float; + float r = radii[i * radii_stride_float]; for (int axis = 0; axis < 3; ++axis) { - pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis]; - pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? i : pmax[axis]; + float bmin = points[pmin[axis] * points_stride_float + axis] - radii[pmin[axis] * radii_stride_float]; + float bmax = points[pmax[axis] * points_stride_float + axis] + radii[pmax[axis] * radii_stride_float]; + + pmin[axis] = (p[axis] - r < bmin) ? i : pmin[axis]; + pmax[axis] = (p[axis] + r > bmax) ? 
i : pmax[axis]; } } // find the pair of points with largest distance - float paxisd2 = 0; int paxis = 0; + float paxisdr = 0; for (int axis = 0; axis < 3; ++axis) { - const float* p1 = points[pmin[axis]]; - const float* p2 = points[pmax[axis]]; + const float* p1 = points + pmin[axis] * points_stride_float; + const float* p2 = points + pmax[axis] * points_stride_float; + float r1 = radii[pmin[axis] * radii_stride_float]; + float r2 = radii[pmax[axis] * radii_stride_float]; float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]); + float dr = sqrtf(d2) + r1 + r2; - if (d2 > paxisd2) + if (dr > paxisdr) { - paxisd2 = d2; + paxisdr = dr; paxis = axis; } } // use the longest segment as the initial sphere diameter - const float* p1 = points[pmin[paxis]]; - const float* p2 = points[pmax[paxis]]; + const float* p1 = points + pmin[paxis] * points_stride_float; + const float* p2 = points + pmax[paxis] * points_stride_float; + float r1 = radii[pmin[paxis] * radii_stride_float]; + float r2 = radii[pmax[paxis] * radii_stride_float]; - float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2}; - float radius = sqrtf(paxisd2) / 2; + float paxisd = sqrtf((p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2])); + float paxisk = paxisd > 0 ? 
(paxisd + r2 - r1) / (2 * paxisd) : 0.f; + + float center[3] = {p1[0] + (p2[0] - p1[0]) * paxisk, p1[1] + (p2[1] - p1[1]) * paxisk, p1[2] + (p2[2] - p1[2]) * paxisk}; + float radius = paxisdr / 2; // iteratively adjust the sphere up until all points fit for (size_t i = 0; i < count; ++i) { - const float* p = points[i]; + const float* p = points + i * points_stride_float; + float r = radii[i * radii_stride_float]; + float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]); + float d = sqrtf(d2); - if (d2 > radius * radius) + if (d + r > radius) { - float d = sqrtf(d2); - assert(d > 0); + float k = d > 0 ? (d + r - radius) / (2 * d) : 0.f; - float k = 0.5f + (radius / d) / 2; - - center[0] = center[0] * k + p[0] * (1 - k); - center[1] = center[1] * k + p[1] * (1 - k); - center[2] = center[2] * k + p[2] * (1 - k); - radius = (radius + d) / 2; + center[0] += k * (p[0] - center[0]); + center[1] += k * (p[1] - center[1]); + center[2] += k * (p[2] - center[2]); + radius = (radius + d + r) / 2; } } @@ -217,12 +235,25 @@ struct Cone float nx, ny, nz; }; -static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius) +static float getDistance(float dx, float dy, float dz, bool aa) { + if (!aa) + return sqrtf(dx * dx + dy * dy + dz * dz); + + float rx = fabsf(dx), ry = fabsf(dy), rz = fabsf(dz); + float rxy = rx > ry ? rx : ry; + return rxy > rz ? rxy : rz; +} + +static float getMeshletScore(float distance, float spread, float cone_weight, float expected_radius) +{ + if (cone_weight < 0) + return 1 + distance / expected_radius; + float cone = 1.f - spread * cone_weight; float cone_clamped = cone < 1e-3f ? 
1e-3f : cone; - return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped; + return (1 + distance / expected_radius * (1 - cone_weight)) * cone_clamped; } static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count) @@ -296,22 +327,22 @@ static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_trian meshlet_triangles[offset++] = 0; } -static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles) +static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, short* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles, bool split = false) { - unsigned char& av = used[a]; - unsigned char& bv = used[b]; - unsigned char& cv = used[c]; + short& av = used[a]; + short& bv = used[b]; + short& cv = used[c]; bool result = false; - int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); + int used_extra = (av < 0) + (bv < 0) + (cv < 0); - if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles) + if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles || split) { meshlets[meshlet_offset] = meshlet; for (size_t j = 0; j < meshlet.vertex_count; ++j) - used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff; + used[meshlet_vertices[meshlet.vertex_offset + j]] = -1; finishMeshlet(meshlet, meshlet_triangles); @@ -323,33 +354,33 @@ static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int result = true; } - if (av == 0xff) + if (av < 0) { - av = (unsigned char)meshlet.vertex_count; + av = short(meshlet.vertex_count); meshlet_vertices[meshlet.vertex_offset + 
meshlet.vertex_count++] = a; } - if (bv == 0xff) + if (bv < 0) { - bv = (unsigned char)meshlet.vertex_count; + bv = short(meshlet.vertex_count); meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b; } - if (cv == 0xff) + if (cv < 0) { - cv = (unsigned char)meshlet.vertex_count; + cv = short(meshlet.vertex_count); meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c; } - meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av; - meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv; - meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv; + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = (unsigned char)av; + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = (unsigned char)bv; + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = (unsigned char)cv; meshlet.triangle_count++; return result; } -static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Cone* meshlet_cone, unsigned int* meshlet_vertices, const unsigned int* indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int* live_triangles, const unsigned char* used, float meshlet_expected_radius, float cone_weight) +static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Cone& meshlet_cone, const unsigned int* meshlet_vertices, const unsigned int* indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int* live_triangles, const short* used, float meshlet_expected_radius, float cone_weight) { unsigned int best_triangle = ~0u; int best_priority = 5; @@ -367,7 +398,7 @@ static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Co unsigned int triangle = neighbors[j]; unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; - int extra = (used[a] == 0xff) + 
(used[b] == 0xff) + (used[c] == 0xff); + int extra = (used[a] < 0) + (used[b] < 0) + (used[c] < 0); assert(extra <= 2); int priority = -1; @@ -389,27 +420,13 @@ static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Co if (priority > best_priority) continue; - float score = 0; + const Cone& tri_cone = triangles[triangle]; - // caller selects one of two scoring functions: geometrical (based on meshlet cone) or topological (based on remaining triangles) - if (meshlet_cone) - { - const Cone& tri_cone = triangles[triangle]; + float dx = tri_cone.px - meshlet_cone.px, dy = tri_cone.py - meshlet_cone.py, dz = tri_cone.pz - meshlet_cone.pz; + float distance = getDistance(dx, dy, dz, cone_weight < 0); + float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz; - float distance2 = - (tri_cone.px - meshlet_cone->px) * (tri_cone.px - meshlet_cone->px) + - (tri_cone.py - meshlet_cone->py) * (tri_cone.py - meshlet_cone->py) + - (tri_cone.pz - meshlet_cone->pz) * (tri_cone.pz - meshlet_cone->pz); - - float spread = tri_cone.nx * meshlet_cone->nx + tri_cone.ny * meshlet_cone->ny + tri_cone.nz * meshlet_cone->nz; - - score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius); - } - else - { - // each live_triangles entry is >= 1 since it includes the current triangle we're processing - score = float(live_triangles[a] + live_triangles[b] + live_triangles[c] - 3); - } + float score = getMeshletScore(distance, spread, cone_weight, meshlet_expected_radius); // note that topology-based priority is always more important than the score // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost @@ -425,6 +442,113 @@ static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Co return best_triangle; } +static size_t appendSeedTriangles(unsigned int* seeds, const meshopt_Meshlet& meshlet, const unsigned int* meshlet_vertices, const unsigned int* 
indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int* live_triangles, float cornerx, float cornery, float cornerz) +{ + unsigned int best_seeds[kMeshletAddSeeds]; + unsigned int best_live[kMeshletAddSeeds]; + float best_score[kMeshletAddSeeds]; + + for (size_t i = 0; i < kMeshletAddSeeds; ++i) + { + best_seeds[i] = ~0u; + best_live[i] = ~0u; + best_score[i] = FLT_MAX; + } + + for (size_t i = 0; i < meshlet.vertex_count; ++i) + { + unsigned int index = meshlet_vertices[meshlet.vertex_offset + i]; + + unsigned int best_neighbor = ~0u; + unsigned int best_neighbor_live = ~0u; + + // find the neighbor with the smallest live metric + unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbors_size = adjacency.counts[index]; + + for (size_t j = 0; j < neighbors_size; ++j) + { + unsigned int triangle = neighbors[j]; + unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; + + unsigned int live = live_triangles[a] + live_triangles[b] + live_triangles[c]; + + if (live < best_neighbor_live) + { + best_neighbor = triangle; + best_neighbor_live = live; + } + } + + // add the neighbor to the list of seeds; the list is unsorted and the replacement criteria is approximate + if (best_neighbor == ~0u) + continue; + + float best_neighbor_score = getDistance(triangles[best_neighbor].px - cornerx, triangles[best_neighbor].py - cornery, triangles[best_neighbor].pz - cornerz, false); + + for (size_t j = 0; j < kMeshletAddSeeds; ++j) + { + // non-strict comparison reduces the number of duplicate seeds (triangles adjacent to multiple vertices) + if (best_neighbor_live < best_live[j] || (best_neighbor_live == best_live[j] && best_neighbor_score <= best_score[j])) + { + best_seeds[j] = best_neighbor; + best_live[j] = best_neighbor_live; + best_score[j] = best_neighbor_score; + break; + } + } + } + + // add surviving seeds to the meshlet + size_t seed_count = 0; + + for 
(size_t i = 0; i < kMeshletAddSeeds; ++i) + if (best_seeds[i] != ~0u) + seeds[seed_count++] = best_seeds[i]; + + return seed_count; +} + +static size_t pruneSeedTriangles(unsigned int* seeds, size_t seed_count, const unsigned char* emitted_flags) +{ + size_t result = 0; + + for (size_t i = 0; i < seed_count; ++i) + { + unsigned int index = seeds[i]; + + seeds[result] = index; + result += emitted_flags[index] == 0; + } + + return result; +} + +static unsigned int selectSeedTriangle(const unsigned int* seeds, size_t seed_count, const unsigned int* indices, const Cone* triangles, const unsigned int* live_triangles, float cornerx, float cornery, float cornerz) +{ + unsigned int best_seed = ~0u; + unsigned int best_live = ~0u; + float best_score = FLT_MAX; + + for (size_t i = 0; i < seed_count; ++i) + { + unsigned int index = seeds[i]; + unsigned int a = indices[index * 3 + 0], b = indices[index * 3 + 1], c = indices[index * 3 + 2]; + + unsigned int live = live_triangles[a] + live_triangles[b] + live_triangles[c]; + float score = getDistance(triangles[index].px - cornerx, triangles[index].py - cornery, triangles[index].pz - cornerz, false); + + if (live < best_live || (live == best_live && score < best_score)) + { + best_seed = index; + best_live = live; + best_score = score; + } + } + + return best_seed; +} + struct KDNode { union @@ -533,7 +657,7 @@ static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size); } -static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit) +static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, bool aa, unsigned int& result, float& limit) { const KDNode& node = nodes[root]; @@ 
-549,11 +673,8 @@ static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, const float* point = points + index * stride; - float distance2 = - (point[0] - position[0]) * (point[0] - position[0]) + - (point[1] - position[1]) * (point[1] - position[1]) + - (point[2] - position[2]) * (point[2] - position[2]); - float distance = sqrtf(distance2); + float dx = point[0] - position[0], dy = point[1] - position[1], dz = point[2] - position[2]; + float distance = getDistance(dx, dy, dz, aa); if (distance < limit) { @@ -569,11 +690,11 @@ static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, unsigned int first = (delta <= 0) ? 0 : node.children; unsigned int second = first ^ node.children; - kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit); + kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, aa, result, limit); // only process the other node if it can have a match based on closest distance so far if (fabsf(delta) <= limit) - kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit); + kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, aa, result, limit); } } @@ -601,7 +722,7 @@ size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_ return meshlet_limit_vertices > meshlet_limit_triangles ? 
meshlet_limit_vertices : meshlet_limit_triangles; } -size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) +size_t meshopt_buildMeshletsFlex(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float cone_weight, float split_factor) { using namespace meshopt; @@ -610,10 +731,14 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve assert(vertex_positions_stride % sizeof(float) == 0); assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); - assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); - assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned + assert(min_triangles >= 1 && min_triangles <= max_triangles && max_triangles <= kMeshletMaxTriangles); + assert(min_triangles % 4 == 0 && max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned - assert(cone_weight >= 0 && cone_weight <= 1); + assert(cone_weight <= 1); // negative cone weight switches metric to optimize for axis-aligned meshlets + assert(split_factor >= 0); + + if (index_count == 0) + return 0; meshopt_Allocator allocator; @@ -647,9 +772,42 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve KDNode* nodes = allocator.allocate(face_count * 2); kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8); - // index of the vertex in the meshlet, 
0xff if the vertex isn't used - unsigned char* used = allocator.allocate(vertex_count); - memset(used, -1, vertex_count); + // find a specific corner of the mesh to use as a starting point for meshlet flow + float cornerx = FLT_MAX, cornery = FLT_MAX, cornerz = FLT_MAX; + + for (size_t i = 0; i < face_count; ++i) + { + const Cone& tri = triangles[i]; + + cornerx = cornerx > tri.px ? tri.px : cornerx; + cornery = cornery > tri.py ? tri.py : cornery; + cornerz = cornerz > tri.pz ? tri.pz : cornerz; + } + + // index of the vertex in the meshlet, -1 if the vertex isn't used + short* used = allocator.allocate(vertex_count); + memset(used, -1, vertex_count * sizeof(short)); + + // initial seed triangle is the one closest to the corner + unsigned int initial_seed = ~0u; + float initial_score = FLT_MAX; + + for (size_t i = 0; i < face_count; ++i) + { + const Cone& tri = triangles[i]; + + float score = getDistance(tri.px - cornerx, tri.py - cornery, tri.pz - cornerz, false); + + if (initial_seed == ~0u || score < initial_score) + { + initial_seed = unsigned(i); + initial_score = score; + } + } + + // seed triangles to continue meshlet flow + unsigned int seeds[kMeshletMaxSeeds] = {}; + size_t seed_count = 0; meshopt_Meshlet meshlet = {}; size_t meshlet_offset = 0; @@ -660,35 +818,53 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve { Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count); - unsigned int best_triangle = getNeighborTriangle(meshlet, &meshlet_cone, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, cone_weight); - int best_extra = best_triangle == ~0u ? 
-1 : (used[indices[best_triangle * 3 + 0]] == 0xff) + (used[indices[best_triangle * 3 + 1]] == 0xff) + (used[indices[best_triangle * 3 + 2]] == 0xff); + unsigned int best_triangle = ~0u; - // if the best triangle doesn't fit into current meshlet, the spatial scoring we've used is not very meaningful, so we re-select using topological scoring - if (best_triangle != ~0u && (meshlet.vertex_count + best_extra > max_vertices || meshlet.triangle_count >= max_triangles)) - { - best_triangle = getNeighborTriangle(meshlet, NULL, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, 0.f); - } + // for the first triangle, we don't have a meshlet cone yet, so we use the initial seed + // to continue the meshlet, we select an adjacent triangle based on connectivity and spatial scoring + if (meshlet_offset == 0 && meshlet.triangle_count == 0) + best_triangle = initial_seed; + else + best_triangle = getNeighborTriangle(meshlet, meshlet_cone, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, cone_weight); - // when we run out of neighboring triangles we need to switch to spatial search; we currently just pick the closest triangle irrespective of connectivity + bool split = false; + + // when we run out of adjacent triangles we need to switch to spatial search; we currently just pick the closest triangle irrespective of connectivity if (best_triangle == ~0u) { float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz}; unsigned int index = ~0u; - float limit = FLT_MAX; + float distance = FLT_MAX; - kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit); + kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, cone_weight < 0.f, index, distance); best_triangle = index; + split = meshlet.triangle_count >= min_triangles && split_factor > 0 && distance > meshlet_expected_radius * 
split_factor; } if (best_triangle == ~0u) break; + int best_extra = (used[indices[best_triangle * 3 + 0]] < 0) + (used[indices[best_triangle * 3 + 1]] < 0) + (used[indices[best_triangle * 3 + 2]] < 0); + + // if the best triangle doesn't fit into current meshlet, we re-select using seeds to maintain global flow + if (split || (meshlet.vertex_count + best_extra > max_vertices || meshlet.triangle_count >= max_triangles)) + { + seed_count = pruneSeedTriangles(seeds, seed_count, emitted_flags); + seed_count = (seed_count + kMeshletAddSeeds <= kMeshletMaxSeeds) ? seed_count : kMeshletMaxSeeds - kMeshletAddSeeds; + seed_count += appendSeedTriangles(seeds + seed_count, meshlet, meshlet_vertices, indices, adjacency, triangles, live_triangles, cornerx, cornery, cornerz); + + unsigned int best_seed = selectSeedTriangle(seeds, seed_count, indices, triangles, live_triangles, cornerx, cornery, cornerz); + + // we may not find a valid seed triangle if the mesh is disconnected as seeds are based on adjacency + best_triangle = best_seed != ~0u ? 
best_seed : best_triangle; + } + unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2]; assert(a < vertex_count && b < vertex_count && c < vertex_count); // add meshlet to the output; when the current meshlet is full we reset the accumulated bounds - if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles)) + if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles, split)) { meshlet_offset++; memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc)); @@ -725,6 +901,7 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve meshlet_cone_acc.ny += triangles[best_triangle].ny; meshlet_cone_acc.nz += triangles[best_triangle].nz; + assert(!emitted_flags[best_triangle]); emitted_flags[best_triangle] = 1; } @@ -735,10 +912,17 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve meshlets[meshlet_offset++] = meshlet; } - assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, min_triangles)); return meshlet_offset; } +size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) +{ + assert(cone_weight >= 0); // to use negative cone weight, use meshopt_buildMeshletsFlex + + return meshopt_buildMeshletsFlex(meshlets, meshlet_vertices, meshlet_triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, max_triangles, cone_weight, 0.0f); +} + size_t 
meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) { using namespace meshopt; @@ -751,9 +935,9 @@ size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshle meshopt_Allocator allocator; - // index of the vertex in the meshlet, 0xff if the vertex isn't used - unsigned char* used = allocator.allocate(vertex_count); - memset(used, -1, vertex_count); + // index of the vertex in the meshlet, -1 if the vertex isn't used + short* used = allocator.allocate(vertex_count); + memset(used, -1, vertex_count * sizeof(short)); meshopt_Meshlet meshlet = {}; size_t meshlet_offset = 0; @@ -834,15 +1018,17 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t if (triangles == 0) return bounds; + const float rzero = 0.f; + // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well float psphere[4] = {}; - computeBoundingSphere(psphere, corners[0], triangles * 3); + computeBoundingSphere(psphere, corners[0][0], triangles * 3, sizeof(float) * 3, &rzero, 0); float center[3] = {psphere[0], psphere[1], psphere[2]}; // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis float nsphere[4] = {}; - computeBoundingSphere(nsphere, normals, triangles); + computeBoundingSphere(nsphere, normals[0], triangles, sizeof(float) * 3, &rzero, 0); float axis[3] = {nsphere[0], nsphere[1], nsphere[2]}; float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]); @@ -952,6 +1138,33 @@ meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); } +meshopt_Bounds meshopt_computeSphereBounds(const float* positions, size_t 
count, size_t positions_stride, const float* radii, size_t radii_stride) +{ + using namespace meshopt; + + assert(positions_stride >= 12 && positions_stride <= 256); + assert(positions_stride % sizeof(float) == 0); + assert((radii_stride >= 4 && radii_stride <= 256) || radii == NULL); + assert(radii_stride % sizeof(float) == 0); + + meshopt_Bounds bounds = {}; + + if (count == 0) + return bounds; + + const float rzero = 0.f; + + float psphere[4] = {}; + computeBoundingSphere(psphere, positions, count, positions_stride, radii ? radii : &rzero, radii ? radii_stride : 0); + + bounds.center[0] = psphere[0]; + bounds.center[1] = psphere[1]; + bounds.center[2] = psphere[2]; + bounds.radius = psphere[3]; + + return bounds; +} + void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count) { using namespace meshopt; @@ -1019,23 +1232,23 @@ void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* mesh // reorder meshlet vertices for access locality assuming index buffer is scanned sequentially unsigned int order[kMeshletMaxVertices]; - unsigned char remap[kMeshletMaxVertices]; - memset(remap, -1, vertex_count); + short remap[kMeshletMaxVertices]; + memset(remap, -1, vertex_count * sizeof(short)); size_t vertex_offset = 0; for (size_t i = 0; i < triangle_count * 3; ++i) { - unsigned char& r = remap[indices[i]]; + short& r = remap[indices[i]]; - if (r == 0xff) + if (r < 0) { - r = (unsigned char)(vertex_offset); + r = short(vertex_offset); order[vertex_offset] = vertices[indices[i]]; vertex_offset++; } - indices[i] = r; + indices[i] = (unsigned char)r; } assert(vertex_offset <= vertex_count); diff --git a/3rdparty/meshoptimizer/src/indexcodec.cpp b/3rdparty/meshoptimizer/src/indexcodec.cpp index b30046005..b4fdfe16d 100644 --- a/3rdparty/meshoptimizer/src/indexcodec.cpp +++ b/3rdparty/meshoptimizer/src/indexcodec.cpp @@ -14,6 +14,7 @@ const unsigned char kIndexHeader = 0xe0; const 
unsigned char kSequenceHeader = 0xd0; static int gEncodeIndexVersion = 1; +const int kDecodeIndexVersion = 1; typedef unsigned int VertexFifo[16]; typedef unsigned int EdgeFifo[16][2]; @@ -354,11 +355,28 @@ size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count) void meshopt_encodeIndexVersion(int version) { - assert(unsigned(version) <= 1); + assert(unsigned(version) <= unsigned(meshopt::kDecodeIndexVersion)); meshopt::gEncodeIndexVersion = version; } +int meshopt_decodeIndexVersion(const unsigned char* buffer, size_t buffer_size) +{ + if (buffer_size < 1) + return -1; + + unsigned char header = buffer[0]; + + if ((header & 0xf0) != meshopt::kIndexHeader && (header & 0xf0) != meshopt::kSequenceHeader) + return -1; + + int version = header & 0x0f; + if (version > meshopt::kDecodeIndexVersion) + return -1; + + return version; +} + int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size) { using namespace meshopt; @@ -374,7 +392,7 @@ int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t inde return -1; int version = buffer[0] & 0x0f; - if (version > 1) + if (version > kDecodeIndexVersion) return -1; EdgeFifo edgefifo; @@ -627,7 +645,7 @@ int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t in return -1; int version = buffer[0] & 0x0f; - if (version > 1) + if (version > kDecodeIndexVersion) return -1; const unsigned char* data = buffer + 1; diff --git a/3rdparty/meshoptimizer/src/meshoptimizer.h b/3rdparty/meshoptimizer/src/meshoptimizer.h index baeba1522..295324c78 100644 --- a/3rdparty/meshoptimizer/src/meshoptimizer.h +++ b/3rdparty/meshoptimizer/src/meshoptimizer.h @@ -1,5 +1,5 @@ /** - * meshoptimizer - version 0.22 + * meshoptimizer - version 0.23 * * Copyright (C) 2016-2025, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://github.com/zeux/meshoptimizer @@ -12,7 
+12,7 @@ #include /* Version macro; major * 1000 + minor * 10 + patch */ -#define MESHOPTIMIZER_VERSION 220 /* 0.22 */ +#define MESHOPTIMIZER_VERSION 230 /* 0.23 */ /* If no API is defined, assume default */ #ifndef MESHOPTIMIZER_API @@ -243,6 +243,13 @@ MESHOPTIMIZER_API void meshopt_encodeIndexVersion(int version); */ MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size); +/** + * Get encoded index format version + * Returns format version of the encoded index buffer/sequence, or -1 if the buffer header is invalid + * Note that a non-negative value doesn't guarantee that the buffer will be decoded correctly if the input is malformed. + */ +MESHOPTIMIZER_API int meshopt_decodeIndexVersion(const unsigned char* buffer, size_t buffer_size); + /** * Index sequence encoder * Encodes index sequence into an array of bytes that is generally smaller and compresses better compared to original. @@ -285,7 +292,7 @@ MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, si * * level should be in the range [0, 3] with 0 being the fastest and 3 being the slowest and producing the best compression ratio. 
*/ -MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level); /** * Set vertex encoder format version @@ -303,6 +310,13 @@ MESHOPTIMIZER_API void meshopt_encodeVertexVersion(int version); */ MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size); +/** + * Get encoded vertex format version + * Returns format version of the encoded vertex buffer, or -1 if the buffer header is invalid + * Note that a non-negative value doesn't guarantee that the buffer will be decoded correctly if the input is malformed. + */ +MESHOPTIMIZER_API int meshopt_decodeVertexVersion(const unsigned char* buffer, size_t buffer_size); + /** * Vertex buffer filters * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place. 
@@ -344,7 +358,7 @@ enum meshopt_EncodeExpMode meshopt_EncodeExpSharedVector, /* When encoding exponents, use shared value for each component of all vectors (best compression) */ meshopt_EncodeExpSharedComponent, - /* Experimental: When encoding exponents, use separate values for each component, but clamp to 0 (good quality if very small values are not important) */ + /* When encoding exponents, use separate values for each component, but clamp to 0 (good quality if very small values are not important) */ meshopt_EncodeExpClamped, }; @@ -385,7 +399,7 @@ enum MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error); /** - * Experimental: Mesh simplifier with attribute metric + * Mesh simplifier with attribute metric * The algorithm enhances meshopt_simplify by incorporating attribute values into the error metric used to prioritize simplification order; see meshopt_simplify documentation for details. * Note that the number of attributes affects memory requirements and running time; this algorithm requires ~1.5x more memory and time compared to meshopt_simplify when using 4 scalar attributes. 
* @@ -394,7 +408,7 @@ MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsig * attribute_count must be <= 32 * vertex_lock can be NULL; when it's not NULL, it should have a value for each vertex; 1 denotes vertices that can't be moved */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error); +MESHOPTIMIZER_API size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error); /** * Experimental: Mesh simplifier (sloppy) @@ -412,7 +426,7 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* d MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); /** - * Experimental: Point cloud simplifier + * Point cloud simplifier * Reduces the number of points in the cloud to reach the given target * Returns the number of points after simplification, with destination containing new index data * The resulting index buffer references vertices from the original vertex buffer. 
@@ -420,10 +434,10 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destinati * * destination must contain enough space for the target index buffer (target_vertex_count elements) * vertex_positions should have float3 position in the first 12 bytes of each vertex - * vertex_colors should can be NULL; when it's not NULL, it should have float3 color in the first 12 bytes of each vertex + * vertex_colors can be NULL; when it's not NULL, it should have float3 color in the first 12 bytes of each vertex * color_weight determines relative priority of color wrt position; 1.0 is a safe default */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t target_vertex_count); +MESHOPTIMIZER_API size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t target_vertex_count); /** * Returns the error scaling factor used by the simplifier to convert between absolute and relative extents @@ -530,7 +544,7 @@ struct meshopt_Meshlet * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3 * vertex_positions should have float3 position in the first 12 bytes of each vertex - * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512; max_triangles must be divisible by 4) + * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 256, max_triangles <= 512; max_triangles must be divisible by 4) * cone_weight should be set to 0 
when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency */ MESHOPTIMIZER_API size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); @@ -538,14 +552,30 @@ MESHOPTIMIZER_API size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshl MESHOPTIMIZER_API size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles); /** - * Experimental: Meshlet optimizer + * Experimental: Meshlet builder with flexible cluster sizes + * Splits the mesh into a set of meshlets, similarly to meshopt_buildMeshlets, but allows to specify minimum and maximum number of triangles per meshlet. + * Clusters between min and max triangle counts are split when the cluster size would have exceeded the expected cluster size by more than split_factor. + * Additionally, allows to switch to axis aligned clusters by setting cone_weight to a negative value. + * + * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound using min_triangles (not max!) 
+ * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices + * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3 + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * max_vertices, min_triangles and max_triangles must not exceed implementation limits (max_vertices <= 256, max_triangles <= 512; min_triangles <= max_triangles; both min_triangles and max_triangles must be divisible by 4) + * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency; additionally, cone_weight can be set to a negative value to prioritize axis aligned clusters (for raytracing) instead + * split_factor should be set to a non-negative value; when greater than 0, clusters that have large bounds may be split unless they are under the min_triangles threshold + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsFlex(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float cone_weight, float split_factor); + +/** + * Meshlet optimizer * Reorders meshlet vertices and triangles to maximize locality to improve rasterizer throughput * * meshlet_triangles and meshlet_vertices must refer to meshlet triangle and vertex index data; when buildMeshlets* is used, these * need to be computed from meshlet's vertex_offset and triangle_offset - * triangle_count and vertex_count must not exceed implementation limits (vertex_count <= 255 - not 256!, triangle_count <= 512) + * triangle_count and vertex_count must not exceed implementation limits (vertex_count <= 256, triangle_count <= 512) */ 
-MESHOPTIMIZER_EXPERIMENTAL void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count); +MESHOPTIMIZER_API void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count); struct meshopt_Bounds { @@ -589,6 +619,27 @@ struct meshopt_Bounds MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +/** + * Experimental: Sphere bounds generator + * Creates bounding sphere around a set of points or a set of spheres; returns the center and radius of the sphere, with other fields of the result set to 0. + * + * positions should have float3 position in the first 12 bytes of each element + * radii can be NULL; when it's not NULL, it should have a non-negative float radius in the first 4 bytes of each element + */ +MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeSphereBounds(const float* positions, size_t count, size_t positions_stride, const float* radii, size_t radii_stride); + +/** + * Experimental: Cluster partitioner + * Partitions clusters into groups of similar size, prioritizing grouping clusters that share vertices. 
+ * + * destination must contain enough space for the resulting partition data (cluster_count elements) + * destination[i] will contain the partition id for cluster i, with the total number of partitions returned by the function + * cluster_indices should have the vertex indices referenced by each cluster, stored sequentially + * cluster_index_counts should have the number of indices in each cluster; sum of all cluster_index_counts must be equal to total_index_count + * target_partition_size is a target size for each partition, in clusters; the resulting partitions may be smaller or larger + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, size_t vertex_count, size_t target_partition_size); + /** * Spatial sorter * Generates a remap table that can be used to reorder points for spatial locality. @@ -724,8 +775,12 @@ inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* mes template inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); template +inline size_t meshopt_buildMeshletsFlex(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float cone_weight, float split_factor); +template inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); template +inline size_t meshopt_partitionClusters(unsigned int* destination, const T* cluster_indices, size_t
total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, size_t vertex_count, size_t target_partition_size); +template inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); #endif @@ -1104,6 +1159,14 @@ inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles); } +template +inline size_t meshopt_buildMeshletsFlex(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float cone_weight, float split_factor) +{ + meshopt_IndexAdapter in(NULL, indices, index_count); + + return meshopt_buildMeshletsFlex(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, min_triangles, max_triangles, cone_weight, split_factor); +} + template inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { @@ -1112,6 +1175,14 @@ inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t inde return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); } +template +inline size_t meshopt_partitionClusters(unsigned int* destination, const T* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, size_t vertex_count, size_t target_partition_size) +{ + meshopt_IndexAdapter in(NULL, cluster_indices, total_index_count); + + return 
meshopt_partitionClusters(destination, in.data, total_index_count, cluster_index_counts, cluster_count, vertex_count, target_partition_size); +} + template inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { diff --git a/3rdparty/meshoptimizer/src/partition.cpp b/3rdparty/meshoptimizer/src/partition.cpp new file mode 100644 index 000000000..9c2299805 --- /dev/null +++ b/3rdparty/meshoptimizer/src/partition.cpp @@ -0,0 +1,429 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include +#include +#include + +namespace meshopt +{ + +struct ClusterAdjacency +{ + unsigned int* offsets; + unsigned int* clusters; + unsigned int* shared; +}; + +static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, unsigned char* used, size_t vertex_count, meshopt_Allocator& allocator) +{ + unsigned int* ref_offsets = allocator.allocate(vertex_count + 1); + + // compute number of clusters referenced by each vertex + memset(ref_offsets, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < cluster_count; ++i) + { + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + { + unsigned int v = cluster_indices[j]; + assert(v < vertex_count); + + ref_offsets[v] += 1 - used[v]; + used[v] = 1; + } + + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + used[cluster_indices[j]] = 0; + } + + // compute (worst-case) number of adjacent clusters for each cluster + size_t total_adjacency = 0; + + for (size_t i = 0; i < cluster_count; ++i) + { + size_t count = 0; + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + { + unsigned int v = cluster_indices[j]; + assert(v < vertex_count); + + // worst case is every vertex has a disjoint 
cluster list + count += used[v] ? 0 : ref_offsets[v] - 1; + used[v] = 1; + } + + // ... but only every other cluster can be adjacent in the end + total_adjacency += count < cluster_count - 1 ? count : cluster_count - 1; + + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + used[cluster_indices[j]] = 0; + } + + // we can now allocate adjacency buffers + adjacency.offsets = allocator.allocate(cluster_count + 1); + adjacency.clusters = allocator.allocate(total_adjacency); + adjacency.shared = allocator.allocate(total_adjacency); + + // convert ref counts to offsets + size_t total_refs = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + size_t count = ref_offsets[i]; + ref_offsets[i] = unsigned(total_refs); + total_refs += count; + } + + unsigned int* ref_data = allocator.allocate(total_refs); + + // fill cluster refs for each vertex + for (size_t i = 0; i < cluster_count; ++i) + { + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + { + unsigned int v = cluster_indices[j]; + assert(v < vertex_count); + + if (used[v]) + continue; + + ref_data[ref_offsets[v]++] = unsigned(i); + used[v] = 1; + } + + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + used[cluster_indices[j]] = 0; + } + + // after the previous pass, ref_offsets contain the end of the data for each vertex; shift it forward to get the start + memmove(ref_offsets + 1, ref_offsets, vertex_count * sizeof(unsigned int)); + ref_offsets[0] = 0; + + // fill cluster adjacency for each cluster... 
+ adjacency.offsets[0] = 0; + + for (size_t i = 0; i < cluster_count; ++i) + { + unsigned int* adj = adjacency.clusters + adjacency.offsets[i]; + unsigned int* shd = adjacency.shared + adjacency.offsets[i]; + size_t count = 0; + + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + { + unsigned int v = cluster_indices[j]; + assert(v < vertex_count); + + if (used[v]) + continue; + + // merge the entire cluster list of each vertex into current list + for (size_t k = ref_offsets[v]; k < ref_offsets[v + 1]; ++k) + { + unsigned int c = ref_data[k]; + assert(c < cluster_count); + + if (c == unsigned(i)) + continue; + + // if the cluster is already in the list, increment the shared count + bool found = false; + for (size_t l = 0; l < count; ++l) + if (adj[l] == c) + { + found = true; + shd[l]++; + break; + } + + // .. or append a new cluster + if (!found) + { + adj[count] = c; + shd[count] = 1; + count++; + } + } + + used[v] = 1; + } + + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + used[cluster_indices[j]] = 0; + + // mark the end of the adjacency list; the next cluster will start there as well + adjacency.offsets[i + 1] = adjacency.offsets[i] + unsigned(count); + } + + assert(adjacency.offsets[cluster_count] <= total_adjacency); + + // ref_offsets can't be deallocated as it was allocated before adjacency + allocator.deallocate(ref_data); +} + +struct ClusterGroup +{ + int group; + int next; + unsigned int size; // 0 unless root + unsigned int vertices; +}; + +struct GroupOrder +{ + unsigned int id; + int order; +}; + +static void heapPush(GroupOrder* heap, size_t size, GroupOrder item) +{ + // insert a new element at the end (breaks heap invariant) + heap[size++] = item; + + // bubble up the new element to its correct position + size_t i = size - 1; + while (i > 0 && heap[i].order < heap[(i - 1) / 2].order) + { + size_t p = (i - 1) / 2; + + GroupOrder temp = heap[i]; + heap[i] = heap[p]; + heap[p] = temp; + i = p; + } +} + 
+static GroupOrder heapPop(GroupOrder* heap, size_t size) +{ + assert(size > 0); + GroupOrder top = heap[0]; + + // move the last element to the top (breaks heap invariant) + heap[0] = heap[--size]; + + // bubble down the new top element to its correct position + size_t i = 0; + while (i * 2 + 1 < size) + { + // find the smallest child + size_t j = i * 2 + 1; + j += (j + 1 < size && heap[j + 1].order < heap[j].order); + + // if the parent is already smaller than both children, we're done + if (heap[j].order >= heap[i].order) + break; + + // otherwise, swap the parent and child and continue + GroupOrder temp = heap[i]; + heap[i] = heap[j]; + heap[j] = temp; + i = j; + } + + return top; +} + +static unsigned int countTotal(const ClusterGroup* groups, int id, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, unsigned char* used) +{ + unsigned int total = 0; + + for (int i = id; i >= 0; i = groups[i].next) + { + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + { + unsigned int v = cluster_indices[j]; + total += 1 - used[v]; + used[v] = 1; + } + } + + for (int i = id; i >= 0; i = groups[i].next) + { + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + used[cluster_indices[j]] = 0; + } + + return total; +} + +static unsigned int countShared(const ClusterGroup* groups, int group1, int group2, const ClusterAdjacency& adjacency) +{ + unsigned int total = 0; + + for (int i1 = group1; i1 >= 0; i1 = groups[i1].next) + for (int i2 = group2; i2 >= 0; i2 = groups[i2].next) + { + for (unsigned int adj = adjacency.offsets[i1]; adj < adjacency.offsets[i1 + 1]; ++adj) + if (adjacency.clusters[adj] == unsigned(i2)) + { + total += adjacency.shared[adj]; + break; + } + } + + return total; +} + +static int pickGroupToMerge(const ClusterGroup* groups, int id, const ClusterAdjacency& adjacency, size_t max_partition_size) +{ + assert(groups[id].size > 0); + + float group_rsqrt = 1.f / sqrtf(float(int(groups[id].vertices))); + 
+ int best_group = -1; + float best_score = 0; + + for (int ci = id; ci >= 0; ci = groups[ci].next) + { + for (unsigned int adj = adjacency.offsets[ci]; adj != adjacency.offsets[ci + 1]; ++adj) + { + int other = groups[adjacency.clusters[adj]].group; + if (other < 0) + continue; + + assert(groups[other].size > 0); + if (groups[id].size + groups[other].size > max_partition_size) + continue; + + unsigned int shared = countShared(groups, id, other, adjacency); + float other_rsqrt = 1.f / sqrtf(float(int(groups[other].vertices))); + + // normalize shared count by the expected boundary of each group (+ keeps scoring symmetric) + float score = float(int(shared)) * (group_rsqrt + other_rsqrt); + + if (score > best_score) + { + best_group = other; + best_score = score; + } + } + } + + return best_group; +} + +} // namespace meshopt + +size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, size_t vertex_count, size_t target_partition_size) +{ + using namespace meshopt; + + assert(target_partition_size > 0); + + size_t max_partition_size = target_partition_size + target_partition_size * 3 / 8; + + meshopt_Allocator allocator; + + unsigned char* used = allocator.allocate(vertex_count); + memset(used, 0, vertex_count); + + // build cluster index offsets as a prefix sum + unsigned int* cluster_offsets = allocator.allocate(cluster_count + 1); + unsigned int cluster_nextoffset = 0; + + for (size_t i = 0; i < cluster_count; ++i) + { + assert(cluster_index_counts[i] > 0); + + cluster_offsets[i] = cluster_nextoffset; + cluster_nextoffset += cluster_index_counts[i]; + } + + assert(cluster_nextoffset == total_index_count); + cluster_offsets[cluster_count] = unsigned(total_index_count); + + // build cluster adjacency along with edge weights (shared vertex count) + ClusterAdjacency adjacency = {}; + buildClusterAdjacency(adjacency, cluster_indices, 
cluster_offsets, cluster_count, used, vertex_count, allocator); + + ClusterGroup* groups = allocator.allocate(cluster_count); + + GroupOrder* order = allocator.allocate(cluster_count); + size_t pending = 0; + + // create a singleton group for each cluster and order them by priority + for (size_t i = 0; i < cluster_count; ++i) + { + groups[i].group = int(i); + groups[i].next = -1; + groups[i].size = 1; + groups[i].vertices = countTotal(groups, int(i), cluster_indices, cluster_offsets, used); + + GroupOrder item = {}; + item.id = unsigned(i); + item.order = groups[i].vertices; + + heapPush(order, pending++, item); + } + + // iteratively merge the smallest group with the best group + while (pending) + { + GroupOrder top = heapPop(order, pending--); + + // this group was merged into another group earlier + if (groups[top.id].size == 0) + continue; + + // disassociate clusters from the group to prevent them from being merged again; we will re-associate them if the group is reinserted + for (int i = top.id; i >= 0; i = groups[i].next) + { + assert(groups[i].group == int(top.id)); + groups[i].group = -1; + } + + // the group is large enough, emit as is + if (groups[top.id].size >= target_partition_size) + continue; + + int best_group = pickGroupToMerge(groups, top.id, adjacency, max_partition_size); + + // we can't grow the group any more, emit as is + if (best_group == -1) + continue; + + // compute shared vertices to adjust the total vertices estimate after merging + unsigned int shared = countShared(groups, top.id, best_group, adjacency); + + // combine groups by linking them together + assert(groups[best_group].size > 0); + + for (int i = top.id; i >= 0; i = groups[i].next) + if (groups[i].next < 0) + { + groups[i].next = best_group; + break; + } + + // update group sizes; note, the vertex update is an approximation which avoids recomputing the true size via countTotal + groups[top.id].size += groups[best_group].size; + groups[top.id].vertices += 
groups[best_group].vertices; + groups[top.id].vertices = (groups[top.id].vertices > shared) ? groups[top.id].vertices - shared : 1; + + groups[best_group].size = 0; + groups[best_group].vertices = 0; + + // re-associate all clusters back to the merged group + for (int i = top.id; i >= 0; i = groups[i].next) + groups[i].group = int(top.id); + + top.order = groups[top.id].vertices; + heapPush(order, pending++, top); + } + + size_t next_group = 0; + + for (size_t i = 0; i < cluster_count; ++i) + { + if (groups[i].size == 0) + continue; + + for (int j = int(i); j >= 0; j = groups[j].next) + destination[j] = unsigned(next_group); + + next_group++; + } + + assert(next_group <= cluster_count); + return next_group; +} diff --git a/3rdparty/meshoptimizer/src/simplifier.cpp b/3rdparty/meshoptimizer/src/simplifier.cpp index d464fc607..cf0a8a187 100644 --- a/3rdparty/meshoptimizer/src/simplifier.cpp +++ b/3rdparty/meshoptimizer/src/simplifier.cpp @@ -437,8 +437,13 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned { // vertex_lock may lock any wedge, not just the primary vertex, so we need to lock the primary vertex and relock any wedges for (size_t i = 0; i < vertex_count; ++i) - if (vertex_lock[sparse_remap ? sparse_remap[i] : i]) + { + unsigned int ri = sparse_remap ? 
sparse_remap[i] : unsigned(i); + assert(vertex_lock[ri] <= 1); // values other than 0/1 are reserved for future use + + if (vertex_lock[ri]) result[remap[i]] = Kind_Locked; + } for (size_t i = 0; i < vertex_count; ++i) if (result[remap[i]] == Kind_Locked) diff --git a/3rdparty/meshoptimizer/src/vertexcodec.cpp b/3rdparty/meshoptimizer/src/vertexcodec.cpp index b0ea61f5b..53cf9d753 100644 --- a/3rdparty/meshoptimizer/src/vertexcodec.cpp +++ b/3rdparty/meshoptimizer/src/vertexcodec.cpp @@ -123,6 +123,7 @@ namespace meshopt const unsigned char kVertexHeader = 0xa0; static int gEncodeVertexVersion = 0; +const int kDecodeVertexVersion = 1; const size_t kVertexBlockSizeBytes = 8192; const size_t kVertexBlockMaxSize = 256; @@ -1803,11 +1804,28 @@ size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size) void meshopt_encodeVertexVersion(int version) { - assert(unsigned(version) <= 1); + assert(unsigned(version) <= unsigned(meshopt::kDecodeVertexVersion)); meshopt::gEncodeVertexVersion = version; } +int meshopt_decodeVertexVersion(const unsigned char* buffer, size_t buffer_size) +{ + if (buffer_size < 1) + return -1; + + unsigned char header = buffer[0]; + + if ((header & 0xf0) != meshopt::kVertexHeader) + return -1; + + int version = header & 0x0f; + if (version > meshopt::kDecodeVertexVersion) + return -1; + + return version; +} + int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size) { using namespace meshopt; @@ -1844,7 +1862,7 @@ int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t ve return -1; int version = data_header & 0x0f; - if (version > 1) + if (version > kDecodeVertexVersion) return -1; size_t tail_size = vertex_size + (version == 0 ? 0 : vertex_size / 4);