Skip to content

Commit 41d6036

Browse files
tarang-jainlowener
authored and committed
[FEA] IVF-PQ to Write Flat PQ Codes (#1607)
This PR brings new params to ivf_pq: an option for the user to choose the layout of the ivf lists. The lists can be flat (no interleaving) or interleaved (current default). Flat codes allow building the index in a CPU-compatible format. [UPDATE as of 12/19/2025]: After #1278 is merged, we can unify IVF-PQ and PQ API codepaths. [UPDATE 01/08/2026]: This PR can be merged before #1278. The flat code-writing can potentially be reverted once #1278 is merged (so we can later use the PQ preprocessing API directly). However, that will come naturally as part of a broader unification of IVF-PQ and PQ codepaths. [Benchmarks 01/15/2026]: ## IVF-PQ Layout Benchmark Results **Dataset**: 1,000,000 vectors × 128 dimensions | **pq_dim**: 32 pq_bits | Code Size | Direct FLAT Build (ms) | INTERLEAVED Build (ms) | Convert INTERLEAVED to FLAT with Codepacker (ms) | Total time for INTERLEAVED build + Conversion to FLAT with Codepacker (unpack) (ms) | Overhead | |:-------:|:---------:|:---------------:|:----------------------:|:-------------------:|:----------------------:|:--------:| | 8 | 32 bytes | 372.46 | 385.86 | 985.28 | 1371.13 | 3.68× | | 6 | 24 bytes | 298.83 | 300.99 | 961.82 | 1262.82 | 4.23× | | 5 | 20 bytes | 283.25 | 281.95 | 795.43 | 1077.38 | 3.80× | | 4 | 16 bytes | 270.63 | 271.01 | 489.73 | 760.75 | 2.81× | Authors: - Tarang Jain (https://github.com/tarang-jain) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Robert Maynard (https://github.com/robertmaynard) URL: #1607
1 parent 93fc12f commit 41d6036

29 files changed

+1173
-331
lines changed

c/include/cuvs/neighbors/ivf_pq.h

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,18 @@ extern "C" {
2323
* @brief A type for specifying how PQ codebooks are created
2424
*
2525
*/
26-
enum codebook_gen { // NOLINT
27-
PER_SUBSPACE = 0, // NOLINT
28-
PER_CLUSTER = 1, // NOLINT
26+
enum cuvsIvfPqCodebookGen {
27+
CUVS_IVF_PQ_CODEBOOK_GEN_PER_SUBSPACE = 0,
28+
CUVS_IVF_PQ_CODEBOOK_GEN_PER_CLUSTER = 1,
29+
};
30+
31+
/**
32+
* @brief A type for specifying the memory layout of IVF-PQ list data
33+
*
34+
*/
35+
enum cuvsIvfPqListLayout {
36+
CUVS_IVF_PQ_LIST_LAYOUT_FLAT = 0,
37+
CUVS_IVF_PQ_LIST_LAYOUT_INTERLEAVED = 1,
2938
};
3039

3140
/**
@@ -80,7 +89,7 @@ struct cuvsIvfPqIndexParams {
8089
*/
8190
uint32_t pq_dim;
8291
/** How PQ codebooks are created. */
83-
enum codebook_gen codebook_kind;
92+
enum cuvsIvfPqCodebookGen codebook_kind;
8493
/**
8594
* Apply a random rotation matrix on the input data and queries even if `dim % pq_dim == 0`.
8695
*
@@ -114,6 +123,14 @@ struct cuvsIvfPqIndexParams {
114123
* points to train each codebook.
115124
*/
116125
uint32_t max_train_points_per_pq_code;
126+
/**
127+
* Memory layout of the IVF-PQ list data.
128+
*
129+
* - CUVS_IVF_PQ_LIST_LAYOUT_FLAT: Codes are stored contiguously, one vector's codes after another.
130+
* - CUVS_IVF_PQ_LIST_LAYOUT_INTERLEAVED: Codes are interleaved for optimized search performance.
131+
* This is the default and recommended for search workloads.
132+
*/
133+
enum cuvsIvfPqListLayout codes_layout;
117134
};
118135

119136
typedef struct cuvsIvfPqIndexParams* cuvsIvfPqIndexParams_t;
@@ -294,8 +311,8 @@ cuvsError_t cuvsIvfPqIndexGetCentersPadded(cuvsIvfPqIndex_t index, DLManagedTens
294311
/**
295312
* @brief Get the PQ cluster centers
296313
*
297-
* - codebook_gen::PER_SUBSPACE: [pq_dim , pq_len, pq_book_size]
298-
* - codebook_gen::PER_CLUSTER: [n_lists, pq_len, pq_book_size]
314+
* - CUVS_IVF_PQ_CODEBOOK_GEN_PER_SUBSPACE: [pq_dim , pq_len, pq_book_size]
315+
* - CUVS_IVF_PQ_CODEBOOK_GEN_PER_CLUSTER: [n_lists, pq_len, pq_book_size]
299316
*
300317
* @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
301318
* @param[out] pq_centers Output tensor that will be populated with a non-owning view of the data
@@ -443,8 +460,8 @@ cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
443460
* matrices)
444461
* @param[in] dim dimensionality of the input data
445462
* @param[in] pq_centers PQ codebook on device memory with required shape:
446-
* - codebook_kind PER_SUBSPACE: [pq_dim, pq_len, pq_book_size]
447-
* - codebook_kind PER_CLUSTER: [n_lists, pq_len, pq_book_size]
463+
* - codebook_kind CUVS_IVF_PQ_CODEBOOK_GEN_PER_SUBSPACE: [pq_dim, pq_len, pq_book_size]
464+
* - codebook_kind CUVS_IVF_PQ_CODEBOOK_GEN_PER_CLUSTER: [n_lists, pq_len, pq_book_size]
448465
* @param[in] centers Cluster centers in the original space [n_lists, dim_ext]
449466
* where dim_ext = round_up(dim + 1, 8)
450467
* @param[in] centers_rot Rotated cluster centers [n_lists, rot_dim]

c/src/neighbors/cagra.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ static void _populate_c_ivf_pq_params(cuvsIvfPqParams* c_ivf_pq,
367367
c_ivf_pq->ivf_pq_build_params->kmeans_trainset_fraction = bp.kmeans_trainset_fraction;
368368
c_ivf_pq->ivf_pq_build_params->pq_bits = bp.pq_bits;
369369
c_ivf_pq->ivf_pq_build_params->pq_dim = bp.pq_dim;
370-
c_ivf_pq->ivf_pq_build_params->codebook_kind = static_cast<codebook_gen>(bp.codebook_kind);
370+
c_ivf_pq->ivf_pq_build_params->codebook_kind = static_cast<cuvsIvfPqCodebookGen>(bp.codebook_kind);
371371
c_ivf_pq->ivf_pq_build_params->force_random_rotation = bp.force_random_rotation;
372372
c_ivf_pq->ivf_pq_build_params->conservative_memory_allocation = bp.conservative_memory_allocation;
373373
c_ivf_pq->ivf_pq_build_params->max_train_points_per_pq_code = bp.max_train_points_per_pq_code;

c/src/neighbors/ivf_pq.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ void convert_c_index_params(cuvsIvfPqIndexParams params, cuvs::neighbors::ivf_pq
3434
out->force_random_rotation = params.force_random_rotation;
3535
out->conservative_memory_allocation = params.conservative_memory_allocation;
3636
out->max_train_points_per_pq_code = params.max_train_points_per_pq_code;
37+
out->codes_layout = static_cast<cuvs::neighbors::ivf_pq::list_layout>((int)params.codes_layout);
3738
}
3839
void convert_c_search_params(cuvsIvfPqSearchParams params,
3940
cuvs::neighbors::ivf_pq::search_params* out)
@@ -218,8 +219,16 @@ void _get_list_indices(cuvsIvfPqIndex index,
218219
uint32_t label,
219220
DLManagedTensor* out_labels)
220221
{
221-
auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
222-
cuvs::core::to_dlpack(index_ptr->lists()[label]->indices.view(), out_labels);
222+
auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_pq::index<IdxT>*>(index.addr);
223+
if (index_ptr->codes_layout() == cuvs::neighbors::ivf_pq::list_layout::FLAT) {
224+
auto& list =
225+
static_cast<cuvs::neighbors::ivf_pq::list_data_flat<IdxT>&>(*index_ptr->lists()[label]);
226+
cuvs::core::to_dlpack(list.indices.view(), out_labels);
227+
} else {
228+
auto& list = static_cast<cuvs::neighbors::ivf_pq::list_data_interleaved<IdxT>&>(
229+
*index_ptr->lists()[label]);
230+
cuvs::core::to_dlpack(list.indices.view(), out_labels);
231+
}
223232
}
224233
} // namespace
225234

@@ -325,10 +334,11 @@ extern "C" cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t* params
325334
.kmeans_trainset_fraction = 0.5,
326335
.pq_bits = 8,
327336
.pq_dim = 0,
328-
.codebook_kind = codebook_gen::PER_SUBSPACE,
337+
.codebook_kind = CUVS_IVF_PQ_CODEBOOK_GEN_PER_SUBSPACE,
329338
.force_random_rotation = false,
330339
.conservative_memory_allocation = false,
331-
.max_train_points_per_pq_code = 256};
340+
.max_train_points_per_pq_code = 256,
341+
.codes_layout = CUVS_IVF_PQ_LIST_LAYOUT_INTERLEAVED};
332342
});
333343
}
334344

cpp/cmake/patches/faiss-1.13-cuvs-26.02.diff

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
diff --git a/faiss/gpu/impl/CuvsIVFPQ.cu b/faiss/gpu/impl/CuvsIVFPQ.cu
2-
index 1e2fef225..35b388147 100644
2+
index 1e2fef225..2ee40da46 100644
33
--- a/faiss/gpu/impl/CuvsIVFPQ.cu
44
+++ b/faiss/gpu/impl/CuvsIVFPQ.cu
55
@@ -129,8 +129,14 @@ void CuvsIVFPQ::updateQuantizer(Index* quantizer) {
@@ -122,7 +122,35 @@ index 1e2fef225..35b388147 100644
122122
}
123123

124124
setPQCentroids_();
125-
@@ -520,7 +583,7 @@ void CuvsIVFPQ::setPQCentroids_() {
125+
@@ -404,10 +467,11 @@ void CuvsIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) {
126+
auto& cuvs_index_lists = cuvs_index->lists();
127+
128+
// conservative memory alloc for cloning cpu inverted lists
129+
- cuvs::neighbors::ivf_pq::list_spec<uint32_t, idx_t> ivf_list_spec{
130+
- static_cast<uint32_t>(bitsPerSubQuantizer_),
131+
- static_cast<uint32_t>(numSubQuantizers_),
132+
- true};
133+
+ cuvs::neighbors::ivf_pq::list_spec_interleaved<uint32_t, idx_t>
134+
+ ivf_list_spec{
135+
+ static_cast<uint32_t>(bitsPerSubQuantizer_),
136+
+ static_cast<uint32_t>(numSubQuantizers_),
137+
+ true};
138+
139+
for (size_t i = 0; i < nlist; ++i) {
140+
size_t listSize = ivf->list_size(i);
141+
@@ -426,9 +490,9 @@ void CuvsIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) {
142+
// This cuVS list must currently be empty
143+
FAISS_ASSERT(getListLength(i) == 0);
144+
145+
- cuvs::neighbors::ivf::resize_list(
146+
+ cuvs::neighbors::ivf_pq::helpers::resize_list(
147+
raft_handle,
148+
- cuvs_index_lists[i],
149+
+ cuvs_index_lists[i],
150+
ivf_list_spec,
151+
static_cast<uint32_t>(listSize),
152+
static_cast<uint32_t>(0));
153+
@@ -520,7 +587,7 @@ void CuvsIVFPQ::setPQCentroids_() {
126154
auto stream = resources_->getDefaultStreamCurrentDevice();
127155

128156
raft::copy(

cpp/include/cuvs/neighbors/common.hpp

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -711,11 +711,52 @@ template <typename IdxT>
711711
constexpr static IdxT kInvalidRecord =
712712
(std::is_signed_v<IdxT> ? IdxT{0} : std::numeric_limits<IdxT>::max()) - 1;
713713

714+
/**
715+
* Abstract base class for IVF list data.
716+
* This allows polymorphic access to list data regardless of the underlying layout.
717+
*
718+
* @tparam ValueT The data element type (e.g., uint8_t for PQ codes, float for raw vectors)
719+
* @tparam IdxT The index type for source indices
720+
* @tparam SizeT The size type
721+
*
722+
* TODO: Make this struct internal (tracking issue: https://github.com/rapidsai/cuvs/issues/1726)
723+
*/
724+
template <typename ValueT, typename IdxT, typename SizeT = uint32_t>
725+
struct list_base {
726+
using value_type = ValueT;
727+
using index_type = IdxT;
728+
using size_type = SizeT;
729+
730+
virtual ~list_base() = default;
731+
732+
/** Get the raw data pointer. */
733+
virtual value_type* data_ptr() noexcept = 0;
734+
virtual const value_type* data_ptr() const noexcept = 0;
735+
736+
/** Get the indices pointer. */
737+
virtual index_type* indices_ptr() noexcept = 0;
738+
virtual const index_type* indices_ptr() const noexcept = 0;
739+
740+
/** Get the current size (number of records). */
741+
virtual size_type get_size() const noexcept = 0;
742+
743+
/** Set the current size (number of records). */
744+
virtual void set_size(size_type new_size) noexcept = 0;
745+
746+
/** Get the total size of the data array in bytes. */
747+
virtual size_t data_byte_size() const noexcept = 0;
748+
749+
/** Get the capacity (number of indices that can be stored). */
750+
virtual size_type indices_capacity() const noexcept = 0;
751+
};
752+
714753
/** The data for a single IVF list. */
715754
template <template <typename, typename...> typename SpecT,
716755
typename SizeT,
717756
typename... SpecExtraArgs>
718-
struct list {
757+
struct list : public list_base<typename SpecT<SizeT, SpecExtraArgs...>::value_type,
758+
typename SpecT<SizeT, SpecExtraArgs...>::index_type,
759+
SizeT> {
719760
using size_type = SizeT;
720761
using spec_type = SpecT<size_type, SpecExtraArgs...>;
721762
using value_type = typename spec_type::value_type;
@@ -731,6 +772,18 @@ struct list {
731772

732773
/** Allocate a new list capable of holding at least `n_rows` data records and indices. */
733774
list(raft::resources const& res, const spec_type& spec, size_type n_rows);
775+
776+
value_type* data_ptr() noexcept override { return data.data_handle(); }
777+
const value_type* data_ptr() const noexcept override { return data.data_handle(); }
778+
779+
index_type* indices_ptr() noexcept override { return indices.data_handle(); }
780+
const index_type* indices_ptr() const noexcept override { return indices.data_handle(); }
781+
782+
size_type get_size() const noexcept override { return size.load(); }
783+
void set_size(size_type new_size) noexcept override { size.store(new_size); }
784+
785+
size_t data_byte_size() const noexcept override { return data.size() * sizeof(value_type); }
786+
size_type indices_capacity() const noexcept override { return indices.extent(0); }
734787
};
735788

736789
template <typename ListT, class T = void>
@@ -755,6 +808,10 @@ using enable_if_valid_list_t = typename enable_if_valid_list<ListT, T>::type;
755808
/**
756809
* Resize a list by the given id, so that it can contain the given number of records;
757810
* copy the data if necessary.
811+
*
812+
* @note This is an internal function that requires the concrete list type.
813+
* For IVF-PQ indexes, prefer using the helper functions in
814+
* `cuvs::neighbors::ivf_pq::helpers::resize_list` which handle type casting internally.
758815
*/
759816
template <typename ListT>
760817
void resize_list(raft::resources const& res,
@@ -763,13 +820,23 @@ void resize_list(raft::resources const& res,
763820
typename ListT::size_type new_used_size,
764821
typename ListT::size_type old_used_size);
765822

823+
/**
824+
* Serialize a list to an output stream.
825+
*
826+
* @note This function requires the concrete list type (not the base class) because:
827+
* 1. It needs access to the spec_type to determine the data layout for serialization
828+
* 2. The serialized format depends on the spec's make_list_extents() method
829+
* When calling from code that only has a base class pointer, use std::static_pointer_cast
830+
* to obtain the typed pointer first.
831+
*/
766832
template <typename ListT>
767833
enable_if_valid_list_t<ListT> serialize_list(
768834
const raft::resources& handle,
769835
std::ostream& os,
770836
const ListT& ld,
771837
const typename ListT::spec_type& store_spec,
772838
std::optional<typename ListT::size_type> size_override = std::nullopt);
839+
773840
template <typename ListT>
774841
enable_if_valid_list_t<ListT> serialize_list(
775842
const raft::resources& handle,

cpp/include/cuvs/neighbors/ivf_flat.hpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -2933,17 +2933,17 @@ void reset_index(const raft::resources& res, index<uint8_t, int64_t>* index);
29332933
* ivf_flat::index<uint8_t, int64_t> index(res, index_params, D);
29342934
* ivf_flat::helpers::reset_index(res, &index);
29352935
* // resize the first IVF list to hold 5 records
2936-
* auto spec = list_spec<uint32_t, uint8_t, int64_t>{
2937-
* index->dim(), index->conservative_memory_allocation()};
2936+
* auto spec = list_spec<uint32_t, float, int64_t>{
2937+
* index.dim(), index.conservative_memory_allocation()};
29382938
* uint32_t new_size = 5;
2939-
* ivf::resize_list(res, list, spec, new_size, 0);
2939+
* ivf::resize_list(res, index.lists()[0], spec, new_size, 0);
29402940
* raft::update_device(index.list_sizes(), &new_size, 1, stream);
29412941
* // recompute the internal state of the index
29422942
* ivf_flat::helpers::recompute_internal_state(res, index);
29432943
* @endcode
29442944
*
29452945
* @param[in] res raft resource
2946-
* @param[inout] index pointer to IVF-PQ index
2946+
* @param[inout] index pointer to IVF-Flat index
29472947
*/
29482948
void recompute_internal_state(const raft::resources& res, index<float, int64_t>* index);
29492949

@@ -2961,17 +2961,17 @@ void recompute_internal_state(const raft::resources& res, index<float, int64_t>*
29612961
* ivf_flat::index<uint8_t, int64_t> index(res, index_params, D);
29622962
* ivf_flat::helpers::reset_index(res, &index);
29632963
* // resize the first IVF list to hold 5 records
2964-
* auto spec = list_spec<uint32_t, uint8_t, int64_t>{
2965-
* index->dim(), index->conservative_memory_allocation()};
2964+
* auto spec = list_spec<uint32_t, half, int64_t>{
2965+
* index.dim(), index.conservative_memory_allocation()};
29662966
* uint32_t new_size = 5;
2967-
* ivf::resize_list(res, list, spec, new_size, 0);
2967+
* ivf::resize_list(res, index.lists()[0], spec, new_size, 0);
29682968
* raft::update_device(index.list_sizes(), &new_size, 1, stream);
29692969
* // recompute the internal state of the index
29702970
* ivf_flat::helpers::recompute_internal_state(res, index);
29712971
* @endcode
29722972
*
29732973
* @param[in] res raft resource
2974-
* @param[inout] index pointer to IVF-PQ index
2974+
* @param[inout] index pointer to IVF-Flat index
29752975
*/
29762976
void recompute_internal_state(const raft::resources& res, index<half, int64_t>* index);
29772977

@@ -2989,17 +2989,17 @@ void recompute_internal_state(const raft::resources& res, index<half, int64_t>*
29892989
* ivf_flat::index<uint8_t, int64_t> index(res, index_params, D);
29902990
* ivf_flat::helpers::reset_index(res, &index);
29912991
* // resize the first IVF list to hold 5 records
2992-
* auto spec = list_spec<uint32_t, uint8_t, int64_t>{
2993-
* index->dim(), index->conservative_memory_allocation()};
2992+
* auto spec = list_spec<uint32_t, int8_t, int64_t>{
2993+
* index.dim(), index.conservative_memory_allocation()};
29942994
* uint32_t new_size = 5;
2995-
* ivf::resize_list(res, list, spec, new_size, 0);
2995+
* ivf::resize_list(res, index.lists()[0], spec, new_size, 0);
29962996
* raft::update_device(index.list_sizes(), &new_size, 1, stream);
29972997
* // recompute the internal state of the index
29982998
* ivf_flat::helpers::recompute_internal_state(res, index);
29992999
* @endcode
30003000
*
30013001
* @param[in] res raft resource
3002-
* @param[inout] index pointer to IVF-PQ index
3002+
* @param[inout] index pointer to IVF-Flat index
30033003
*/
30043004
void recompute_internal_state(const raft::resources& res, index<int8_t, int64_t>* index);
30053005

@@ -3018,9 +3018,9 @@ void recompute_internal_state(const raft::resources& res, index<int8_t, int64_t>
30183018
* ivf_flat::helpers::reset_index(res, &index);
30193019
* // resize the first IVF list to hold 5 records
30203020
* auto spec = list_spec<uint32_t, uint8_t, int64_t>{
3021-
* index->dim(), index->conservative_memory_allocation()};
3021+
* index.dim(), index.conservative_memory_allocation()};
30223022
* uint32_t new_size = 5;
3023-
* ivf::resize_list(res, list, spec, new_size, 0);
3023+
* ivf::resize_list(res, index.lists()[0], spec, new_size, 0);
30243024
* raft::update_device(index.list_sizes(), &new_size, 1, stream);
30253025
* // recompute the internal state of the index
30263026
* ivf_flat::helpers::recompute_internal_state(res, index);

0 commit comments

Comments
 (0)