-
Notifications
You must be signed in to change notification settings - Fork 74
Expand file tree
/
Copy pathonnxruntime_ep_c_api.h
More file actions
2112 lines (1960 loc) · 100 KB
/
onnxruntime_ep_c_api.h
File metadata and controls
2112 lines (1960 loc) · 100 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// Do not include this file directly. Please include "onnxruntime_c_api.h" instead.
#if defined(__DOXYGEN__)
// When running a Doxygen build, include onnxruntime_c_api.h. Doxygen expects header files to be self-contained.
#include "onnxruntime_c_api.h"
#else
// In normal usage, do not include onnxruntime_c_api.h. This file is explicitly included in onnxruntime_c_api.h.
#endif
#ifdef __cplusplus
extern "C" {
#endif
// Type declarations for the execution provider (EP) API.
// ORT_RUNTIME_CLASS is defined outside this file; presumably ORT_RUNTIME_CLASS(X) introduces the type name OrtX
// (the struct definitions later in this header rely on that naming) -- TODO confirm against onnxruntime_c_api.h.
ORT_RUNTIME_CLASS(Ep);
ORT_RUNTIME_CLASS(EpFactory);
ORT_RUNTIME_CLASS(EpGraphSupportInfo);
ORT_RUNTIME_CLASS(MemoryDevice); // opaque class to wrap onnxruntime::OrtDevice
ORT_RUNTIME_CLASS(NodeComputeContext);
// The four *Impl types below are given full struct definitions (OrtDataTransferImpl, OrtSyncNotificationImpl,
// OrtSyncStreamImpl, OrtExternalResourceImporterImpl) later in this header.
ORT_RUNTIME_CLASS(DataTransferImpl);
ORT_RUNTIME_CLASS(SyncNotificationImpl);
ORT_RUNTIME_CLASS(SyncStreamImpl);
ORT_RUNTIME_CLASS(ExternalResourceImporterImpl);
/** \brief Base struct for imported external memory handles.
*
* EPs derive from this struct to add EP-specific fields (e.g., CUdeviceptr for CUDA).
* The EP is responsible for creating and releasing instances of the derived type.
*
* Example derived type for a CUDA EP (the example uses C++ inheritance syntax):
* \code
* struct MyCudaExternalMemoryHandle : OrtExternalMemoryHandle {
*   CUexternalMemory ext_memory;
*   CUdeviceptr mapped_ptr;
*   bool is_dedicated;
* };
* \endcode
*
* \since Version 1.24.
*/
struct OrtExternalMemoryHandle {
uint32_t version; ///< Must be ORT_API_VERSION
const OrtEpDevice* ep_device; ///< EP device that created this handle
OrtExternalMemoryDescriptor descriptor; ///< External memory descriptor
/** \brief Release callback for this handle. The EP sets this to its release function.
*
* ORT calls this when ReleaseExternalMemoryHandle is invoked. The EP's callback
* should cast the handle to its derived type and delete it.
*
* \param[in] handle The handle to release (a pointer to this base struct).
*/
void(ORT_API_CALL* Release)(_In_ OrtExternalMemoryHandle* handle);
};
/** \brief Base struct for imported external semaphore handles.
*
* EPs derive from this struct to add EP-specific fields (e.g., CUexternalSemaphore for CUDA).
* The EP is responsible for creating and releasing instances of the derived type.
*
* Example derived type for a CUDA EP (the example uses C++ inheritance syntax):
* \code
* struct MyCudaExternalSemaphoreHandle : OrtExternalSemaphoreHandle {
*   CUexternalSemaphore ext_semaphore;
* };
* \endcode
*
* \since Version 1.24.
*/
struct OrtExternalSemaphoreHandle {
uint32_t version; ///< Must be ORT_API_VERSION
const OrtEpDevice* ep_device; ///< EP device that created this handle
OrtExternalSemaphoreDescriptor descriptor; ///< External semaphore descriptor
/** \brief Release callback for this handle. The EP sets this to its release function.
*
* ORT calls this when ReleaseExternalSemaphoreHandle is invoked. The EP's callback
* should cast the handle to its derived type and delete it.
*
* \param[in] handle The handle to release (a pointer to this base struct).
*/
void(ORT_API_CALL* Release)(_In_ OrtExternalSemaphoreHandle* handle);
};
// Opaque types for kernel-based EPs.
// No struct definition is given for these in the visible part of this header; EP code presumably only handles
// pointers to them -- TODO confirm.
ORT_RUNTIME_CLASS(KernelRegistry);
ORT_RUNTIME_CLASS(KernelDefBuilder);
ORT_RUNTIME_CLASS(KernelDef);
ORT_RUNTIME_CLASS(DataType); // combination of ONNXType (e.g., Tensor, Map, Sequence) and ONNXTensorElementDataType
ORT_RUNTIME_CLASS(SharedPrePackedWeightCache); // passed to OrtKernelImpl::PrePackWeight() as `prepacked_weight_cache`
/** \brief Struct that an EP implements for IDataTransfer to copy between devices it uses and CPU.
*
* The EP initializes `ort_version_supported` and the function pointers below; ORT calls them.
*
* \since Version 1.23.
*/
struct OrtDataTransferImpl {
uint32_t ort_version_supported; ///< Must be initialized to ORT_API_VERSION
/** \brief Release the OrtDataTransferImpl instance.
*
* This is called by ORT when the OrtDataTransferImpl instance is no longer needed.
* The implementation should release any resources held by the instance.
*
* \param[in] this_ptr Pointer to the OrtDataTransferImpl instance.
*
* \since Version 1.23.
*/
ORT_API_T(void, Release, _In_ OrtDataTransferImpl* this_ptr);
/** \brief Check if the implementation can copy between the source and destination memory devices.
*
* \param[in] this_ptr Pointer to the OrtDataTransferImpl instance.
* \param[in] src_memory_device Source OrtMemoryDevice to copy from.
* \param[in] dst_memory_device Destination OrtMemoryDevice to copy to.
* \return True if the implementation can copy between the devices.
*
* \since Version 1.23.
*/
ORT_API_T(bool, CanCopy, _In_ const OrtDataTransferImpl* this_ptr,
_In_ const OrtMemoryDevice* src_memory_device, _In_ const OrtMemoryDevice* dst_memory_device);
/** \brief Copy tensors from src_tensors to dst_tensors using the provided streams.
*
* The implementation can use the provided streams to perform asynchronous copies if supported.
* If a stream is not available, the copy is performed synchronously.
*
* \param[in] this_ptr Pointer to the OrtDataTransferImpl instance.
* \param[in] src_tensors Array of `num_tensors` source OrtValue pointers to copy from.
* \param[in] dst_tensors Array of `num_tensors` destination OrtValue pointers to copy to.
* \param[in] streams Array of OrtSyncStream pointers for the copy operations, if the execution provider is stream
*                    aware. nullptr if it is not.
* \param[in] num_tensors Number of tensors to copy.
*
* \note NOTE(review): `streams` is documented as possibly nullptr but is annotated `_In_reads_(num_tensors)`
*       rather than an optional (`_opt_`) variant. Implementations should check it for nullptr before indexing.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(CopyTensors, _In_ OrtDataTransferImpl* this_ptr,
_In_reads_(num_tensors) const OrtValue** src_tensors,
_In_reads_(num_tensors) OrtValue** dst_tensors,
_In_reads_(num_tensors) OrtSyncStream** streams,
_In_ size_t num_tensors);
};
/** \brief Struct that an EP implements for Stream Notifications.
*
* Instances are created by the EP via OrtSyncStreamImpl::CreateNotification().
*
* \since Version 1.23.
*/
struct OrtSyncNotificationImpl {
uint32_t ort_version_supported; ///< Must be initialized to ORT_API_VERSION
/** \brief Release the OrtSyncNotificationImpl instance.
*
* This is called by ORT when the OrtSyncNotificationImpl instance is no longer needed.
* The implementation should release any resources held by the instance.
*
* \param[in] this_ptr Pointer to the OrtSyncNotificationImpl instance.
*
* \since Version 1.23.
*/
ORT_API_T(void, Release, _In_ OrtSyncNotificationImpl* this_ptr);
/** \brief Called by ORT to activate the notification.
*
* \param[in] this_ptr Pointer to the OrtSyncNotificationImpl instance.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(Activate, _In_ OrtSyncNotificationImpl* this_ptr);
/** \brief Wait for a device-to-device operation to complete.
*
* \param[in] this_ptr Pointer to the OrtSyncNotificationImpl instance.
* \param[in] consumer_stream The OrtSyncStream instance that will wait on this notification to be activated.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(WaitOnDevice, _In_ OrtSyncNotificationImpl* this_ptr, _In_ OrtSyncStream* consumer_stream);
/** \brief Wait for a device-to-host operation to complete.
*
* \param[in] this_ptr Pointer to the OrtSyncNotificationImpl instance.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(WaitOnHost, _In_ OrtSyncNotificationImpl* this_ptr);
};
/** \brief Struct that an EP implements if it wishes to implement Stream support.
*
* This struct provides the overrides for onnxruntime::Stream's virtual methods.
*
* \since Version 1.23.
*/
struct OrtSyncStreamImpl {
uint32_t ort_version_supported; ///< Must be initialized to ORT_API_VERSION
/** \brief Release the OrtSyncStreamImpl instance.
*
* This is called by ORT when the OrtSyncStreamImpl instance is no longer needed.
* The implementation should release any resources held by the instance.
*
* \param[in] this_ptr Pointer to the OrtSyncStreamImpl instance.
*
* \since Version 1.23.
*/
ORT_API_T(void, Release, _In_ OrtSyncStreamImpl* this_ptr);
/** \brief Get the handle of the stream.
*
* This returns the native handle for the stream, e.g. cudaStream_t for CUDA streams.
*
* \param[in] this_ptr Pointer to the OrtSyncStreamImpl instance.
* \return The handle of the stream.
*
* \since Version 1.23.
*/
ORT_API_T(void*, GetHandle, _In_ OrtSyncStreamImpl* this_ptr);
/** \brief Create an OrtSyncNotificationImpl for the OrtSyncStreamImpl instance.
*
* \param[in] this_ptr Pointer to the OrtSyncStreamImpl instance.
* \param[out] notification The new OrtSyncNotificationImpl instance.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(CreateNotification, _In_ OrtSyncStreamImpl* this_ptr,
_Outptr_ OrtSyncNotificationImpl** notification);
/** \brief Flush the stream.
*
* This is called by ORT to flush the stream, ensuring that all operations submitted to the stream are completed.
*
* \param[in] this_ptr Pointer to the OrtSyncStreamImpl instance.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(Flush, _In_ OrtSyncStreamImpl* this_ptr);
/** \brief Notify the stream that a session run has ended.
*
* This is called by ORT to notify the stream that a session run has ended, allowing the stream to perform any
* necessary cleanup or finalization.
*
* \param[in] this_ptr Pointer to the OrtSyncStreamImpl instance.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(OnSessionRunEnd, _In_ OrtSyncStreamImpl* this_ptr);
};
/** \brief Struct that an EP implements for external resource import (memory + semaphore import).
*
* This capability object provides methods for zero-copy import of external GPU memory and semaphores.
* EPs that support D3D12, CUDA, HIP, or Vulkan external resource APIs can implement this interface.
*
* \note Unlike the other *Impl structs in this header, the `Release` function for the object itself is the
*       LAST member of this struct. Member order is part of the binary interface and must not be changed.
*
* \since Version 1.24.
*/
struct OrtExternalResourceImporterImpl {
uint32_t ort_version_supported; ///< Must be initialized to ORT_API_VERSION
// Memory operations (stream-independent)
/** \brief Check if the implementation can import external memory of the given handle type.
*
* \param[in] this_ptr Pointer to the OrtExternalResourceImporterImpl instance.
* \param[in] handle_type The type of external memory handle to check.
* \return True if the handle type is supported.
*
* \since Version 1.24.
*/
ORT_API_T(bool, CanImportMemory,
_In_ const OrtExternalResourceImporterImpl* this_ptr,
_In_ OrtExternalMemoryHandleType handle_type);
/** \brief Import external memory.
*
* The EP creates a derived type of OrtExternalMemoryHandle and returns a pointer to the base.
* The EP is responsible for the lifetime of the handle (release via ReleaseMemory).
*
* \param[in] this_ptr Pointer to the OrtExternalResourceImporterImpl instance.
* \param[in] desc Descriptor containing the external memory handle and properties.
* \param[out] out_handle Output parameter set to the created OrtExternalMemoryHandle (EP's derived type).
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(ImportMemory,
_In_ OrtExternalResourceImporterImpl* this_ptr,
_In_ const OrtExternalMemoryDescriptor* desc,
_Outptr_ OrtExternalMemoryHandle** out_handle);
/** \brief Release an imported external memory handle.
*
* The EP deletes its derived type instance.
*
* \param[in] this_ptr Pointer to the OrtExternalResourceImporterImpl instance.
* \param[in] handle The OrtExternalMemoryHandle to release (EP casts to its derived type).
*
* \since Version 1.24.
*/
ORT_API_T(void, ReleaseMemory,
_In_ OrtExternalResourceImporterImpl* this_ptr,
_In_ OrtExternalMemoryHandle* handle);
/** \brief Create a tensor backed by imported external memory.
*
* The created tensor is a view over the imported memory and does not copy data.
*
* \param[in] this_ptr Pointer to the OrtExternalResourceImporterImpl instance.
* \param[in] mem_handle The imported external memory handle (EP casts to its derived type).
* \param[in] tensor_desc Descriptor specifying tensor element type, shape, and optional offset.
* \param[out] out_tensor Output parameter set to the created OrtValue containing the tensor.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(CreateTensorFromMemory,
_In_ OrtExternalResourceImporterImpl* this_ptr,
_In_ const OrtExternalMemoryHandle* mem_handle,
_In_ const OrtExternalTensorDescriptor* tensor_desc,
_Outptr_ OrtValue** out_tensor);
// Semaphore operations (require stream)
/** \brief Check if the implementation can import external semaphores of the given type.
*
* \param[in] this_ptr Pointer to the OrtExternalResourceImporterImpl instance.
* \param[in] type The type of external semaphore to check.
* \return True if the semaphore type is supported.
*
* \since Version 1.24.
*/
ORT_API_T(bool, CanImportSemaphore,
_In_ const OrtExternalResourceImporterImpl* this_ptr,
_In_ OrtExternalSemaphoreType type);
/** \brief Import an external semaphore.
*
* The EP creates a derived type of OrtExternalSemaphoreHandle and returns a pointer to the base.
* The EP is responsible for the lifetime of the handle (release via ReleaseSemaphore).
*
* \param[in] this_ptr Pointer to the OrtExternalResourceImporterImpl instance.
* \param[in] desc Descriptor containing the external semaphore handle and type.
* \param[out] out_handle Output parameter set to the created OrtExternalSemaphoreHandle (EP's derived type).
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(ImportSemaphore,
_In_ OrtExternalResourceImporterImpl* this_ptr,
_In_ const OrtExternalSemaphoreDescriptor* desc,
_Outptr_ OrtExternalSemaphoreHandle** out_handle);
/** \brief Release an imported external semaphore handle.
*
* The EP deletes its derived type instance.
*
* \param[in] this_ptr Pointer to the OrtExternalResourceImporterImpl instance.
* \param[in] handle The OrtExternalSemaphoreHandle to release (EP casts to its derived type).
*
* \since Version 1.24.
*/
ORT_API_T(void, ReleaseSemaphore,
_In_ OrtExternalResourceImporterImpl* this_ptr,
_In_ OrtExternalSemaphoreHandle* handle);
/** \brief Wait on an external semaphore on the EP's stream.
*
* Inserts a wait operation into the EP's stream that blocks until the semaphore
* reaches the specified value.
*
* \param[in] this_ptr Pointer to the OrtExternalResourceImporterImpl instance.
* \param[in] handle The imported external semaphore (EP casts to its derived type).
* \param[in] stream The OrtSyncStream into which the wait operation is inserted.
* \param[in] value The fence/semaphore value to wait for.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(WaitSemaphore,
_In_ OrtExternalResourceImporterImpl* this_ptr,
_In_ OrtExternalSemaphoreHandle* handle,
_In_ OrtSyncStream* stream,
_In_ uint64_t value);
/** \brief Signal an external semaphore from the EP's stream.
*
* Inserts a signal operation into the EP's stream that sets the semaphore
* to the specified value when reached.
*
* \param[in] this_ptr Pointer to the OrtExternalResourceImporterImpl instance.
* \param[in] handle The imported external semaphore (EP casts to its derived type).
* \param[in] stream The OrtSyncStream to signal from.
* \param[in] value The fence/semaphore value to signal.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(SignalSemaphore,
_In_ OrtExternalResourceImporterImpl* this_ptr,
_In_ OrtExternalSemaphoreHandle* handle,
_In_ OrtSyncStream* stream,
_In_ uint64_t value);
// Release the capability object itself
/** \brief Release the OrtExternalResourceImporterImpl instance.
*
* This is called by ORT when the OrtExternalResourceImporterImpl instance is no longer needed.
* The implementation should release any resources held by the instance.
*
* \param[in] this_ptr Pointer to the OrtExternalResourceImporterImpl instance.
*
* \since Version 1.24.
*/
ORT_API_T(void, Release, _In_ OrtExternalResourceImporterImpl* this_ptr);
};
// Forward declarations. The typedefs let the struct names be used without the `struct` keyword when this
// header is compiled as C.
struct OrtNodeFusionOptions;
typedef struct OrtNodeFusionOptions OrtNodeFusionOptions;
struct OrtNodeComputeInfo;
typedef struct OrtNodeComputeInfo OrtNodeComputeInfo;
/**
* \brief The OrtNodeFusionOptions struct specifies options for fusing nodes supported by an execution provider.
*
* Refer to OrtEpApi::EpGraphSupportInfo_AddNodesToFuse.
*
* \since Version 1.23.
*/
struct OrtNodeFusionOptions {
/** \brief The ONNX Runtime version the OrtNodeFusionOptions was compiled with.
*
* Implementation should set to ORT_API_VERSION.
* ORT will use this to ensure it does not use members that were not available when the EP library was compiled.
*
* \since Version 1.23.
*/
uint32_t ort_version_supported;
/** \brief If set to true, specifies that the execution provider does not require ONNX Runtime to provide constant
* initializers as inputs to the fused node during model inference. This is used when the execution
* provider saves a copy of constant initializers, and allows ONNX Runtime to release constant initializers that
* are not used by any execution provider.
*
* If not specified, defaults to false. That is, ONNX Runtime provides constant initializers as inputs to
* the fused node by default.
*
* \since Version 1.23.
*/
bool drop_constant_initializers;
// NOTE(review): the line below is commented out, so it is NOT a member of this struct. It looks like a
// placeholder for a possible future option -- confirm whether it should be kept or removed.
// const OrtNode* fused_node_schema;
};
/**
* \brief The OrtNodeComputeInfo struct provides functions that an OrtEp implements to specify the compute
* function for a compiled OrtGraph instance.
*
* The documented call order is CreateState(), then Compute(), then ReleaseState().
*
* \since Version 1.23.
*/
struct OrtNodeComputeInfo {
/** \brief The ONNX Runtime version the OrtNodeComputeInfo was compiled with.
*
* Implementation should set to ORT_API_VERSION.
* ORT will use this to ensure it does not call functions that were not available when the EP library was compiled.
*
* \since Version 1.23.
*/
uint32_t ort_version_supported;
/** \brief Creates an opaque compute state object that is then passed to the Compute() function during inference.
* \param[in] this_ptr The OrtNodeComputeInfo instance.
* \param[in] compute_context OrtNodeComputeContext instance that contains compiled/fused node's name and host
*                            memory allocation functions. Can optionally be used to build the compute state.
* \param[out] compute_state Output parameter that is assigned the opaque computation state. ONNX Runtime calls
*                           ReleaseState() (after calling Compute()) to allow the implementer to release the
*                           compute state.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
OrtStatus*(ORT_API_CALL* CreateState)(_In_ OrtNodeComputeInfo* this_ptr,
_In_ OrtNodeComputeContext* compute_context,
_Outptr_ void** compute_state);
/** \brief Computation function called to execute the fused node compiled by an OrtEp instance.
* \param[in] this_ptr The OrtNodeComputeInfo instance.
* \param[in] compute_state The opaque computation state returned by CreateState().
* \param[in] kernel_context The OrtKernelContext instance used to access inputs/outputs.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
OrtStatus*(ORT_API_CALL* Compute)(_In_ OrtNodeComputeInfo* this_ptr, _In_ void* compute_state,
_In_ OrtKernelContext* kernel_context);
/** \brief Releases the compute state returned by CreateState().
* \param[in] this_ptr The OrtNodeComputeInfo instance.
* \param[inout] compute_state The opaque compute state returned by CreateState(). Annotated `_Frees_ptr_opt_`,
*                             i.e. the annotation permits a NULL pointer here.
*
* \since Version 1.23.
*/
void(ORT_API_CALL* ReleaseState)(_In_ OrtNodeComputeInfo* this_ptr, _Frees_ptr_opt_ void* compute_state);
};
// Forward declaration; the typedef lets the name be used without the `struct` keyword when compiled as C.
struct OrtKernelImpl;
typedef struct OrtKernelImpl OrtKernelImpl;
/**
* \brief Contains functions that an OrtEp implements to specify the computation for an operator kernel.
*
* Compute() and Release() are required. PrePackWeight() and SetSharedPrePackedWeight() are optional.
*
* \since Version 1.24.
*/
struct OrtKernelImpl {
uint32_t ort_version_supported; ///< Must be initialized to ORT_API_VERSION
uint32_t flags; ///< EP must initialize to 0. Used internally by ORT.
/** \brief Computation function called to execute the kernel on an EP.
*
* \note Implementation of this function is required.
*
* \param[in] this_ptr The OrtKernelImpl instance.
* \param[in] context The OrtKernelContext instance that provides access to the inputs and outputs.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(Compute, _In_ OrtKernelImpl* this_ptr, _In_ OrtKernelContext* context);
/** \brief Called by ORT to release the OrtKernelImpl instance and its resources.
*
* \note Implementation of this function is required.
*
* \param[in] this_ptr The OrtKernelImpl instance.
*
* \since Version 1.24.
*/
ORT_API_T(void, Release, _In_ OrtKernelImpl* this_ptr);
/** \brief Optional function to pre-pack a constant tensor (i.e., a weight) to the kernel's preferred data layout.
*
* For example, a Conv kernel can define this function to pack input W to the channel-last data layout
* before inference.
*
* Pre-packing can operate in three different modes: no pre-packing mode, sharing mode, and non-sharing mode.
*   1) No pre-packing mode: The kernel can forgo any weight pre-packing for the given `input_index` by setting
*                           `is_packed` to false and returning a successful OrtStatus. In this mode, the kernel's
*                           OrtKernelImpl::SetSharedPrePackedWeight() function is not called for that specific
*                           `input_index`.
*   2) Sharing mode: Sharing is allowed if the `prepacked_weight_cache` argument is not NULL and the EP stores
*                    weight data in CPU-accessible memory. In this case, the kernel can optionally choose
*                    to share the packed weight with other kernels that use the same weight
*                    (compared by content hash). To do so, the kernel must allocate the packed weight with the
*                    provided `allocator`, then it stores the packed weight data into `prepacked_weight_cache`
*                    via SharedPrePackedWeightCache_StoreWeightData(), sets `is_packed` to true, and returns a
*                    successful OrtStatus. ORT will subsequently call OrtKernelImpl::SetSharedPrePackedWeight()
*                    to provide this kernel with the actual shared weight data, whose memory location could
*                    differ (i.e., if shared data was allocated by a previously processed kernel).
*   3) Non-sharing mode: In non-sharing mode, the `prepacked_weight_cache` argument is ignored. In this mode,
*                        the implementation allocates the packed data with the provided `allocator`, sets
*                        `is_packed` to true, and returns a successful OrtStatus. The kernel is ultimately
*                        responsible for releasing the packed data for the weight with `allocator`.
*                        ORT may release the original (unpacked) weight, which must not be accessed in
*                        OrtKernelImpl::Compute(). Note that in this mode, the kernel's
*                        OrtKernelImpl::SetSharedPrePackedWeight() function is not called by ORT for that specific
*                        `input_index`.
*
* \note This function is based on the internal OpKernel::PrePack() virtual function used within ORT.
*
* \param[in] this_ptr The OrtKernelImpl instance.
* \param[in] tensor The OrtValue instance representing the constant tensor (weight). Do not cache in the kernel.
* \param[in] input_index The input index of the tensor in this kernel.
* \param[in] allocator Allocator for allocating the pre-packed data. Its use is required in sharing mode and
*                      recommended, but not required, in the non-sharing mode. This will be an allocator set by
*                      the application for the session/environment (e.g., via CreateAndRegisterAllocator[V2]
*                      or RegisterAllocator), or an allocator on the OrtEpDevice (read-only or default) otherwise.
*                      The allocator remains valid throughout the lifetime of the OrtKernelImpl instance.
* \param[in] prepacked_weight_cache May be NULL. If not NULL, the kernel may choose to share a packed weight by
*                                   first storing it in the OrtSharedPrePackedWeightCache instance and then
*                                   receiving the actual shared weight data in the call to
*                                   OrtKernelImpl::SetSharedPrePackedWeight(). See the above description for
*                                   "sharing mode".
* \param[out] is_packed Output parameter that the implementation sets to true if the kernel packed the tensor data.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \note Implementation of this function is optional. If not implemented (set to NULL), ORT assumes the kernel
*       does not pre-pack weight data (i.e., `is_packed` defaults to false).
*
* \since Version 1.24.
*/
ORT_API2_STATUS(PrePackWeight, _In_ OrtKernelImpl* this_ptr, _In_ const OrtValue* tensor,
_In_ int input_index, _Inout_ OrtAllocator* allocator,
_In_opt_ OrtSharedPrePackedWeightCache* prepacked_weight_cache, _Out_ bool* is_packed);
/** \brief Optional function that receives data for a shared pre-packed weight from ORT.
*
* ORT calls this function after calling OrtKernelImpl::PrePackWeight() for a specific `input_index` if:
*   - OrtKernelImpl::PrePackWeight() set the output parameter `is_packed` to true.
*   - OrtKernelImpl::PrePackWeight() stored weight data to share into the provided OrtSharedPrePackedWeightCache
*     parameter (`prepacked_weight_cache`) via the API SharedPrePackedWeightCache_StoreWeightData.
*
* Refer to the description of the "sharing mode" in the documentation for OrtKernelImpl::PrePackWeight().
*
* \note ORT will not call this function for an `input_index` that a previous call to
*       OrtKernelImpl::PrePackWeight() did not elect to pre-pack and share.
*
* \note This function is based on the internal OpKernel::UseSharedPrePackedBuffers() virtual function used
*       within ORT.
*
* \param[in] this_ptr The OrtKernelImpl instance.
* \param[in] buffer_data_ptrs An array of buffer data pointers that collectively hold the pre-packed data for a
*                             single shared weight. The buffers are provided in the same order and with the same
*                             contents (in a potentially different memory location) as the buffers
*                             passed into SharedPrePackedWeightCache_StoreWeightData() within the
*                             OrtKernelImpl::PrePackWeight() call for the same `input_index`.
* \param[in] buffer_data_sizes An array of buffer byte sizes, one per element in `buffer_data_ptrs`.
* \param[in] num_buffers The number of buffers used to store the data for the shared pre-packed weight.
*                        Specifies the number of elements in the `buffer_data_ptrs` and `buffer_data_sizes` arrays.
* \param[in] input_index The input index of the tensor in this kernel. This index identifies the weight.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \note Implementation of this function is generally optional. It is only required if
*       OrtKernelImpl::PrePackWeight() elects to share pre-packed weights.
*
* \since Version 1.24.
*/
ORT_API2_STATUS(SetSharedPrePackedWeight, _In_ OrtKernelImpl* this_ptr,
_In_reads_(num_buffers) const void* const* buffer_data_ptrs,
_In_reads_(num_buffers) const size_t* buffer_data_sizes,
_In_ size_t num_buffers, _In_ int input_index);
};
/** \brief Type definition for a function that creates an OrtKernelImpl instance for an operator kernel.
*
* \param[in] kernel_create_func_state Opaque state initially provided by the EP that registered the kernel.
*                                     Refer to OrtEpApi::KernelRegistry_AddKernel(). May be null.
* \param[in] info The OrtKernelInfo instance that provides access to the kernel's input and output characteristics.
* \param[out] kernel_out Output parameter set to the new OrtKernelImpl instance. On success, ownership of this
*                        OrtKernelImpl instance transfers to ORT, which will call OrtKernelImpl::Release() to
*                        release the instance when it is no longer used.
*                        NOTE(review): the parameter is annotated `_Outptr_result_maybenull_`, which permits a
*                        NULL result; confirm under which conditions NULL is a valid output.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
typedef OrtStatus*(ORT_API_CALL* OrtKernelCreateFunc)(_In_ void* kernel_create_func_state,
_In_ const OrtKernelInfo* info,
_Outptr_result_maybenull_ OrtKernelImpl** kernel_out);
// Forward declaration; the typedef lets the name be used without the `struct` keyword when compiled as C.
struct OrtLoopKernelHelper;
typedef struct OrtLoopKernelHelper OrtLoopKernelHelper;
/**
* \brief Contains helper functions for a Loop OrtKernelImpl created via OrtEpApi::CreateLoopKernel.
*
* \since Version 1.24.
*/
struct OrtLoopKernelHelper {
uint32_t ort_version_supported; ///< Must be initialized to ORT_API_VERSION
/** \brief Called by ORT to release the OrtLoopKernelHelper instance and its resources.
*
* \param[in] this_ptr The OrtLoopKernelHelper instance.
*
* \since Version 1.24.
*/
ORT_API_T(void, Release, _In_ OrtLoopKernelHelper* this_ptr);
/** \brief Helper function that concatenates OrtValue instances from each loop iteration into a single
*         pre-allocated output buffer.
*
* \note Implementing this function is required for all Loop opset versions.
*
* \param[in] this_ptr The OrtLoopKernelHelper instance.
* \param[in] stream_handle Optional native stream handle that enables asynchronous operations. May be NULL.
* \param[in] per_iteration_outputs Array of OrtValue instances from each iteration. All OrtValue elements have the
*                                  same shape.
* \param[in] num_per_iteration_outputs The number of OrtValue* elements in the `per_iteration_outputs` array.
* \param[out] output The pre-allocated output buffer. Memory is allocated on the device for the EP running the
*                    Loop node.
* \param[in] output_size_in_bytes The size in bytes of the `output` buffer. It is guaranteed to be large enough
*                                 to hold the concatenated data of each element in `per_iteration_outputs`.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(ConcatOutput, _In_ OrtLoopKernelHelper* this_ptr, _In_opt_ void* stream_handle,
_In_reads_(num_per_iteration_outputs) const OrtValue* const* per_iteration_outputs,
_In_ size_t num_per_iteration_outputs, _Out_writes_bytes_all_(output_size_in_bytes) void* output,
_In_ size_t output_size_in_bytes);
};
// Forward declaration and typedef come first so that the helper functions declared inside the struct
// can take an `OrtScanKernelHelper*` parameter.
struct OrtScanKernelHelper;
typedef struct OrtScanKernelHelper OrtScanKernelHelper;
/**
* \brief Contains helper functions for a Scan OrtKernelImpl created via OrtEpApi::CreateScanKernel.
*
* Every helper function in this struct receives the OrtScanKernelHelper instance itself as its first
* argument (`this_ptr`).
*
* \since Version 1.24.
*/
struct OrtScanKernelHelper {
uint32_t ort_version_supported; ///< Must be initialized to ORT_API_VERSION
/** \brief Called by ORT to release the OrtScanKernelHelper instance and its resources.
*
* \param[in] this_ptr The OrtScanKernelHelper instance.
*
* \since Version 1.24.
*/
ORT_API_T(void, Release, _In_ OrtScanKernelHelper* this_ptr);
/** \brief Helper function that transposes an OrtValue instance during execution of a Scan kernel.
*
* \note Called for Scan (opset >= 9) when the 'scan_input_axes' or 'scan_output_axes' attributes contain
* non-zero values. Implementing this function is required for Scan opset versions >= 9.
*
* \param[in] this_ptr The OrtScanKernelHelper instance.
* \param[in] permutation An array of `size_t` values that defines how the input tensor's axes should be permuted.
* \param[in] num_permutation_elems The number of elements in the `permutation` array.
* \param[in] input The input OrtValue tensor to transpose.
* \param[in] stream An optional OrtSyncStream instance to be used for asynchronous operations. May be NULL.
* \param[in,out] output The pre-allocated output OrtValue instance into which to store the results of the
* transpose operation. Must not be released as it is owned by ORT.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(Transpose, _In_ OrtScanKernelHelper* this_ptr,
_In_reads_(num_permutation_elems) const size_t* permutation, _In_ size_t num_permutation_elems,
_In_ const OrtValue* input, _In_opt_ OrtSyncStream* stream, _Inout_ OrtValue* output);
};
/**
* \brief The OrtEpApi struct provides functions that are relevant to the implementation of an execution provider.
*
* \since Version 1.22.
*/
struct OrtEpApi {
/** \brief Create an OrtEpDevice for the EP and an OrtHardwareDevice.
*
* \param[in] ep_factory Execution provider factory that is creating the instance.
* \param[in] hardware_device Hardware device that the EP can utilize.
* \param[in] ep_metadata Optional OrtKeyValuePairs instance for execution provider metadata that may be used
* during execution provider selection and passed to CreateEp.
* The OrtEpDevice copies this instance, so the caller should still release its own
* instance with ReleaseKeyValuePairs.
* \param[in] ep_options Optional OrtKeyValuePairs instance for execution provider options that will be added
* to the Session configuration options if the execution provider is selected.
* The OrtEpDevice copies this instance, so the caller should still release its own
* instance with ReleaseKeyValuePairs.
* \param[out] ep_device Output parameter set to the OrtEpDevice instance that is created.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.22.
*/
ORT_API2_STATUS(CreateEpDevice, _In_ OrtEpFactory* ep_factory,
_In_ const OrtHardwareDevice* hardware_device,
_In_opt_ const OrtKeyValuePairs* ep_metadata,
_In_opt_ const OrtKeyValuePairs* ep_options,
_Out_ OrtEpDevice** ep_device);
// Declares OrtEpApi::ReleaseEpDevice, the release function for OrtEpDevice instances.
ORT_CLASS_RELEASE(EpDevice);
/** \brief Specify nodes that are supported by an OrtEp and should be fused into one node.
*
* Because the nodes will be fused into one "fused node", there must not exist an unsupported node in
* a path between two of the provided nodes. Otherwise, the graph will become invalid.
*
* This function can be called multiple times. Each subsequent call causes the next set of
* nodes to be fused into a different node.
*
* \param[in] graph_support_info OrtEpGraphSupportInfo instance to which to add the supported nodes.
* \param[in] nodes Array of `num_nodes` nodes supported by the EP that should be fused/compiled.
* \param[in] num_nodes The number of elements in the `nodes` array.
* \param[in] node_fusion_options Optional node fusion options. Ignored if set to NULL.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(EpGraphSupportInfo_AddNodesToFuse, _In_ OrtEpGraphSupportInfo* graph_support_info,
_In_reads_(num_nodes) const OrtNode* const* nodes, _In_ size_t num_nodes,
_In_opt_ const OrtNodeFusionOptions* node_fusion_options);
/** \brief Specify a single node that is supported by an OrtEp and should be run with a registered EP kernel.
*
* \param[in] graph_support_info OrtEpGraphSupportInfo instance to which to add the supported node.
* \param[in] node The supported OrtNode instance.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(EpGraphSupportInfo_AddSingleNode, _In_ OrtEpGraphSupportInfo* graph_support_info,
_In_ const OrtNode* node);
/** \brief Query an OrtNodeComputeContext for the name of the node that encapsulates the compiled/fused node.
*
* Used in OrtNodeComputeInfo::CreateComputeState().
*
* \param[in] context The OrtNodeComputeContext instance to query.
* \return The node's name.
*
* \note The returned string is owned by ORT and is valid only for the duration of the
* OrtNodeComputeInfo::CreateComputeState() call.
*
* \since Version 1.23.
*/
ORT_API_T(const char*, NodeComputeContext_NodeName, _In_ const OrtNodeComputeContext* context);
/** \brief Register allocator information (an OrtMemoryInfo) with the OrtEpDevice.
*
* This allows an EP to provide OrtMemoryInfo for DEFAULT and HOST_ACCESSIBLE memory type as needed.
* The registered values will be used in calls to OrtEpFactory::CreateAllocator to ensure the required
* allocator(s) are available for EP usage.
*
* A later call for the same entry type replaces the previous entry.
*
* Available entries:
* - OrtDeviceAllocator with type of OrtDeviceMemoryType_DEFAULT
* - OrtDeviceAllocator with type of OrtDeviceMemoryType_HOST_ACCESSIBLE
* - OrtReadOnlyAllocator with type of OrtDeviceMemoryType_DEFAULT
* - if provided this allocator will only be used to copy initializers to the device the EP uses.
* ORT will use the OrtDeviceAllocator if not provided.
*
* \param[in] ep_device The OrtEpDevice instance to register the OrtMemoryInfo with.
* \param[in] allocator_memory_info The OrtMemoryInfo information for the allocator.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(EpDevice_AddAllocatorInfo, _In_ OrtEpDevice* ep_device,
_In_ const OrtMemoryInfo* allocator_memory_info);
/** \brief Get the OrtMemoryDevice from an OrtMemoryInfo instance.
*
* This is required for OrtDataTransferImpl (which implements onnxruntime::IDataTransfer) where the OrtMemoryDevice
* is used in the CanCopy and CopyTensors functions.
*
* The returned OrtMemoryDevice can be inspected with the MemoryDevice_* functions of this API
* (e.g. MemoryDevice_GetDeviceType, MemoryDevice_GetMemoryType).
*
* \param[in] memory_info The OrtMemoryInfo instance to get the memory device from.
* \return The OrtMemoryDevice associated with the OrtMemoryInfo instance.
*
* \since Version 1.23.
*/
ORT_API_T(const OrtMemoryDevice*, MemoryInfo_GetMemoryDevice, _In_ const OrtMemoryInfo* memory_info);
/** \brief Get the OrtMemoryDevice from an OrtValue instance if it contains a Tensor.
*
* \param[in] value The OrtValue instance to get the memory device from.
* \return The memory device if the OrtValue contains a Tensor, NULL otherwise.
*
* \since Version 1.23.
*/
ORT_API_T(const OrtMemoryDevice*, Value_GetMemoryDevice, _In_ const OrtValue* value);
/** \brief Compare two OrtMemoryDevice instances for equality.
*
* This is used to check if two memory devices are the same.
* Used to implement OrtDataTransferImpl::CanCopy.
*
* \param[in] a The first OrtMemoryDevice instance to compare.
* \param[in] b The second OrtMemoryDevice instance to compare.
* \return True if the two OrtMemoryDevice instances are equal, false otherwise.
*
* \since Version 1.23.
*/
ORT_API_T(bool, MemoryDevice_AreEqual, _In_ const OrtMemoryDevice* a, _In_ const OrtMemoryDevice* b);
/** \brief Get the OrtMemoryInfoDeviceType value from an OrtMemoryDevice instance.
*
* \param[in] memory_device The OrtMemoryDevice instance to query.
* \return The OrtMemoryInfoDeviceType value.
*
* \since Version 1.23.
*/
ORT_API_T(OrtMemoryInfoDeviceType, MemoryDevice_GetDeviceType, _In_ const OrtMemoryDevice* memory_device);
/** \brief Get the OrtDeviceMemoryType value from an OrtMemoryDevice instance.
*
* \param[in] memory_device The OrtMemoryDevice instance to query.
* \return The OrtDeviceMemoryType value (e.g. OrtDeviceMemoryType_DEFAULT or
* OrtDeviceMemoryType_HOST_ACCESSIBLE).
*
* \since Version 1.23.
*/
ORT_API_T(OrtDeviceMemoryType, MemoryDevice_GetMemoryType, _In_ const OrtMemoryDevice* memory_device);
/** \brief Get the vendor ID from an OrtMemoryDevice instance.
*
* The vendor ID is used to identify the vendor of the device, and is typically set to the PCI vendor ID.
* If the device is not vendor specific (e.g. CPU memory) the vendor ID is set to 0.
*
* \param[in] memory_device The OrtMemoryDevice instance to query.
* \return The vendor ID value, or 0 if the device is not vendor specific.
*
* \since Version 1.23.
*/
ORT_API_T(uint32_t, MemoryDevice_GetVendorId, _In_ const OrtMemoryDevice* memory_device);
/** \brief Get the device ID from an OrtMemoryDevice instance.
*
* \param[in] memory_device The OrtMemoryDevice instance to query.
* \return The device ID value.
*
* \since Version 1.23.
*/
ORT_API_T(uint32_t, MemoryDevice_GetDeviceId, _In_ const OrtMemoryDevice* memory_device);
/** \brief Get the OrtSyncStreamImpl associated with an OrtSyncStream instance.
*
* This allows the plugin library to connect its OrtSyncStreamImpl instance with an OrtSyncStream if needed.
*
* \param[in] stream The OrtSyncStream instance to find an OrtSyncStreamImpl for.
* \return The associated OrtSyncStreamImpl if found. NULL otherwise.
*
* \since Version 1.23.
*
* \remarks There should always be an OrtSyncStreamImpl associated with an OrtSyncStream instance that the EP gets.
*/
ORT_API_T(const OrtSyncStreamImpl*, SyncStream_GetImpl, _In_ const OrtSyncStream* stream);
/** \brief Get the current sync ID for a stream.
*
* To get the sync ID of the last synchronization between two streams, use GetSyncIdForLastWaitOnSyncStream.
*
* \param[in] stream The OrtSyncStream to get the sync ID for.
* \return Current sync ID.
*
* \since Version 1.23.
*/
ORT_API_T(uint64_t, SyncStream_GetSyncId, _In_ const OrtSyncStream* stream);
/** \brief Get the sync ID for the last time the consumer_stream waited on the producer_stream.
*
* When two streams are synchronized, the sync ID represents the event used in that synchronization.
*
* \param[in] producer_stream The OrtSyncStream that produced the data.
* \param[in] consumer_stream The OrtSyncStream that waited on the producer_stream.
* \return The ID of the last sync, or 0 if no sync has occurred between the two streams.
*
* \since Version 1.23.
*/
ORT_API_T(uint64_t, GetSyncIdForLastWaitOnSyncStream,
_In_ const OrtSyncStream* producer_stream, _In_ const OrtSyncStream* consumer_stream);
/** \brief Create an OrtHardwareDevice.
*
* \note Called within OrtEpFactory::GetSupportedDevices to create a new hardware device (e.g., virtual).
*
* \param[in] type The hardware device type.
* \param[in] vendor_id The hardware device's vendor identifier.
* \param[in] device_id The hardware device's identifier.
* \param[in] vendor_name The hardware device's vendor name as a null-terminated string. Copied by ORT.
* \param[in] metadata Optional OrtKeyValuePairs instance for hardware device metadata that may be queried by
* applications via OrtApi::GetEpDevices().
* Refer to onnxruntime_ep_device_ep_metadata_keys.h for common OrtHardwareDevice metadata keys.
* \param[out] hardware_device Output parameter set to the new OrtHardwareDevice instance that is created.
* Must be released with ReleaseHardwareDevice().
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(CreateHardwareDevice, _In_ OrtHardwareDeviceType type,
_In_ uint32_t vendor_id,
_In_ uint32_t device_id,
_In_ const char* vendor_name,
_In_opt_ const OrtKeyValuePairs* metadata,
_Out_ OrtHardwareDevice** hardware_device);
// Declares OrtEpApi::ReleaseHardwareDevice, the release function for OrtHardwareDevice instances.
ORT_CLASS_RELEASE(HardwareDevice);
/** \brief Creates an empty kernel registry.
*
* A kernel registry contains kernel creation information for every operator kernel supported by an EP.
*
* \remarks Refer to OrtEp::GetKernelRegistry, which returns an EP's kernel registry to ORT.
*
* \param[out] kernel_registry Output parameter set to the new OrtKernelRegistry instance.
* Must be released with OrtEpApi::ReleaseKernelRegistry.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(CreateKernelRegistry, _Outptr_ OrtKernelRegistry** kernel_registry);
// Declares OrtEpApi::ReleaseKernelRegistry, the release function for OrtKernelRegistry instances.
ORT_CLASS_RELEASE(KernelRegistry);
/** \brief Adds kernel creation information for a supported operator kernel to the given kernel registry.
*
* \remarks Refer to OrtEp::GetKernelRegistry, which returns an EP's kernel registry to ORT.
*
* \param[in] kernel_registry The OrtKernelRegistry instance.