41 changes: 11 additions & 30 deletions src/driver/amdxdna/amdxdna_cma_buf.c
@@ -201,16 +201,6 @@ bool amdxdna_use_cma(void)
#endif
}

static int get_cma_mem_index(u64 flags)
{
/*
* Extract lower 8 bits for memory index (0-255 range).
* Valid indexes: 0-15
* Invalid indexes (16-255): trigger fallback to default CMA region
*/
return flags & 0xFF;
}

static bool get_cacheable_flag(u64 flags)
{
return (flags & AMDXDNA_BO_FLAGS_CACHEABLE) != 0;
@@ -222,12 +212,11 @@ static bool get_cacheable_flag(u64 flags)
* @max_regions: Maximum number of regions in the array
* @fallback_dev: Device to use as final fallback (system default CMA)
* @size: Size of buffer to allocate
* @flags: Flags containing region index in bits [7:0]
* @flags: Cacheable flag and region selection bitmap (bits [7:0])
*
* Attempts allocation in order:
* 1. Requested region (extracted from flags)
* 2. Other available initialized regions
* 3. System default CMA (fallback_dev)
* 1. Requested region(s) (extracted from the flags bitmap)
* 2. System default CMA (fallback_dev)
*
* Return: dma_buf pointer on success, ERR_PTR on failure
*/
@@ -237,28 +226,20 @@ struct dma_buf *amdxdna_get_cma_buf_with_fallback(struct device *const *region_d
size_t size, u64 flags)
{
struct dma_buf *dma_buf;
int mem_index;
bool cacheable;
int mem_index;
int i;

mem_index = get_cma_mem_index(flags);
cacheable = get_cacheable_flag(flags);
mem_index = (int)(flags & 0xFFULL);

/* Try requested region first */
if (mem_index < max_regions && region_devs[mem_index]) {
dma_buf = amdxdna_get_cma_buf(region_devs[mem_index], size, cacheable);
if (!IS_ERR(dma_buf))
return dma_buf;
}

/* Try any other available initialized region */
/* Try to allocate from the requested region(s) in bitmap order (bit 0, then 1, ...). */
for (i = 0; i < max_regions; i++) {
if (i == mem_index || !region_devs[i])
continue;

dma_buf = amdxdna_get_cma_buf(region_devs[i], size, cacheable);
if (!IS_ERR(dma_buf))
return dma_buf;
if ((mem_index & (1U << i)) && region_devs[i]) {
dma_buf = amdxdna_get_cma_buf(region_devs[i], size, cacheable);
if (!IS_ERR(dma_buf))
return dma_buf;
}
}

/* Final fallback to system default CMA */
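With this change the low byte of the BO flags is a region bitmap rather than a single index, so a caller selects acceptable regions by setting bits. A minimal sketch of composing such flags, assuming only AMDXDNA_BO_FLAGS_CACHEABLE from the driver headers and the kernel's BIT() macro; the helper name is illustrative:

```c
/*
 * Illustrative only: request regions 0 and 2 (tried in bit order,
 * then the system default CMA) and ask for a cacheable mapping.
 */
static u64 example_cma_flags(void)
{
	u64 flags = BIT(0) | BIT(2);	/* region bitmap in bits [7:0] */

	flags |= AMDXDNA_BO_FLAGS_CACHEABLE;
	return flags;
}
```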
2 changes: 1 addition & 1 deletion src/driver/amdxdna/amdxdna_drm.h
@@ -20,7 +20,7 @@
#include "amdxdna_dpt.h"
#include "amdxdna_gem.h"

#define MAX_MEM_REGIONS 16
#define MAX_MEM_REGIONS 8

#define XDNA_INFO(xdna, fmt, args...) dev_info((xdna)->ddev.dev, fmt, ##args)
#define XDNA_WARN(xdna, fmt, args...) \
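MAX_MEM_REGIONS drops from 16 to 8 because the flags field now carries the region bitmap in bits [7:0]: with one bit per region, at most eight regions are addressable. A compile-time guard could pin that coupling down — a sketch, with the mask-width constant invented here:

```c
/* Hypothetical guard: the region bitmap occupies bits [7:0] of the
 * BO flags, so the region count must not exceed that field width.
 */
#define AMDXDNA_MEM_REGION_MASK_BITS	8
static_assert(MAX_MEM_REGIONS <= AMDXDNA_MEM_REGION_MASK_BITS,
	      "region bitmap must fit in bits [7:0] of the BO flags");
```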
46 changes: 46 additions & 0 deletions src/driver/amdxdna/ve2_debug.c
@@ -549,6 +549,48 @@ static int ve2_get_clock_metadata(struct amdxdna_client *client, struct amdxdna_
return 0;
}

static int ve2_get_hwctx_mem_index(struct amdxdna_client *client,
struct amdxdna_drm_get_array *args)
{
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_ctx *hwctx;
u32 context_id;
u32 mem_index;
int idx;

/* Context ID is passed via element_size */
context_id = args->element_size;

XDNA_DBG(xdna, "Query mem_index for context_id=%u", context_id);

idx = srcu_read_lock(&client->ctx_srcu);
hwctx = xa_load(&client->ctx_xa, context_id);
if (!hwctx) {
XDNA_ERR(xdna, "Context %u not found", context_id);
srcu_read_unlock(&client->ctx_srcu, idx);
return -EINVAL;
}

if (!hwctx->priv) {
XDNA_ERR(xdna, "Context %u has no private data", context_id);
srcu_read_unlock(&client->ctx_srcu, idx);
return -EINVAL;
}

mem_index = hwctx->priv->mem_index;
srcu_read_unlock(&client->ctx_srcu, idx);

XDNA_DBG(xdna, "Returning mem_index=0x%x for context_id=%u", mem_index, context_id);

if (copy_to_user(u64_to_user_ptr(args->buffer), &mem_index, sizeof(mem_index))) {
XDNA_ERR(xdna, "Failed to copy mem_index to user");
return -EFAULT;
}

args->num_element = 1;
return 0;
}

int ve2_get_aie_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
{
struct amdxdna_dev *xdna = client->xdna;
@@ -759,6 +801,10 @@ int ve2_get_array(struct amdxdna_client *client, struct amdxdna_drm_get_array *a
case DRM_AMDXDNA_HWCTX_AIE_PART_FD:
ret = ve2_get_aie_part_fd(client, args);
break;
case DRM_AMDXDNA_HWCTX_MEM_INDEX:
XDNA_DBG(xdna, "Getting hardware context mem_index");
ret = ve2_get_hwctx_mem_index(client, args);
break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
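For context, a user-space query of the new parameter might look roughly like the following. This is a sketch, not the documented ABI: the ioctl request macro and the struct layout are assumptions reconstructed from the fields the handler touches (param, element_size, buffer, num_element); the real definitions live in the amdxdna uapi header.

```c
#include <stdint.h>
#include <sys/ioctl.h>

/* Assumed layout mirroring the fields ve2_get_hwctx_mem_index() uses. */
struct amdxdna_drm_get_array {
	uint64_t buffer;	/* out: user pointer receiving the data */
	uint32_t total_size;
	uint32_t element_size;	/* in: repurposed to carry the context ID */
	uint32_t num_element;	/* out: set to 1 on success */
	uint32_t param;		/* in: DRM_AMDXDNA_HWCTX_MEM_INDEX */
};

static int query_hwctx_mem_index(int drm_fd, uint32_t ctx_id, uint32_t *mem_index)
{
	struct amdxdna_drm_get_array args = {
		.buffer = (uint64_t)(uintptr_t)mem_index,
		.element_size = ctx_id,
		.param = DRM_AMDXDNA_HWCTX_MEM_INDEX,
	};

	/* DRM_IOCTL_AMDXDNA_GET_ARRAY is assumed to wrap this query. */
	return ioctl(drm_fd, DRM_IOCTL_AMDXDNA_GET_ARRAY, &args);
}
```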
2 changes: 2 additions & 0 deletions src/driver/amdxdna/ve2_host_queue.h
@@ -119,6 +119,8 @@ struct ve2_hsa_queue {
// hq_lock protects [read | write]_index and reserved_write_index
struct mutex hq_lock;
u64 reserved_write_index;
/* Device used for host queue allocation */
struct device *alloc_dev;
};

/* handshake */
55 changes: 37 additions & 18 deletions src/driver/amdxdna/ve2_hwctx.c
@@ -371,17 +371,16 @@ static inline void hsa_queue_pkt_set_invalid(struct host_queue_packet *pkt)

static void ve2_free_hsa_queue(struct amdxdna_dev *xdna, struct ve2_hsa_queue *queue)
{
struct platform_device *pdev = to_platform_device(xdna->ddev.dev);

if (queue->hsa_queue_p) {
XDNA_DBG(xdna, "Freeing host queue: dma_addr=0x%llx",
queue->hsa_queue_mem.dma_addr);
dma_free_coherent(&pdev->dev,
dma_free_coherent(queue->alloc_dev,
sizeof(struct hsa_queue) + sizeof(u64) * HOST_QUEUE_ENTRY,
queue->hsa_queue_p,
queue->hsa_queue_mem.dma_addr);
queue->hsa_queue_p = NULL;
queue->hsa_queue_mem.dma_addr = 0;
queue->alloc_dev = NULL;
mutex_destroy(&queue->hq_lock);
}
}
@@ -474,24 +473,41 @@ void packet_dump(struct amdxdna_dev *xdna, struct hsa_queue *queue, u64 slot_id)
/*
* Create hsa queue in kernel and initialize queue slots.
*/
static int ve2_create_host_queue(struct amdxdna_dev *xdna, struct ve2_hsa_queue *queue)
static int ve2_create_host_queue(struct amdxdna_dev *xdna, struct amdxdna_ctx *hwctx,
struct ve2_hsa_queue *queue)
{
struct platform_device *pdev = to_platform_device(xdna->ddev.dev);
int nslots = HOST_QUEUE_ENTRY;
struct device *alloc_dev;
dma_addr_t dma_handle;
size_t alloc_size;

alloc_size = sizeof(struct hsa_queue) + sizeof(u64) * nslots;
XDNA_DBG(xdna, "Creating host queue: nslots=%d, alloc_size=%zu", nslots, alloc_size);

/* Allocate a single contiguous block of memory */
queue->hsa_queue_p = dma_alloc_coherent(&pdev->dev,
for (int i = 0; i < MAX_MEM_REGIONS; i++) {
alloc_dev = xdna->cma_region_devs[i];
if ((hwctx->priv->mem_index & (1 << i)) && alloc_dev) {
queue->hsa_queue_p = dma_alloc_coherent(alloc_dev, alloc_size, &dma_handle, GFP_KERNEL);
if (!queue->hsa_queue_p)
continue;
queue->alloc_dev = alloc_dev;
break;
}
}

/* if no allocation was successful, allocate from the default device */
if (!queue->hsa_queue_p) {
queue->hsa_queue_p = dma_alloc_coherent(&pdev->dev,
alloc_size,
&dma_handle,
GFP_KERNEL);
if (!queue->hsa_queue_p) {
XDNA_ERR(xdna, "Failed to allocate host queue memory, size=%zu", alloc_size);
return -ENOMEM;
if (!queue->hsa_queue_p) {
XDNA_ERR(xdna, "Failed to allocate host queue memory, size=%zu", alloc_size);
return -ENOMEM;
}
queue->alloc_dev = &pdev->dev;
}

/* Initialize mutex here */
@@ -1234,17 +1250,20 @@ int ve2_hwctx_init(struct amdxdna_ctx *hwctx)
hwctx->priv = priv;
init_waitqueue_head(&priv->waitq);

/* one host_queue entry per hwctx */
ret = ve2_create_host_queue(xdna, &priv->hwctx_hsa_queue);
ret = ve2_xrs_request(xdna, hwctx);
if (ret) {
XDNA_ERR(xdna, "Failed to create host queue, ret=%d", ret);
goto free_priv;
XDNA_ERR(xdna, "XRS resource request failed, ret=%d", ret);
goto cleanup_priv;
}

ret = ve2_xrs_request(xdna, hwctx);
/* Auto-select memory index based on start_col */
ve2_auto_select_mem_index(xdna, hwctx);
Review comment (Copilot AI, Feb 6, 2026) on lines 1253-1260: The automatic mem_index selection logic introduced by ve2_auto_select_mem_index() lacks test coverage. Consider adding tests to verify correct mem_index assignment for various start_col values and topology configurations.

/* one host_queue entry per hwctx */
ret = ve2_create_host_queue(xdna, hwctx, &priv->hwctx_hsa_queue);
if (ret) {
XDNA_ERR(xdna, "XRS resource request failed, ret=%d", ret);
goto free_hsa_queue;
XDNA_ERR(xdna, "Failed to create host queue, ret=%d", ret);
goto cleanup_xrs;
}

if (enable_polling) {
@@ -1267,9 +1286,9 @@ int ve2_hwctx_init(struct amdxdna_ctx *hwctx)

return 0;

free_hsa_queue:
ve2_free_hsa_queue(xdna, &hwctx->priv->hwctx_hsa_queue);
free_priv:
cleanup_xrs:
ve2_mgmt_destroy_partition(hwctx);
cleanup_priv:
kfree(hwctx->priv);

return ret;
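Two details in this hunk are worth calling out: allocation walks the context's region bitmap in bit order (bit 0 first), and the winning device is recorded in alloc_dev because dma_free_coherent() must be called on the same device that performed the allocation. A generic sketch of the pattern, with the driver-specific types abstracted away:

```c
/*
 * Sketch: try each device whose bit is set in @mask (bit 0 first),
 * recording the successful device so the free path can mirror it.
 */
static void *alloc_from_region_mask(struct device **devs, int ndevs,
				    u32 mask, size_t size,
				    dma_addr_t *dma_handle,
				    struct device **out_dev)
{
	void *buf;
	int i;

	for (i = 0; i < ndevs; i++) {
		if (!(mask & BIT(i)) || !devs[i])
			continue;
		buf = dma_alloc_coherent(devs[i], size, dma_handle, GFP_KERNEL);
		if (buf) {
			*out_dev = devs[i];	/* required by dma_free_coherent() */
			return buf;
		}
	}
	return NULL;	/* caller falls back to the default device */
}
```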