// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2020 MediaTek Inc. */ #include "vpu_cfg.h" #include "vpu_hw.h" #include #include #include #include #include #include #include #include #include #include "vpu_algo.h" #include "vpu_cmd.h" #include "vpu_mem.h" #include "vpu_power.h" #include "vpu_debug.h" #include "vpu_cmn.h" #include "vpu_dump.h" #include "vpu_trace.h" #include "vpu_met.h" #include "vpu_tag.h" #define CREATE_TRACE_POINTS #include "vpu_events.h" #define VPU_TS_INIT(a) \ struct timespec64 a##_s, a##_e #define VPU_TS_START(a) \ ktime_get_ts64(&a##_s) #define VPU_TS_END(a) \ ktime_get_ts64(&a##_e) #define VPU_TS_NS(a) \ ((uint64_t)(timespec64_to_ns(&a##_e) - timespec64_to_ns(&a##_s))) #define VPU_TS_US(a) \ (VPU_TS_NS(a) / 1000) static int wait_idle(struct vpu_device *vd, uint32_t latency, uint32_t retry); static int vpu_set_ftrace(struct vpu_device *vd); static void vpu_run(struct vpu_device *vd, int prio, uint32_t cmd) { struct vpu_register *r = vd_reg(vd); vpu_reg_write(vd, xtensa_info01, cmd); vpu_cmd_run(vd, prio, cmd); vpu_reg_clr(vd, ctrl, r->stall); } static inline void __vpu_stall(struct vpu_device *vd) { struct vpu_register *r = vd_reg(vd); vpu_reg_set(vd, ctrl, r->stall); } static inline void vpu_stall(struct vpu_device *vd) { if (xos_type(vd) == VPU_NON_XOS) __vpu_stall(vd); /* XOS doesn't need stall */ } static void vpu_cmd(struct vpu_device *vd, int prio, uint32_t cmd) { vpu_cmd_debug("%s: vpu%d: cmd: 0x%x (%s)\n", __func__, vd->id, cmd, vpu_debug_cmd_str(cmd)); vpu_run(vd, prio, cmd); wmb(); /* make sure register committed */ vpu_reg_set(vd, ctl_xtensa_int, 1); } #define XOS_UNLOCKED 0 #define XOS_LOCKED 1 /** * vpu_xos_lock() - lock vpu for control * * @vd: vpu device * * Returns: * 0: lock succeed * -ETIME: lock timeout * -EAGAIN: crashed by other priority */ static int vpu_xos_lock(struct vpu_device *vd) { int s = XOS_LOCKED; uint64_t t; int ret = 0; VPU_TS_INIT(xos); VPU_TS_START(xos); if (xos_type(vd) != VPU_XOS) return 0; while (1) { s = atomic_cmpxchg(&vd->xos_state, XOS_UNLOCKED, XOS_LOCKED); if (s == XOS_UNLOCKED) break; VPU_TS_END(xos); t = VPU_TS_US(xos); if (t >= XOS_TIMEOUT_US) { pr_info("%s: vpu%d: xos lock timeout: %lld us\n", __func__, vd->id, t); break; } } /* Other priorites had timeout, shutdown by vpu_excp */ if (s == XOS_UNLOCKED) { if ((vd->state <= VS_DOWN) || (vd->state >= VS_REMOVING)) { ret = -EAGAIN; atomic_set(&vd->xos_state, XOS_UNLOCKED); } } /* XOS lock timeout */ else if (s == XOS_LOCKED) ret = -ETIME; VPU_TS_END(xos); vpu_cmd_debug("%s: vpu%d: %d, %lld us\n", __func__, vd->id, ret, VPU_TS_US(xos)); return ret; } /** * vpu_xos_unlock - unlock xos when forced power down * @vd: vpu device */ static void vpu_xos_unlock(struct vpu_device *vd) { if (xos_type(vd) != VPU_XOS) return; vpu_cmd_debug("%s: vpu%d\n", __func__, vd->id); atomic_set(&vd->xos_state, XOS_UNLOCKED); } /** * vpu_xos_wait_idle - wait for xos idle when forced power down * @vd: vpu device * * Retruns -EBUSY, if device is still busy after * (WAIT_XOS_LATENCY_US * WAIT_XOS_RETRY) us */ static int vpu_xos_wait_idle(struct vpu_device *vd) { struct vpu_config *cfg = vd_cfg(vd); if (cfg->xos != VPU_XOS) return 0; return wait_idle(vd, cfg->wait_xos_latency_us, cfg->wait_xos_retry); } /** * vpu_reg_lock() - lock vpu register for control * * @vd: vpu device * @boot: called at boot sequence, Eq. vd->lock is acquired * @flags: saved irq flags * * Returns: * 0: lock succeed * -ETIME: lock timeout * -EAGAIN: crashed by other priority */ static inline int vpu_reg_lock(struct vpu_device *vd, bool boot, unsigned long *flags) { int ret = 0; struct vpu_sys_ops *sops = vd_sops(vd); if (sops->xos_lock) ret = sops->xos_lock(vd); if (ret == 0) { spin_lock_irqsave(&vd->reg_lock, (*flags)); } else if (ret == -ETIME) { if (boot) { vpu_excp_locked(vd, NULL, "VPU Timeout", "vpu%d: XOS lock timeout (boot)\n", vd->id); } else { vpu_excp(vd, NULL, "VPU Timeout", "vpu%d: XOS lock timeout\n", vd->id); } } else if (ret == -EAGAIN) { pr_info("%s: vpu%d: failed caused by other thread\n", __func__, vd->id); } return ret; } static inline void vpu_reg_unlock(struct vpu_device *vd, unsigned long *flags) { spin_unlock_irqrestore(&vd->reg_lock, (*flags)); } static int wait_idle(struct vpu_device *vd, uint32_t latency, uint32_t retry) { uint32_t st = 0; uint32_t pwait = 0; unsigned int count = 0; struct vpu_register *r = vd_reg(vd); do { st = vpu_reg_read(vd, done_st); count++; pwait = !!(st & r->pwaitmode); if (pwait) return 0; udelay(latency); trace_vpu_wait(vd->id, st, vpu_reg_read(vd, xtensa_info00), vpu_reg_read(vd, xtensa_info25), vpu_reg_read(vd, debug_info05)); } while (count < retry); pr_info("%s: vpu%d: %d us: done_st: 0x%x, pwaitmode: %d, info00: 0x%x, info25: 0x%x\n", __func__, vd->id, (latency * retry), st, pwait, vpu_reg_read(vd, xtensa_info00), vpu_reg_read(vd, xtensa_info25)); return -EBUSY; } static int wait_command(struct vpu_device *vd, int prio_s) { int ret = 0; unsigned int prio = prio_s; bool retry = true; struct vpu_config *cfg = vd_cfg(vd); start: ret = wait_event_interruptible_timeout( vd->cmd[prio].wait, vd->cmd[prio].done, msecs_to_jiffies(vd->cmd_timeout)); if (ret == -ERESTARTSYS) { pr_info("%s: vpu%d: interrupt by signal: ret=%d\n", __func__, vd->id, ret); if (retry) { pr_info("%s: vpu%d: try wait again\n", __func__, vd->id); retry = false; goto start; } goto out; } /* Other priorites had timeout, shutdown by vpu_excp */ if (vd->state <= VS_DOWN) { ret = -EAGAIN; goto out; } /* command success */ if (ret) { ret = 0; goto out; } /* timeout handling */ ret = -ETIMEDOUT; /* debug: check PWAITMODE */ wait_idle(vd, cfg->wait_cmd_latency_us, cfg->wait_cmd_retry); out: return ret; } static void vpu_exit_dev_algo_general(struct vpu_device *vd, struct vpu_algo_list *al) { struct __vpu_algo *alg; struct list_head *ptr, *tmp; if (!al) return; vpu_alg_debug("%s: %s\n", __func__, al->name); list_for_each_safe(ptr, tmp, &al->a) { alg = list_entry(ptr, struct __vpu_algo, list); al->ops->put(alg); } } static void *bin_header_legacy(int i) { struct vpu_img_hdr_legacy *hl; unsigned long bin = (unsigned long)vpu_drv->bin_va; hl = (struct vpu_img_hdr_legacy *)(bin + vpu_drv->vp->cfg->bin_ofs_header); return &hl[i]; } static struct vpu_algo_info *bin_alg_info_legacy(void *h, int j) { struct vpu_img_hdr_legacy *hl = h; return &hl->algo_infos[j]; } static int bin_alg_info_cnt_legacy(void *h) { struct vpu_img_hdr_legacy *hl = h; return hl->algo_info_count; } static void *bin_header_preload(int index) { int i; uint64_t ptr = (unsigned long)vpu_drv->bin_va; struct vpu_img_hdr_preload *hp; ptr += vpu_drv->bin_head_ofs; hp = (void *)ptr; for (i = 0; i < index; i++) { ptr += hp->header_size; hp = (void *)ptr; } return (void *)hp; } static struct vpu_algo_info *bin_algo_info_preload( void *h, int j) { struct vpu_img_hdr_preload *hp = h; struct vpu_algo_info *algo_info; algo_info = (void *)((unsigned long)hp + hp->alg_info); return &algo_info[j]; } static int bin_alg_info_cnt_preload(void *h) { struct vpu_img_hdr_preload *hp = h; return hp->algo_info_count; } static uint32_t bin_pre_info_preload(void *h) { struct vpu_img_hdr_preload *hp = h; return hp->pre_info; } static int bin_pre_info_cnt_preload(void *h) { struct vpu_img_hdr_preload *hp = h; return hp->pre_info_count; } static int preload_iova_check(struct vpu_iova *i) { unsigned int bin_end = vpu_drv->bin_pa + vpu_drv->bin_size; #define is_align_4k(x) ((x & 0xFFF)) #define is_align_64k(x) ((x & 0xFFFF)) if (is_align_64k(i->size) && i->addr) { pr_info("%s: size(0x%x) not 64k aligned\n", __func__, i->size); return -EINVAL; } if (is_align_4k(i->addr) || is_align_4k(i->size) || is_align_4k(i->bin)) { pr_info("%s: addr/size not 4k aligned\n", __func__); return -EINVAL; } if ((i->bin + i->size) > bin_end) { pr_info("%s: wrong size\n", __func__); return -EINVAL; } #undef is_align_4k #undef is_align_64k return 0; } static dma_addr_t preload_iova_alloc( struct vpu_device *vd, struct vpu_iova *vi, uint32_t addr, uint32_t size, uint32_t bin) { dma_addr_t mva; vi->addr = addr; vi->size = size; vi->bin = bin; if (preload_iova_check(vi)) return 0; mva = vd_mops(vd)->alloc(vd->dev, vi); if (!mva) pr_info("%s: vpu%d: iova allcation failed\n", __func__, vd->id); vpu_drv_debug( "%s: vpu%d: addr:0x%X, size: 0x%X, bin: 0x%X, mva: 0x%lx\n", __func__, vd->id, vi->addr, vi->size, vi->bin, (unsigned long) mva); return mva; } static int vpu_algo_cmp(void *priv, const struct list_head *a, const struct list_head *b) { struct __vpu_algo *la, *lb; la = list_entry(a, struct __vpu_algo, list); lb = list_entry(b, struct __vpu_algo, list); return strcmp(la->a.name, lb->a.name); } static int vpu_init_algo_info(struct vpu_device *vd, struct vpu_iova *iova_algo_info, struct vpu_algo_list *al) { struct algo_head *head; struct algo_list *algo, *array; struct __vpu_algo *alg, *tmp; dma_addr_t iova = 0; unsigned int size = al->cnt * sizeof(struct algo_list) + sizeof(struct algo_head); struct vpu_mem_ops *mops = vd_mops(vd); if (iova_algo_info->m.va) return 0; memset(iova_algo_info, 0, sizeof(struct vpu_iova)); iova_algo_info->size = __ALIGN_KERNEL(size, 0x1000); iova_algo_info->bin = 0xFFFFFFFF; iova = mops->alloc(vd->dev, iova_algo_info); if (!iova) { pr_info("%s: vpu%d: iova allcation failed\n", __func__, vd->id); return 0; } head = (struct algo_head *)iova_algo_info->m.va; memset(head, 0, size); head->size = sizeof(struct algo_head); head->id = vd->id; head->number = al->cnt; array = (void *)((unsigned long)head + sizeof(struct algo_head)); algo = array; spin_lock(&al->lock); list_sort(NULL, &al->a, vpu_algo_cmp); list_for_each_entry_safe(alg, tmp, &al->a, list) { strncpy(algo->name, alg->a.name, (ALGO_NAMELEN - 1)); algo->mva = alg->a.mva; algo->len = alg->a.len; algo->entry_off = alg->a.entry_off; algo->iram_len = alg->a.iram_len; algo->iram_mva = alg->a.iram_mva; if (algo->entry_off) head->preload = 1; vpu_drv_debug("%s: vpu%d algo(0x%lX): %s, va: 0x%08X, len: 0x%X, entry: 0x%X\n", __func__, vd->id, (unsigned long)algo, algo->name, algo->mva, algo->len, algo->entry_off); algo++; } spin_unlock(&al->lock); mops->sync_for_device(vd->dev, iova_algo_info); vpu_drv_debug("%s: vpu%d [%s] iova: 0x%lX, size: 0x%X, num: %d\n", __func__, vd->id, (head->preload) ? "preload" : "normal", (unsigned long)iova, size, vd->aln.cnt); return 0; } static int vpu_free_algo_info(struct vpu_device *vd) { struct vpu_mem_ops *mops = vd_mops(vd); if (!vd->iova_algo_info.m.va) return 0; mops->alloc(vd->dev, &vd->iova_algo_info); mops->alloc(vd->dev, &vd->iova_preload_info); vd->iova_algo_info.m.va = 0; vd->iova_preload_info.m.va = 0; return 0; } #define PRELOAD_IRAM 0xFFFFFFFF static uint32_t vpu_init_dev_algo_preload_entry( struct vpu_device *vd, struct vpu_algo_list *al, struct vpu_pre_info *info, uint32_t bin) { struct __vpu_algo *alg; struct vpu_iova dummy_iova; struct vpu_iova *vi; uint64_t mva; uint32_t addr; uint32_t size = __ALIGN_KERNEL(info->file_sz, 0x1000); alg = al->ops->get(al, info->name, NULL); /* algo is already existed in the list */ if (alg) { al->ops->put(alg); if (info->pAddr == PRELOAD_IRAM) { addr = 0; /* dynamic alloc iova */ vi = &alg->iram; mva = preload_iova_alloc(vd, vi, addr, size, info->off); alg->a.iram_mva = mva; goto added; } /* other segments had been merged into EXE_SEG */ size = 0; goto out; } /* add new algo to the list */ addr = info->start_addr & 0xFFFF0000; /* static alloc iova */ if (info->info) size = info->info; // already aligned at packing stage alg = vpu_alg_alloc(al); if (!alg) goto out; al->cnt++; strncpy(alg->a.name, info->name, (ALGO_NAMELEN - 1)); if (info->flag & 0x1 /* EXE_SEG */) { vi = &alg->prog; alg->a.entry_off = info->pAddr - addr; } else { size = 0; vi = &dummy_iova; pr_info("%s: vpu%d: unexpected segment: flags: %x\n", __func__, vd->id, info->flag); } mva = preload_iova_alloc(vd, vi, addr, size, info->off); alg->a.mva = mva; if (!alg->a.mva) { vpu_alg_free(alg); goto out; } alg->builtin = true; list_add_tail(&alg->list, &al->a); added: vpu_drv_debug("%s: vpu%d(%xh): %s <%s>: off: %x, mva: %llx, size: %x, addr: %x\n", __func__, vd->id, info->vpu_core, info->name, (info->pAddr == PRELOAD_IRAM) ? "IRAM" : "PROG", info->off, mva, size, addr); out: return bin + size; } static int vpu_init_dev_algo_preload( struct vpu_device *vd, struct vpu_algo_list *al) { int i, j, ret = 0; uint32_t offset; struct vpu_pre_info *info = NULL; struct vpu_bin_ops *bops = vd_bops(vd); if (!bops->pre_info || !bops->pre_info_cnt) return 0; vpu_algo_list_init(vd, al, &vpu_prelaod_aops, "Preload"); offset = vpu_drv->bin_preload_ofs; for (i = 0; i < VPU_NUMS_IMAGE_HEADER; i++) { void *h = bops->header(i); int cnt; info = (void *)((unsigned long)h + bops->pre_info(h)); cnt = bops->pre_info_cnt(h); for (j = 0; j < cnt; j++, info++) { if (!((info->vpu_core & 0xF) & (1 << vd->id))) continue; offset = vpu_init_dev_algo_preload_entry( vd, al, info, offset); } } vpu_init_algo_info(vd, &vd->iova_preload_info, al); return ret; } static int vpu_init_dev_algo_normal( struct vpu_device *vd, struct vpu_algo_list *al) { int i, j; int ret = 0; unsigned int mva; struct vpu_algo_info *algo_info; struct vpu_bin_ops *bops = vd_bops(vd); vpu_algo_list_init(vd, al, &vpu_normal_aops, "Normal"); /* for each algo in the image header, add them to device's algo list */ for (i = 0; i < VPU_NUMS_IMAGE_HEADER; i++) { void *h = bops->header(i); int cnt = bops->alg_info_cnt(h); for (j = 0; j < cnt; j++) { struct __vpu_algo *alg; algo_info = bops->alg_info(h, j); mva = algo_info->offset - vpu_drv->iova_algo.bin + vpu_drv->mva_algo; /* skips, if the core mask mismatch */ if (!((algo_info->vpu_core & 0xF) & (1 << vd->id))) continue; vpu_drv_debug("%s: vpu%d(%xh): %s: off: %x, mva: %x, len: %x\n", __func__, vd->id, algo_info->vpu_core, algo_info->name, algo_info->offset, mva, algo_info->length); alg = vpu_alg_alloc(al); if (!alg) { ret = -ENOMEM; goto out; } strncpy(alg->a.name, algo_info->name, (ALGO_NAMELEN - 1)); alg->a.mva = mva; alg->a.len = algo_info->length; alg->builtin = true; list_add_tail(&alg->list, &al->a); al->cnt++; } vpu_drv_debug("%s: vpu%d, total algo count: %d\n", __func__, vd->id, al->cnt); } vpu_init_algo_info(vd, &vd->iova_algo_info, al); out: return ret; } /* called by vpu_probe() */ int vpu_init_dev_algo(struct platform_device *pdev, struct vpu_device *vd) { int ret; ret = vpu_init_dev_algo_normal(vd, &vd->aln); if (ret) goto out; if (bin_type(vd) == VPU_IMG_PRELOAD) ret = vpu_init_dev_algo_preload(vd, &vd->alp); out: return ret; } /* called by vpu_remove() */ void vpu_exit_dev_algo(struct platform_device *pdev, struct vpu_device *vd) { vpu_exit_dev_algo_general(vd, &vd->aln); if (bin_type(vd) == VPU_IMG_PRELOAD) vpu_exit_dev_algo_general(vd, &vd->alp); vpu_free_algo_info(vd); } static wait_queue_head_t *vpu_isr_check_cmd_xos(struct vpu_device *vd, uint32_t inf, uint32_t prio) { wait_queue_head_t *wake = NULL; uint32_t ret = 0; if ((DS_PREEMPT_DONE | DS_ALG_DONE | DS_ALG_RDY | DS_DSP_RDY | DS_DBG_RDY | DS_FTRACE_RDY) & inf) { ret = vpu_reg_read(vd, xtensa_info00); ret = (ret >> (prio * 8)) & 0xFF; vpu_cmd_done(vd, prio, ret, vpu_reg_read(vd, xtensa_info02)); wake = vpu_cmd_waitq(vd, prio); } #define DS(a) \ ((inf & DS_##a) ? ":"#a : "") vpu_cmd_debug( "%s: vpu%d: wake: %d, val: %d, prio: %d, %xh%s%s%s%s%s%s%s%s\n", __func__, vd->id, (wake) ? 1 : 0, ret, prio, inf, DS(DSP_RDY), /* boot-up done */ DS(DBG_RDY), /* set-debug done */ DS(ALG_RDY), /* do-loader done */ DS(ALG_DONE), /* d2d done */ DS(ALG_GOT), /* get-algo done */ DS(PREEMPT_RDY), /* context switch done */ DS(PREEMPT_DONE), /* d2d-ext done */ DS(FTRACE_RDY) /* set-ftrace done */); #undef DS vd->dev_state = inf; /* for debug */ return wake; } static bool vpu_isr_check_unlock_xos(uint32_t inf) { return (DS_PREEMPT_RDY & inf); } static wait_queue_head_t *vpu_isr_check_cmd(struct vpu_device *vd, uint32_t inf, uint32_t prio) { wait_queue_head_t *wake = NULL; uint32_t ret = 0; ret = vpu_reg_read(vd, xtensa_info00); if (ret != VPU_STATE_BUSY) { ret = vpu_reg_read(vd, xtensa_info00); vpu_cmd_done(vd, prio, ret, vpu_reg_read(vd, xtensa_info02)); wake = vpu_cmd_waitq(vd, prio); } vpu_cmd_debug( "%s: vpu%d: wake: %d, val: %d, prio: %d, %xh\n", __func__, vd->id, (wake) ? 1 : 0, ret, prio, inf); vd->dev_state = inf; /* for debug */ return wake; } static bool vpu_isr_check_unlock(uint32_t inf) { return true; } static irqreturn_t vpu_isr(int irq, void *dev_id) { struct vpu_device *vd = (struct vpu_device *)dev_id; struct vpu_sys_ops *sops = vd_sops(vd); uint32_t ret, req_cmd, prio, inf, val; wait_queue_head_t *waitq = NULL; bool unlock = false; /* INFO17: device state */ ret = vpu_reg_read(vd, xtensa_info17); vpu_reg_write(vd, xtensa_info17, 0); req_cmd = ret & 0xFFFF; /* request cmd */ inf = ret & 0x00FF0000; /* dev state */ prio = (ret >> 28) & 0xF; /* priroity */ vpu_cmd_debug("%s: vpu%d: INFO17: %0xh, req: %xh, inf: %xh, prio: %xh\n", __func__, vd->id, ret, req_cmd, inf, prio); if (req_cmd == VPU_REQ_DO_CHECK_STATE) { waitq = sops->isr_check_cmd(vd, inf, prio); unlock = sops->isr_check_unlock(inf); } /* MET */ vpu_met_isr(vd); /* clear int */ val = vpu_reg_read(vd, xtensa_int); if (val) { vpu_cmd_debug("%s: vpu%d: xtensa_int = (%d)\n", __func__, vd->id, val); } vpu_reg_write(vd, xtensa_int, 1); if (unlock && sops->xos_unlock) sops->xos_unlock(vd); if (waitq && (waitqueue_active(waitq))) /* cmd wait */ wake_up_interruptible(waitq); return IRQ_HANDLED; } // vd->cmd_lock, should be acquired before calling this function static int vpu_execute_d2d(struct vpu_device *vd, struct vpu_request *req) { int ret; int result = 0; uint32_t cmd = 0; unsigned long flags; uint64_t start_t; VPU_TS_INIT(d2d); VPU_TS_START(d2d); start_t = sched_clock(); req->busy_time = 0; req->algo_ret = 0; vpu_cmd_debug("%s: vpu%d: prio: %d, %s: bw: %d, buf_cnt: %x, sett: %llx +%x\n", __func__, vd->id, req->prio, vpu_cmd_alg_name(vd, req->prio), req->power_param.bw, req->buffer_count, req->sett_ptr, req->sett_length); ret = vpu_cmd_buf_set(vd, req->prio, req->buffers, sizeof(struct vpu_buffer) * req->buffer_count); if (ret) goto out; ret = vpu_reg_lock(vd, false, &flags); if (ret) goto out; /* D2D_EXT: preload algorithm */ if (VPU_REQ_FLAG_TST(req, ALG_PRELOAD)) { struct __vpu_algo *pre; vd->state = VS_CMD_D2D_EXT; cmd = VPU_CMD_DO_D2D_EXT; pre = vpu_cmd_alg(vd, req->prio); vpu_reg_write(vd, xtensa_info11, req->prio); vpu_reg_write(vd, xtensa_info16, pre->a.mva + pre->a.entry_off); vpu_reg_write(vd, xtensa_info19, pre->a.iram_mva); } else { /* D2D: normal algorithm */ vpu_reg_write(vd, xtensa_info11, 0); vd->state = VS_CMD_D2D; cmd = VPU_CMD_DO_D2D; } vpu_reg_write(vd, xtensa_info12, req->buffer_count); vpu_reg_write(vd, xtensa_info13, vpu_cmd_buf_iova(vd, req->prio)); vpu_reg_write(vd, xtensa_info14, req->sett_ptr); vpu_reg_write(vd, xtensa_info15, req->sett_length); vpu_trace_begin("vpu_%d|hw_processing_request(%s),prio:%d", vd->id, req->algo, req->prio); vpu_cmd(vd, req->prio, cmd); vpu_reg_unlock(vd, &flags); ret = wait_command(vd, req->prio); vpu_stall(vd); vpu_trace_end("vpu_%d|hw_processing_request(%s),prio:%d", vd->id, req->algo, req->prio); result = vpu_cmd_result(vd, req->prio); req->status = result ? VPU_REQ_STATUS_FAILURE : VPU_REQ_STATUS_SUCCESS; if ((ret == -ERESTARTSYS) || (ret == -EAGAIN)) goto err_cmd; /* Error handling */ if (ret) { req->status = VPU_REQ_STATUS_TIMEOUT; vpu_excp(vd, req, "VPU Timeout", "vpu%d: request (%s) timeout, priority: %d, algo: %s\n", vd->id, (cmd == VPU_CMD_DO_D2D_EXT) ? "D2D_EXT" : "D2D", req->prio, req->algo); goto err_cmd; } err_cmd: req->algo_ret = vpu_cmd_alg_ret(vd, req->prio); out: VPU_TS_END(d2d); req->busy_time = VPU_TS_NS(d2d); trace_vpu_cmd(vd->id, req->prio, req->algo, cmd, vpu_cmd_boost(vd, req->prio), start_t, ret, req->algo_ret, result); vpu_cmd_debug("%s: vpu%d: prio: %d, %s: time: %llu ns, ret: %d, alg_ret: %d, result: %d\n", __func__, vd->id, req->prio, req->algo, req->busy_time, ret, req->algo_ret, result); return ret; } // reference: vpu_boot_up // vd->cmd_lock, should be acquired before calling this function int vpu_dev_boot(struct vpu_device *vd) { int ret = 0; if (vd->state <= VS_DOWN) { pr_info("%s: unexpected state: %d\n", __func__, vd->state); ret = -ENODEV; goto err; } if (vd->state != VS_UP && vd->state != VS_BOOT) return ret; vd->state = VS_BOOT; vd->dev_state = 0; vpu_trace_begin("vpu_%d|%s", vd->id, __func__); /* VPU boot up sequence */ ret = vpu_dev_boot_sequence(vd); if (ret) { pr_info("%s: vpu_dev_boot_sequence: %d\n", __func__, ret); goto err; } /* VPU set debug log */ ret = vpu_dev_set_debug(vd); if (ret) { pr_info("%s: vpu_dev_set_debug: %d\n", __func__, ret); goto err; } /* MET: ftrace setup */ ret = vpu_set_ftrace(vd); if (ret) { pr_info("%s: vpu_met_set_ftrace: %d\n", __func__, ret); goto err; } /* MET: pm setup */ vpu_met_pm_get(vd); vd->state = VS_IDLE; err: vpu_trace_end("vpu_%d|%s", vd->id, __func__); return ret; } /** * vpu_execute - execute a vpu request * @vd: vpu device * @req: vpu request * * returns: 0: success, otherwise: fail */ int vpu_execute(struct vpu_device *vd, struct vpu_request *req) { int ret = 0; int ret_pwr = 0; int boost = 0; uint64_t flags; struct vpu_algo_list *al; enum vpu_state state; vpu_cmd_lock(vd, req->prio); /* Backup/Restore request flags for elevate to preload algo */ flags = req->flags; /* Bootup VPU */ mutex_lock_nested(&vd->lock, VPU_MUTEX_DEV); if (!vd_is_available(vd)) { ret_pwr = -ENODEV; goto nodev; } boost = vpu_cmd_boost_set(vd, req->prio, req->power_param.boost_value); ret_pwr = vpu_pwr_get_locked(vd, boost); if (!ret_pwr) ret = vpu_dev_boot(vd); /* Backup original state */ state = vd->state; nodev: mutex_unlock(&vd->lock); if (ret_pwr || (ret == -ETIMEDOUT)) goto err_remove; if (ret) goto err_boot; /* Load Algorithm (DO_LOADER) */ if (!req->prio && !VPU_REQ_FLAG_TST(req, ALG_PRELOAD) && !VPU_REQ_FLAG_TST(req, ALG_RELOAD) && vd->cmd[0].alg && (vd->cmd[0].alg->al == &vd->aln) && (!strcmp(vd->cmd[0].alg->a.name, req->algo))) goto send_req; al = VPU_REQ_FLAG_TST(req, ALG_PRELOAD) ? (&vd->alp) : (&vd->aln); retry: ret = al->ops->load(al, req->algo, NULL, req->prio); /* Elevate to preloaded algo, if normal algo was not found */ if ((ret == -ENOENT) && (al == &vd->aln)) { VPU_REQ_FLAG_SET(req, ALG_PRELOAD); al = &vd->alp; goto retry; } if (ret) { pr_info("%s: vpu%d: \"%s\" was not found\n", __func__, al->vd->id, req->algo); goto err_alg; } send_req: /* Send Request (DO_D2D) */ ret = vpu_execute_d2d(vd, req); err_alg: if (ret == -ETIMEDOUT) goto err_remove; err_boot: mutex_lock_nested(&vd->lock, VPU_MUTEX_DEV); /* Skip, if other priorities had timeout and powered down */ if (vd->state != VS_DOWN) { /* There's no more active commands => idle */ if (!atomic_read(&vd->cmd_active)) vd->state = VS_IDLE; /* Otherwise, restore to original state */ else vd->state = state; boost = vpu_cmd_boost_put(vd, req->prio); vpu_pwr_put_locked(vd, boost); } mutex_unlock(&vd->lock); err_remove: req->flags = flags; vpu_cmd_unlock(vd, req->prio); return ret; } /** * vpu_preempt - Run vpu request of a preloaded algorithm * @vd: vpu device * @req: vpu request * * returns: 0: success, otherwise: fail */ int vpu_preempt(struct vpu_device *vd, struct vpu_request *req) { int ret = 0; int prio; prio = atomic_inc_return(&vd->cmd_prio); if (prio >= vd->cmd_prio_max) { ret = -EPERM; goto out; } req->prio = prio; ret = vpu_execute(vd, req); out: atomic_dec(&vd->cmd_prio); return ret; } /* driver hw init */ int vpu_init_drv_hw(void) { return 0; } /* device hw init */ int vpu_init_dev_hw(struct platform_device *pdev, struct vpu_device *vd) { int ret = 0; struct vpu_sys_ops *sops = vd_sops(vd); struct vpu_cmd_ops *cmd_ops = vd_cmd_ops(vd); ret = request_irq(vd->irq_num, sops->isr, irq_get_trigger_type(vd->irq_num), vd->name, vd); if (ret) { pr_info("%s: %s: fail to request irq: %d\n", __func__, vd->name, ret); goto out; } mutex_init(&vd->lock); spin_lock_init(&vd->reg_lock); if (sops->xos_unlock) sops->xos_unlock(vd); ret = cmd_ops->init(vd); if (ret) { pr_info("%s: %s: fail to init commands: %d\n", __func__, vd->name, ret); vpu_cmd_exit(vd); free_irq(vd->irq_num, vd); goto out; } out: return ret; } /* driver hw exit function */ int vpu_exit_drv_hw(void) { return 0; } /* device hw exit function */ int vpu_exit_dev_hw(struct platform_device *pdev, struct vpu_device *vd) { /* stall vpu to prevent iommu translation faults after free iovas */ __vpu_stall(vd); vpu_cmd_exit(vd); free_irq(vd->irq_num, vd); return 0; } int vpu_dev_boot_sequence(struct vpu_device *vd) { uint64_t start_t; int ret; struct vpu_register *r = vd_reg(vd); struct vpu_config *cfg = vd_cfg(vd); start_t = sched_clock(); /* No need to take vd->reg_lock, * 1. boot-up may take a while. * 2. Only one process can do boot sequence, since it'sprotected * by device lock vd->lock, */ vpu_trace_begin("vpu_%d|%s", vd->id, __func__); vpu_reg_write(vd, xtensa_altresetvec, vd->iova_reset.addr); vpu_reg_set(vd, ctrl, r->p_debug_enable | r->state_vector_select); vpu_reg_set(vd, ctrl, r->pbclk_enable); vpu_reg_clr(vd, ctrl, r->prid); vpu_reg_set(vd, ctrl, (vd->id << 1) & r->prid); vpu_reg_set(vd, sw_rst, r->ocdhaltonreset); vpu_reg_clr(vd, sw_rst, r->ocdhaltonreset); vpu_reg_set(vd, sw_rst, r->apu_b_rst | r->apu_d_rst); vpu_reg_clr(vd, sw_rst, r->apu_b_rst | r->apu_d_rst); /* pif gated disable, to prevent unknown propagate to bus */ vpu_reg_clr(vd, ctrl, r->pif_gated); /* axi control */ vpu_reg_set(vd, default0, r->awuser | r->aruser); vpu_reg_set(vd, default1, r->aruser_idma | r->awuser_idma); /* default set pre-ultra instead of ultra */ vpu_reg_set(vd, default0, r->qos_swap); /* jtag enable */ if (vd->jtag_enabled) { vpu_reg_set(vd, cg_clr, r->jtag_cg_clr); vpu_reg_set(vd, default2, r->dbg_en); } /* mbox interrupt mask */ if (vd_cfg(vd)->dmp_reg_cnt_mbox) vpu_reg_set(vd, mbox_inbox_mask, 0xffffffff); /* set log buffer */ vpu_reg_write(vd, xtensa_info19, vd->mva_iram); vpu_reg_write(vd, xtensa_info21, vd->iova_work.m.pa + cfg->log_ofs); vpu_reg_write(vd, xtensa_info22, vd->wb_log_size); vpu_run(vd, 0, 0); pr_info("%s: vpu%d: ALTRESETVEC: 0x%x\n", __func__, vd->id, vpu_reg_read(vd, xtensa_altresetvec)); ret = wait_command(vd, 0); vpu_stall(vd); if (ret == -ETIMEDOUT) goto err_timeout; if ((ret == -ERESTARTSYS) || (ret == -EAGAIN)) goto out; ret = vpu_cmd_result(vd, 0); err_timeout: if (ret) { vpu_excp_locked(vd, NULL, "VPU Timeout", "vpu%d: boot-up timeout\n", vd->id); } out: trace_vpu_cmd(vd->id, 0, "", 0, atomic_read(&vd->pw_boost), start_t, ret, 0, 0); vpu_trace_end("vpu_%d|%s", vd->id, __func__); return ret; } // vd->cmd_lock, should be acquired before calling this function int vpu_dev_set_debug(struct vpu_device *vd) { int ret; struct timespec64 now; unsigned int device_version = 0x0; unsigned long flags; uint64_t start_t; vpu_cmd_debug("%s: vpu%d\n", __func__, vd->id); start_t = sched_clock(); ktime_get_ts64(&now); /* SET_DEBUG */ ret = vpu_reg_lock(vd, true, &flags); if (ret) goto out; vpu_reg_write(vd, xtensa_info01, VPU_CMD_SET_DEBUG); vpu_reg_write(vd, xtensa_info23, now.tv_sec * 1000000 + now.tv_nsec / 1000); vpu_reg_write(vd, xtensa_info29, HOST_VERSION); vpu_trace_begin("vpu_%d|%s", vd->id, __func__); vpu_cmd(vd, 0, VPU_CMD_SET_DEBUG); vpu_reg_unlock(vd, &flags); vpu_cmd_debug("%s: vpu%d: iram: %x, log: %x, time: %x\n", __func__, vd->id, vpu_reg_read(vd, xtensa_info19), vpu_reg_read(vd, xtensa_info21), vpu_reg_read(vd, xtensa_info23)); vpu_cmd_debug("%s: vpu%d: timestamp: %.2lu:%.2lu:%.2lu:%.6lu\n", __func__, vd->id, (now.tv_sec / 3600) % (24), (now.tv_sec / 60) % (60), now.tv_sec % 60, now.tv_nsec / 1000); /* 3. wait until done */ ret = wait_command(vd, 0); vpu_stall(vd); vpu_trace_end("vpu_%d|%s", vd->id, __func__); if ((ret == -ERESTARTSYS) || (ret == -EAGAIN)) goto out; if (ret) goto err; /*3-additional. check vpu device/host version is matched or not*/ device_version = vpu_reg_read(vd, xtensa_info20); if ((int)device_version < (int)HOST_VERSION) { pr_info("%s: vpu%d: incompatible ftrace version: vd: %x, host: %x\n", __func__, vd->id, vpu_reg_read(vd, xtensa_info20), vpu_reg_read(vd, xtensa_info29)); vd->ftrace_avail = false; } else { vd->ftrace_avail = true; } err: if (ret) { vpu_excp_locked(vd, NULL, "VPU Timeout", "vpu%d: set debug (SET_DEBUG) timeout\n", vd->id); goto out; } /* 4. check the result */ ret = vpu_cmd_result(vd, 0); out: trace_vpu_cmd(vd->id, 0, "", VPU_CMD_SET_DEBUG, atomic_read(&vd->pw_boost), start_t, ret, 0, 0); if (ret) pr_info("%s: vpu%d: fail to set debug: %d\n", __func__, vd->id, ret); return ret; } // vd->cmd_lock, should be acquired before calling this function int vpu_hw_alg_init(struct vpu_algo_list *al, struct __vpu_algo *algo) { struct vpu_device *vd = al->vd; unsigned long flags; uint64_t start_t; int ret; start_t = sched_clock(); vpu_cmd_debug("%s: vpu%d: %s: mva/length (0x%lx/0x%x)\n", __func__, vd->id, algo->a.name, (unsigned long)algo->a.mva, algo->a.len); /* DO_LOADER */ ret = vpu_reg_lock(vd, false, &flags); if (ret) goto out; vd->state = VS_CMD_ALG; vpu_reg_write(vd, xtensa_info12, algo->a.mva); vpu_reg_write(vd, xtensa_info13, algo->a.len); vpu_reg_write(vd, xtensa_info15, 0); vpu_reg_write(vd, xtensa_info16, 0); vpu_trace_begin("vpu_%d|%s", vd->id, __func__); vpu_cmd(vd, 0, VPU_CMD_DO_LOADER); vpu_reg_unlock(vd, &flags); ret = wait_command(vd, 0); vpu_stall(vd); vpu_trace_end("vpu_%d|%s", vd->id, __func__); if ((ret == -ERESTARTSYS) || (ret == -EAGAIN)) goto out; if (ret) { vpu_excp(vd, NULL, "VPU Timeout", "vpu%d: load algo (DO_LOADER) timeout, algo: %s\n", vd->id, algo->a.name); goto out; } out: trace_vpu_cmd(vd->id, 0, algo->a.name, VPU_CMD_DO_LOADER, vpu_cmd_boost(vd, 0), start_t, ret, 0, 0); vpu_cmd_debug("%s: vpu%d: %s: %d\n", __func__, vd->id, algo->a.name, ret); return ret; } static int vpu_set_ftrace(struct vpu_device *vd) { int ret = 0; unsigned long flags; uint64_t start_t; start_t = sched_clock(); /* SET_FTRACE */ ret = vpu_reg_lock(vd, true, &flags); if (ret) goto out; vpu_reg_write(vd, xtensa_info05, (vpu_drv->met) ? 1 : 0); /* set vpu internal log level */ vpu_reg_write(vd, xtensa_info06, vpu_drv->ilog); /* clear info18 */ vpu_reg_write(vd, xtensa_info18, 0); vpu_cmd(vd, 0, VPU_CMD_SET_FTRACE_LOG); vpu_reg_unlock(vd, &flags); ret = wait_command(vd, 0); vpu_stall(vd); if ((ret == -ERESTARTSYS) || (ret == -EAGAIN)) goto out; /* Error handling */ if (ret) { vpu_excp_locked(vd, NULL, "VPU Timeout", "vpu%d: request (SET_FTRACE) timeout\n", vd->id); goto out; } ret = vpu_cmd_result(vd, 0); out: trace_vpu_cmd(vd->id, 0, "", VPU_CMD_SET_FTRACE_LOG, atomic_read(&vd->pw_boost), start_t, ret, 0, 0); if (ret) pr_info("%s: vpu%d: fail to set ftrace: %d\n", __func__, vd->id, ret); return ret; } struct vpu_sys_ops vpu_sops_mt68xx = { .xos_lock = vpu_xos_lock, .xos_unlock = vpu_xos_unlock, .xos_wait_idle = vpu_xos_wait_idle, .isr = vpu_isr, .isr_check_cmd = vpu_isr_check_cmd_xos, .isr_check_unlock = vpu_isr_check_unlock_xos, }; struct vpu_sys_ops vpu_sops_mt67xx = { .xos_lock = NULL, .xos_unlock = NULL, .xos_wait_idle = NULL, .isr = vpu_isr, .isr_check_cmd = vpu_isr_check_cmd, .isr_check_unlock = vpu_isr_check_unlock, }; struct vpu_bin_ops vpu_bops_legacy = { .header = bin_header_legacy, .alg_info = bin_alg_info_legacy, .alg_info_cnt = bin_alg_info_cnt_legacy, .pre_info = NULL, .pre_info_cnt = NULL, }; struct vpu_bin_ops vpu_bops_preload = { .header = bin_header_preload, .alg_info = bin_algo_info_preload, .alg_info_cnt = bin_alg_info_cnt_preload, .pre_info = bin_pre_info_preload, .pre_info_cnt = bin_pre_info_cnt_preload, };