kernel-brax3-ubuntu-touch/drivers/misc/mediatek/apusys/vpu/4.5/vpu_met.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2020 MediaTek Inc.
 */

#include <linux/slab.h>
#include <linux/debugfs.h>

#include "vpu_met.h"
#include "vpu_reg.h"
#include "vpu_debug.h"
#include "vpu_trace.h"

#define CREATE_TRACE_POINTS
#include "met_vpusys_events.h"

#define vpu_trace_dump(fmt, args...) \
	{ \
		if (vpu_drv->met & VPU_MET_LEGACY) \
			trace_printk("MET_DUMP|" fmt "\n", ##args); \
	}

/* log format */
#define VPUBUF_MARKER		(0xBEEF)
#define VPULOG_START_MARKER	(0x55AA)
#define VPULOG_END_MARKER	(0xAA55)
#define MX_LEN_STR_DESC		(128)
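
/*
 * On-buffer layout of a single MET log packet emitted by the VPU
 * firmware: a 16-bit start marker, descriptor length, action id, a
 * split 64-bit system timer and a session id, followed by desc_len
 * bytes of payload and a 16-bit end mark. Packed to 2-byte alignment
 * to match the firmware layout.
 */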
#pragma pack(push)
#pragma pack(2)
struct vpu_met_header_t {
	unsigned short start_mark;
	unsigned char desc_len;
	unsigned char action_id;
	unsigned int sys_timer_h;
	unsigned int sys_timer_l;
	unsigned short sessid;
};
#pragma pack(pop)
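
/*
 * Host-side clone of one firmware log buffer, queued from ISR context
 * and drained by the vpu_met_wq() worker. buf points at the cloned
 * data, which is allocated together with this struct in a single
 * kmalloc().
 */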
struct vpu_met_log {
	struct list_head list;
	unsigned int buf_addr;
	unsigned int buf_size;
	void *buf;
};
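
/* Emit one parsed MET packet to the met_vpusys_events tracepoint. */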
static void vpu_met_packet(long long wclk, char action, int core,
	int sessid, char *str_desc, int val)
{
	vpu_pef_debug("%s: wclk: %lld, action: %c, core: %d, ssid: %d, val: %d, desc: %s\n",
		__func__, wclk, action, core, sessid, val, str_desc);
	trace___MET_PACKET__(wclk, action, core, sessid, str_desc, val);
}
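
/*
 * Translate a firmware action id into the systrace-style action
 * character ('B'/'E' sync begin/end, 'C' counter, 'S'/'F' async
 * start/finish) and forward the packet. Session ids are offset by
 * 0x8000 per core, presumably to keep them unique across cores.
 */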
static void __MET_PACKET__(int vpu_core, unsigned long long wclk,
	unsigned char action_id, char *str_desc, unsigned int sessid)
{
	char action = 'Z';
	char null_str[] = "null";
	char *__str_desc = str_desc;
	int val = 0;

	switch (action_id) {
	/* for Sync Marker Begin/End */
	case 0x01:
		action = 'B';
		break;
	case 0x02:
		action = 'E';
		break;
	/* for Counter Marker */
	case 0x03:
		action = 'C';
		/* counter payload: */
		/* byte 0~11: string desc */
		/* byte 12~15: counter val */
		val = *(unsigned int *)(str_desc + 12);
		break;
	/* for Async Marker Start/Finish */
	case 0x04:
		action = 'S';
		break;
	case 0x05:
		action = 'F';
		break;
	}
	if (str_desc[0] == '\0') {
		/* handle empty string */
		__str_desc = null_str;
	}
	vpu_met_packet(wclk, action, vpu_core,
		sessid + 0x8000 * vpu_core, __str_desc, val);
}
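
/* Hex-dump a raw log buffer, 16 bytes per line, for error diagnosis. */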
static void dump_buf(void *ptr, int leng)
{
	int idx = 0;
	unsigned short *usaddr;

	for (idx = 0; idx < leng; idx += 16) {
		usaddr = (unsigned short *)(ptr + idx);
		vpu_trace_dump("%08x: %04x%04x %04x%04x %04x%04x %04x%04x,",
			idx,
			usaddr[0], usaddr[1], usaddr[2], usaddr[3],
			usaddr[4], usaddr[5], usaddr[6], usaddr[7]);
	}
}
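
/*
 * Copy "size" bytes of log data at offset "addr" of the VPU's data
 * memory into the host buffer "ptr", then clear XTENSA_INFO18 to tell
 * the firmware that the buffer has been consumed and may be reused.
 */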
static int vpu_met_cpy(struct vpu_device *vd, unsigned int addr,
	unsigned int size, void *ptr)
{
	void *m = (void *)(((unsigned long)vd->dmem.m) + addr);

	if (vpu_debug_on(VPU_DBG_MET))
		vpu_trace_begin("vpu_%d|%s|%08x/%u->%p", vd->id, __func__,
			addr, size, ptr);
	memcpy_fromio(ptr, m, size);
	/* notify VPU buffer copy is finished */
	vpu_reg_write(vd, xtensa_info18, 0x00000000);
	if (vpu_debug_on(VPU_DBG_MET))
		vpu_trace_end("vpu_%d|%s|%08x/%u->%p", vd->id,
			__func__, addr, size, ptr);
	return 0;
}
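
/*
 * A cloned buffer is considered ready when both its first and last
 * 16-bit words carry VPUBUF_MARKER (0xBEEF); otherwise the buffer is
 * presumed incomplete or corrupt.
 */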
static bool vpu_met_log_ready(void *buf, int buf_len)
{
	if (VPUBUF_MARKER != *(unsigned short *)buf) {
		/* front marker is invalid */
		vpu_trace_dump("Error front marker: %04x",
			*(unsigned short *)buf);
		return false;
	}
	if (VPUBUF_MARKER != *(unsigned short *)(buf + buf_len - 2)) {
		vpu_trace_dump("Error end marker: %04x",
			*(unsigned short *)(buf + buf_len - 2));
		return false;
	}
	return true;
}
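
/*
 * Sanity-check one packet header: the start marker must match and the
 * descriptor must not exceed 16 bytes (the firmware-side limit, as
 * implied by the 0x10 bound below).
 */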
static bool vpu_met_log_valid(struct vpu_met_header_t *h)
{
	if (h->start_mark != VPULOG_START_MARKER) {
		vpu_trace_dump("Error h->start_mark: %04x", h->start_mark);
		return false;
	}
	if (h->desc_len > 0x10) {
		vpu_trace_dump("Error h->desc_len: %02x", h->desc_len);
		return false;
	}
	return true;
}
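
/*
 * Walk a cloned log buffer and emit each packet to MET. Layout:
 * [marker][valid_len][packet 0][packet 1]...[marker], where each
 * packet is a vpu_met_header_t, desc_len bytes of payload and a
 * 2-byte end mark. Parsing stops at the first malformed packet and
 * dumps the raw buffer for diagnosis.
 */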
static void vpu_met_log_show(struct vpu_device *vd, void *ptr, int buf_leng)
{
	int idx = 0;
	void *start_ptr = ptr;
	int valid_len = 0;
	struct vpu_met_header_t *h;
	char trace_data[MX_LEN_STR_DESC];
	int header_size = sizeof(struct vpu_met_header_t);

	/* check buffer status */
	if (!vpu_met_log_ready(start_ptr, buf_leng)) {
		vpu_trace_dump("vpu_met_log_ready: false, %p %d",
			ptr, buf_leng);
		dump_buf(start_ptr, buf_leng);
		return;
	}
	/* get valid length */
	valid_len = *(unsigned short *)(start_ptr + 2);
	if (valid_len >= buf_leng) {
		vpu_trace_dump("valid_len: %d larger than buf_leng: %d",
			valid_len, buf_leng);
		return;
	}
	/* data starts after the front marker and length fields */
	idx += 4;
	if (vpu_debug_on(VPU_DBG_MET))
		vpu_trace_begin("vpu_%d|%s|@%p/%d",
			vd->id, __func__, ptr, buf_leng);
	while (1) {
		unsigned long long sys_t;
		int packet_size = 0;
		void *data_ptr;

		if (idx >= valid_len)
			break;
		h = (struct vpu_met_header_t *)(start_ptr + idx);
		data_ptr = (start_ptr + idx) + header_size;
		if (!vpu_met_log_valid(h)) {
			vpu_trace_dump("vpu_met_log_valid: false");
			dump_buf(start_ptr, buf_leng);
			break;
		}
		/* calculate packet size: header + data + end_mark */
		packet_size = header_size + h->desc_len + 2;
		if (idx + packet_size > valid_len) {
			vpu_trace_dump("error length (idx: %d, packet_size: %d)",
				idx, packet_size);
			vpu_trace_dump("out of bound: valid_len: %d",
				valid_len);
			dump_buf(start_ptr, buf_leng);
			break;
		}
		if (h->desc_len > MX_LEN_STR_DESC) {
			vpu_trace_dump("h->desc_len(%d) > MX_LEN_STR_DESC(%d)",
				h->desc_len, MX_LEN_STR_DESC);
			dump_buf(start_ptr, buf_leng);
			break;
		}
		memset(trace_data, 0x00, MX_LEN_STR_DESC);
		if (h->desc_len > 0) {
			/* copy data buffer */
			memcpy(trace_data, data_ptr, h->desc_len);
		}
		sys_t = h->sys_timer_h;
		sys_t = (sys_t << 32) + (h->sys_timer_l & 0xFFFFFFFF);
		__MET_PACKET__(vd->id, sys_t, h->action_id,
			trace_data, h->sessid);
		idx += packet_size;
	}
	if (vpu_debug_on(VPU_DBG_MET))
		vpu_trace_end("vpu_%d|%s|@%p/%d",
			vd->id, __func__, ptr, buf_leng);
}
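
/*
 * Work-queue bottom half: pop cloned log buffers off vd->met.list one
 * at a time (under the lock) and parse each outside of IRQ context.
 */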
static void vpu_met_wq(struct work_struct *work)
{
	unsigned long flags;
	struct vpu_met_work *w =
		container_of(work, struct vpu_met_work, work);
	struct vpu_device *vd = container_of(w, struct vpu_device, met);
	struct vpu_met_log *mlog, *tmp;

	if (vpu_debug_on(VPU_DBG_MET))
		vpu_trace_begin("vpu_%d|%s", vd->id, __func__);
restart:
	spin_lock_irqsave(&w->lock, flags);
	if (list_empty(&w->list)) {
		mlog = NULL;
	} else {
		list_for_each_entry_safe(mlog, tmp, &w->list, list) {
			list_del(&mlog->list);
			break;
		}
	}
	spin_unlock_irqrestore(&w->lock, flags);
	if (!mlog)
		goto out;
	vpu_trace_dump("%s %d addr/size/buf: %08x/%08x/%p",
		__func__, __LINE__, mlog->buf_addr,
		mlog->buf_size, mlog->buf);
	vpu_met_log_show(vd, mlog->buf, mlog->buf_size);
	kfree(mlog);
	goto restart;
out:
	if (vpu_debug_on(VPU_DBG_MET))
		vpu_trace_end("vpu_%d|%s", vd->id, __func__);
}
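
/*
 * Called from ISR context: read the log buffer address and size the
 * firmware published in XTENSA_INFO05/INFO06, clone the buffer out of
 * device memory, queue it and defer parsing to the bottom half. The
 * low 20 bits of the VPU address appear to be the offset into the
 * mapped data memory window.
 */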
static void vpu_met_log_dump(struct vpu_device *vd)
{
	char *ptr;
	unsigned long flags;
	unsigned int apmcu_log_buf_ofst;
	unsigned int log_buf_addr = 0x0;
	unsigned int log_buf_size = 0x0;
	struct vpu_met_log *mlog;

	/* handle output log */
	log_buf_addr = vpu_reg_read(vd, xtensa_info05);
	log_buf_size = vpu_reg_read(vd, xtensa_info06);
	/* translate VPU address to APMCU offset */
	apmcu_log_buf_ofst = log_buf_addr & 0x000fffff;
	/* in ISR context, memory must be allocated with GFP_ATOMIC */
	ptr = kmalloc(sizeof(struct vpu_met_log) + log_buf_size,
		GFP_ATOMIC);
	if (!ptr) {
		pr_info("%s: met log alloc fail: %zu, %u\n",
			__func__, sizeof(struct vpu_met_log),
			log_buf_size);
		return;
	}
	if (vpu_debug_on(VPU_DBG_MET))
		vpu_trace_begin("vpu_%d|%s", vd->id, __func__);
	/* fill vpu_log reader's information */
	mlog = (struct vpu_met_log *)ptr;
	mlog->buf_addr = log_buf_addr;
	mlog->buf_size = log_buf_size;
	mlog->buf = (void *)(ptr + sizeof(struct vpu_met_log));
	vpu_met_debug("%s: vpu%d: addr/size/buf: %08x/%08x/%p\n",
		__func__, vd->id,
		log_buf_addr, log_buf_size, mlog->buf);
	/* clone the buffer while still in ISR context */
	vpu_met_cpy(vd, apmcu_log_buf_ofst, log_buf_size, mlog->buf);
	spin_lock_irqsave(&vd->met.lock, flags);
	list_add_tail(&mlog->list, &vd->met.list);
	spin_unlock_irqrestore(&vd->met.lock, flags);
	/* dump the log to ftrace in the bottom half */
	schedule_work(&vd->met.work);
	if (vpu_debug_on(VPU_DBG_MET))
		vpu_trace_end("vpu_%d|%s", vd->id, __func__);
}
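
/*
 * MET interrupt entry point: XTENSA_INFO18 carries the firmware's
 * request code; VPU_REQ_DO_DUMP_LOG asks the host to clone and dump
 * the current log buffer.
 */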
void vpu_met_isr(struct vpu_device *vd)
{
	int dump = 0;

	if (!vpu_drv->met)
		return;
	/* INFO18 is used to request a MET log dump */
	dump = vpu_reg_read(vd, xtensa_info18);
	/* dispatch interrupt by INFO18 */
	switch (dump) {
	case 0:
		break;
	case VPU_REQ_DO_DUMP_LOG:
		vpu_met_log_dump(vd);
		break;
	case VPU_REQ_DO_CLOSED_FILE:
		break;
	default:
		pr_info("%s: vpu%d: unsupported cmd: %d\n",
			__func__, vd->id, dump);
		break;
	}
}
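
/*
 * Build an Xtensa performance-monitor control word: enable tracelevel
 * counting and program the event select and mask fields. Judging by
 * the Xtensa event names, the default table below counts executed
 * instructions, active iDMA cycles, uncached-load data stalls and
 * i-cache-miss stalls; the remaining slots are unused and can be
 * reprogrammed at runtime via debugfs (see vpu_init_drv_met()).
 */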
#define PM_CTRL_SEL(sel, mask) \
	((PERF_PMCTRL_TRACELEVEL) | \
	 ((sel) << PERF_PMCTRL_SELECT_SHIFT) | \
	 ((mask) << PERF_PMCTRL_MASK_SHIFT))

static uint32_t pm_sel[VPU_MET_PM_MAX] = {
	PM_CTRL_SEL(XTPERF_CNT_INSN, XTPERF_MASK_INSN_ALL),
	PM_CTRL_SEL(XTPERF_CNT_IDMA, XTPERF_MASK_IDMA_ACTIVE_CYCLES),
	PM_CTRL_SEL(XTPERF_CNT_D_STALL, XTPERF_MASK_D_STALL_UNCACHED_LOAD),
	PM_CTRL_SEL(XTPERF_CNT_I_STALL, XTPERF_MASK_I_STALL_CACHE_MISS),
	0,
	0,
	0,
	0
};
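
/* Performance-monitor register offsets in the VPU debug MMIO region. */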
#define PMG_EN		0x1000
#define PM_COUNTER	0x1080
#define PM_CTRL		0x1100
#define PM_STAT		0x1180

static inline
unsigned long vpu_dbg_base(struct vpu_device *vd)
{
	return (unsigned long)vd->dbg.m;
}

static inline
uint32_t vpu_dbg_read(struct vpu_device *vd, int offset)
{
	return ioread32((void *)(vpu_dbg_base(vd) + offset));
}

static inline
void vpu_dbg_write(struct vpu_device *vd, int offset, uint32_t val)
{
	iowrite32(val, (void *)(vpu_dbg_base(vd) + offset));
}

static inline
void vpu_dbg_clr(struct vpu_device *vd, int offset, uint32_t mask)
{
	vpu_dbg_write(vd, offset, vpu_dbg_read(vd, offset) & ~mask);
}

static inline
void vpu_dbg_set(struct vpu_device *vd, int offset, uint32_t mask)
{
	vpu_dbg_write(vd, offset, vpu_dbg_read(vd, offset) | mask);
}

#define VPU_MET_PM_LATENCY_NS	(1000000)
#define VPU_MET_PM_LATENCY_MIN	(50000)
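
/*
 * Start the shared sampling hrtimer. Latencies below
 * VPU_MET_PM_LATENCY_MIN (50 us) leave the timer unarmed, so writing
 * a tiny met_pm_latency via debugfs effectively disables periodic
 * sampling.
 */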
static int vpu_met_pm_hrt_start(void)
{
	if (vpu_drv->met_hrt.latency < VPU_MET_PM_LATENCY_MIN)
		return 0;
	hrtimer_start(&vpu_drv->met_hrt.t,
		ns_to_ktime(vpu_drv->met_hrt.latency),
		HRTIMER_MODE_REL);
	vpu_met_debug("%s:\n", __func__);
	return 0;
}

static int vpu_met_pm_hrt_stop(int sync)
{
	int ret = 0;

	if (sync)
		hrtimer_cancel(&vpu_drv->met_hrt.t);
	else
		ret = hrtimer_try_to_cancel(&vpu_drv->met_hrt.t);
	vpu_met_debug("%s: sync: %d, ret: %d\n", __func__, sync, ret);
	return ret;
}
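
/*
 * Arm the performance counters of a device and take a reference on
 * the shared sampling timer; the first caller (ref 0 -> 1) also
 * starts the hrtimer. Each selected counter is programmed from
 * pm_sel[] and zeroed, then counting is enabled via PMG_EN.
 */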
void vpu_met_pm_get(struct vpu_device *vd)
{
	int i;
	unsigned long flags;
	uint32_t offset;

	if (!vpu_drv->met)
		return;
	/* program the counters and reset their values */
	spin_lock_irqsave(&vpu_drv->met_hrt.lock, flags);
	for (i = 0; i < VPU_MET_PM_MAX; i++) {
		if (!pm_sel[i])
			continue;
		offset = i * 4;
		vpu_dbg_write(vd, PM_CTRL + offset, pm_sel[i]);
		vpu_dbg_write(vd, PM_COUNTER + offset, 0);
		vpu_dbg_write(vd, PM_STAT + offset, 0);
		vd->pm.val[i] = 0;
	}
	vpu_dbg_set(vd, PMG_EN, 0x1);
	if (kref_get_unless_zero(&vpu_drv->met_hrt.ref)) {
		vpu_met_debug("%s: vpu%d: ref: %d\n",
			__func__, vd->id, kref_read(&vpu_drv->met_hrt.ref));
		goto out;
	}
	kref_init(&vpu_drv->met_hrt.ref);
	vpu_met_debug("%s: vpu%d: ref: 1\n", __func__, vd->id);
	vpu_met_pm_hrt_start();
out:
	spin_unlock_irqrestore(&vpu_drv->met_hrt.lock, flags);
}
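
/* kref release callback: the last user stops the sampling timer. */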
static void vpu_met_pm_release(struct kref *ref)
{
	vpu_met_pm_hrt_stop(1 /* sync */);
}

void vpu_met_pm_put(struct vpu_device *vd)
{
	if (!vpu_drv->met)
		return;
	if (!kref_read(&vpu_drv->met_hrt.ref)) {
		vpu_met_debug("%s: vpu%d: ref is already zero\n",
			__func__, vd->id);
		return;
	}
	vpu_met_debug("%s: vpu%d: ref: %d--\n",
		__func__, vd->id, kref_read(&vpu_drv->met_hrt.ref));
	kref_put(&vpu_drv->met_hrt.ref, vpu_met_pm_release);
}
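
/*
 * Sample the counters of one device: read each selected PM_COUNTER,
 * compute the delta against the previous sample and emit the values
 * to the legacy (delta-based) and/or compact (absolute) tracepoints,
 * depending on the bits set in the "met" debugfs knob.
 */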
static void vpu_met_pm_dbg_read(struct vpu_device *vd)
{
	int i;
	uint32_t offset;
	uint32_t tmp[VPU_MET_PM_MAX];
	uint32_t df[VPU_MET_PM_MAX];
	bool dump = false;

	for (i = 0; i < VPU_MET_PM_MAX; i++) {
		df[i] = 0;
		if (!pm_sel[i])
			continue;
		offset = i * 4;
		tmp[i] = vpu_dbg_read(vd, PM_COUNTER + offset);
		if (tmp[i] != vd->pm.val[i]) {
			dump = true;
			df[i] = tmp[i] - vd->pm.val[i];
			vd->pm.val[i] = tmp[i];
		}
	}
	if (vpu_drv->met & VPU_MET_LEGACY)
		trace_VPU__polling(vd->id, df[0], df[1], df[2], df[3]);
	if (dump && (vpu_drv->met & VPU_MET_COMPACT))
		trace_VPU__pm(vd->id, vd->pm.val);
}
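
/*
 * Periodic hrtimer callback: sample every powered-on device (state
 * between VS_BOOT and VS_REMOVING) and re-arm the timer. Note that
 * re-arming uses the compile-time VPU_MET_PM_LATENCY_NS rather than
 * the runtime met_pm_latency value, so the debugfs setting only
 * affects the first expiry.
 */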
static enum hrtimer_restart vpu_met_pm_hrt_func(struct hrtimer *timer)
{
	struct vpu_device *vd;
	unsigned long flags;
	struct list_head *ptr, *tmp;

	/* for all vpu cores, dump their registers */
	spin_lock_irqsave(&vpu_drv->met_hrt.lock, flags);
	list_for_each_safe(ptr, tmp, &vpu_drv->devs) {
		vd = list_entry(ptr, struct vpu_device, list);
		if (vd->state > VS_BOOT && vd->state < VS_REMOVING)
			vpu_met_pm_dbg_read(vd);
	}
	hrtimer_forward_now(&vpu_drv->met_hrt.t,
		ns_to_ktime(VPU_MET_PM_LATENCY_NS));
	spin_unlock_irqrestore(&vpu_drv->met_hrt.lock, flags);
	return HRTIMER_RESTART;
}
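
/*
 * Driver-level init: set up the sampling timer and expose the control
 * knobs under debugfs ("ilog", "met", "met_pm_latency" and one
 * "ctrl<N>" select word per counter under met_pm/).
 */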
int vpu_init_drv_met(void)
{
	struct dentry *droot = vpu_drv->droot;
	struct dentry *dpm;
	int i;

	spin_lock_init(&vpu_drv->met_hrt.lock);
	hrtimer_init(&vpu_drv->met_hrt.t,
		CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vpu_drv->met_hrt.t.function = vpu_met_pm_hrt_func;
	refcount_set(&vpu_drv->met_hrt.ref.refcount, 0);
	vpu_drv->ilog = 0;
	vpu_drv->met = VPU_MET_DISABLED;
	vpu_drv->met_hrt.latency = VPU_MET_PM_LATENCY_NS;

	if (!droot)
		goto out;
	debugfs_create_u32("ilog", 0660, droot, &vpu_drv->ilog);
	debugfs_create_u32("met", 0660, droot, &vpu_drv->met);
	dpm = debugfs_create_dir("met_pm", droot);
	if (IS_ERR_OR_NULL(dpm))
		goto out;
	for (i = 0; i < VPU_MET_PM_MAX; i++) {
		char name[32];

		if (snprintf(name, sizeof(name), "ctrl%d", i) < 0) {
			name[0] = '\0';
			vpu_met_debug("%s: snprintf fail\n", __func__);
		}
		debugfs_create_u32(name, 0660, dpm, &pm_sel[i]);
	}
	debugfs_create_u64("met_pm_latency", 0660, droot,
		&vpu_drv->met_hrt.latency);
out:
	return 0;
}
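
/* Driver-level teardown: synchronously stop the sampling timer. */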
int vpu_exit_drv_met(void)
{
	vpu_met_pm_hrt_stop(1 /* sync */);
	return 0;
}
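
/*
 * Per-device init/exit: each device owns a lock-protected list of
 * cloned log buffers and a work item that drains it.
 */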
int vpu_init_dev_met(struct platform_device *pdev,
	struct vpu_device *vd)
{
	memset(&vd->met, 0, sizeof(struct vpu_met_work));
	spin_lock_init(&vd->met.lock);
	INIT_LIST_HEAD(&vd->met.list);
	INIT_WORK(&vd->met.work, vpu_met_wq);
	return 0;
}

void vpu_exit_dev_met(struct platform_device *pdev,
	struct vpu_device *vd)
{
	if (vd->met.work.func)
		cancel_work_sync(&vd->met.work);
}