#include <media/v4l2-h264.h>
#include <media/v4l2-mem2mem.h>
#include "rkvdec.h"
#include "rkvdec-cabac.h"
#include "rkvdec-rcb.h"
#include "rkvdec-h264-common.h"
#include "rkvdec-vdpu381-regs.h"
struct rkvdec_sps {
u16 seq_parameter_set_id: 4;
u16 profile_idc: 8;
u16 constraint_set3_flag: 1;
u16 chroma_format_idc: 2;
u16 bit_depth_luma: 3;
u16 bit_depth_chroma: 3;
u16 qpprime_y_zero_transform_bypass_flag: 1;
u16 log2_max_frame_num_minus4: 4;
u16 max_num_ref_frames: 5;
u16 pic_order_cnt_type: 2;
u16 log2_max_pic_order_cnt_lsb_minus4: 4;
u16 delta_pic_order_always_zero_flag: 1;
u16 pic_width_in_mbs: 12;
u16 pic_height_in_mbs: 12;
u16 frame_mbs_only_flag: 1;
u16 mb_adaptive_frame_field_flag: 1;
u16 direct_8x8_inference_flag: 1;
u16 mvc_extension_enable: 1;
u16 num_views: 2;
u16 reserved_bits: 12;
u16 reserved[11];
} __packed;
struct rkvdec_pps {
u16 pic_parameter_set_id: 8;
u16 pps_seq_parameter_set_id: 5;
u16 entropy_coding_mode_flag: 1;
u16 bottom_field_pic_order_in_frame_present_flag: 1;
u16 num_ref_idx_l0_default_active_minus1: 5;
u16 num_ref_idx_l1_default_active_minus1: 5;
u16 weighted_pred_flag: 1;
u16 weighted_bipred_idc: 2;
u16 pic_init_qp_minus26: 7;
u16 pic_init_qs_minus26: 6;
u16 chroma_qp_index_offset: 5;
u16 deblocking_filter_control_present_flag: 1;
u16 constrained_intra_pred_flag: 1;
u16 redundant_pic_cnt_present: 1;
u16 transform_8x8_mode_flag: 1;
u16 second_chroma_qp_index_offset: 5;
u16 scaling_list_enable_flag: 1;
u32 scaling_list_address;
u16 is_longterm;
u8 reserved[3];
} __packed;
struct rkvdec_sps_pps {
struct rkvdec_sps sps;
struct rkvdec_pps pps;
} __packed;
struct rkvdec_h264_priv_tbl {
s8 cabac_table[4][464][2];
struct rkvdec_h264_scaling_list scaling_list;
struct rkvdec_sps_pps param_set[256];
struct rkvdec_rps rps;
};
struct rkvdec_h264_ctx {
struct rkvdec_aux_buf priv_tbl;
struct rkvdec_h264_reflists reflists;
struct rkvdec_vdpu381_regs_h264 regs;
};
static void assemble_hw_pps(struct rkvdec_ctx *ctx,
struct rkvdec_h264_run *run)
{
struct rkvdec_h264_ctx *h264_ctx = ctx->priv;
const struct v4l2_ctrl_h264_sps *sps = run->sps;
const struct v4l2_ctrl_h264_pps *pps = run->pps;
const struct v4l2_ctrl_h264_decode_params *dec_params = run->decode_params;
const struct v4l2_h264_dpb_entry *dpb = dec_params->dpb;
struct rkvdec_h264_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu;
struct rkvdec_sps_pps *hw_ps;
dma_addr_t scaling_list_address;
u32 scaling_distance;
u32 i;
hw_ps = &priv_tbl->param_set[pps->pic_parameter_set_id];
memset(hw_ps, 0, sizeof(*hw_ps));
hw_ps->sps.seq_parameter_set_id = sps->seq_parameter_set_id;
hw_ps->sps.profile_idc = sps->profile_idc;
hw_ps->sps.constraint_set3_flag = !!(sps->constraint_set_flags & (1 << 3));
hw_ps->sps.chroma_format_idc = sps->chroma_format_idc;
hw_ps->sps.bit_depth_luma = sps->bit_depth_luma_minus8;
hw_ps->sps.bit_depth_chroma = sps->bit_depth_chroma_minus8;
hw_ps->sps.qpprime_y_zero_transform_bypass_flag =
!!(sps->flags & V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS);
hw_ps->sps.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
hw_ps->sps.max_num_ref_frames = sps->max_num_ref_frames;
hw_ps->sps.pic_order_cnt_type = sps->pic_order_cnt_type;
hw_ps->sps.log2_max_pic_order_cnt_lsb_minus4 =
sps->log2_max_pic_order_cnt_lsb_minus4;
hw_ps->sps.delta_pic_order_always_zero_flag =
!!(sps->flags & V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO);
hw_ps->sps.mvc_extension_enable = 1;
hw_ps->sps.num_views = 1;
hw_ps->sps.pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;
hw_ps->sps.pic_height_in_mbs = sps->pic_height_in_map_units_minus1 + 1;
hw_ps->sps.frame_mbs_only_flag =
!!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY);
hw_ps->sps.mb_adaptive_frame_field_flag =
!!(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
hw_ps->sps.direct_8x8_inference_flag =
!!(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE);
hw_ps->pps.pic_parameter_set_id = pps->pic_parameter_set_id;
hw_ps->pps.pps_seq_parameter_set_id = pps->seq_parameter_set_id;
hw_ps->pps.entropy_coding_mode_flag =
!!(pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE);
hw_ps->pps.bottom_field_pic_order_in_frame_present_flag =
!!(pps->flags & V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT);
hw_ps->pps.num_ref_idx_l0_default_active_minus1 =
pps->num_ref_idx_l0_default_active_minus1;
hw_ps->pps.num_ref_idx_l1_default_active_minus1 =
pps->num_ref_idx_l1_default_active_minus1;
hw_ps->pps.weighted_pred_flag =
!!(pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED);
hw_ps->pps.weighted_bipred_idc = pps->weighted_bipred_idc;
hw_ps->pps.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
hw_ps->pps.pic_init_qs_minus26 = pps->pic_init_qs_minus26;
hw_ps->pps.chroma_qp_index_offset = pps->chroma_qp_index_offset;
hw_ps->pps.deblocking_filter_control_present_flag =
!!(pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT);
hw_ps->pps.constrained_intra_pred_flag =
!!(pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED);
hw_ps->pps.redundant_pic_cnt_present =
!!(pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT);
hw_ps->pps.transform_8x8_mode_flag =
!!(pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE);
hw_ps->pps.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
hw_ps->pps.scaling_list_enable_flag =
!!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT);
scaling_distance = offsetof(struct rkvdec_h264_priv_tbl, scaling_list);
scaling_list_address = h264_ctx->priv_tbl.dma + scaling_distance;
hw_ps->pps.scaling_list_address = scaling_list_address;
for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
hw_ps->pps.is_longterm |= (1 << i);
}
}
static void rkvdec_write_regs(struct rkvdec_ctx *ctx)
{
struct rkvdec_dev *rkvdec = ctx->dev;
struct rkvdec_h264_ctx *h264_ctx = ctx->priv;
rkvdec_memcpy_toio(rkvdec->regs + OFFSET_COMMON_REGS,
&h264_ctx->regs.common,
sizeof(h264_ctx->regs.common));
rkvdec_memcpy_toio(rkvdec->regs + OFFSET_CODEC_PARAMS_REGS,
&h264_ctx->regs.h264_param,
sizeof(h264_ctx->regs.h264_param));
rkvdec_memcpy_toio(rkvdec->regs + OFFSET_COMMON_ADDR_REGS,
&h264_ctx->regs.common_addr,
sizeof(h264_ctx->regs.common_addr));
rkvdec_memcpy_toio(rkvdec->regs + OFFSET_CODEC_ADDR_REGS,
&h264_ctx->regs.h264_addr,
sizeof(h264_ctx->regs.h264_addr));
rkvdec_memcpy_toio(rkvdec->regs + OFFSET_POC_HIGHBIT_REGS,
&h264_ctx->regs.h264_highpoc,
sizeof(h264_ctx->regs.h264_highpoc));
}
static void config_registers(struct rkvdec_ctx *ctx,
struct rkvdec_h264_run *run)
{
const struct v4l2_ctrl_h264_decode_params *dec_params = run->decode_params;
const struct v4l2_h264_dpb_entry *dpb = dec_params->dpb;
struct rkvdec_h264_ctx *h264_ctx = ctx->priv;
dma_addr_t priv_start_addr = h264_ctx->priv_tbl.dma;
const struct v4l2_pix_format_mplane *dst_fmt;
struct vb2_v4l2_buffer *src_buf = run->base.bufs.src;
struct vb2_v4l2_buffer *dst_buf = run->base.bufs.dst;
struct rkvdec_vdpu381_regs_h264 *regs = &h264_ctx->regs;
const struct v4l2_format *f;
dma_addr_t rlc_addr;
dma_addr_t dst_addr;
u32 hor_virstride;
u32 ver_virstride;
u32 y_virstride;
u32 offset;
u32 pixels;
u32 i;
memset(regs, 0, sizeof(*regs));
regs->common.reg009_dec_mode.dec_mode = VDPU381_MODE_H264;
regs->common.reg011_important_en.buf_empty_en = 1;
regs->common.reg011_important_en.dec_clkgate_e = 1;
regs->common.reg011_important_en.dec_timeout_e = 1;
regs->common.reg011_important_en.pix_range_det_e = 1;
regs->common.reg012_secondary_en.scanlist_addr_valid_en = 1;
regs->common.reg013_en_mode_set.cur_pic_is_idr =
!!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC);
regs->common.reg016_stream_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
regs->common.reg017_slice_number.slice_num = MAX_SLICE_NUMBER;
f = &ctx->decoded_fmt;
dst_fmt = &f->fmt.pix_mp;
hor_virstride = dst_fmt->plane_fmt[0].bytesperline;
ver_virstride = dst_fmt->height;
y_virstride = hor_virstride * ver_virstride;
regs->common.reg018_y_hor_stride.y_hor_virstride = hor_virstride / 16;
regs->common.reg019_uv_hor_stride.uv_hor_virstride = hor_virstride / 16;
regs->common.reg020_y_stride.y_virstride = y_virstride / 16;
regs->common.reg026_block_gating_en.inter_auto_gating_e = 1;
regs->common.reg026_block_gating_en.filterd_auto_gating_e = 1;
regs->common.reg026_block_gating_en.strmd_auto_gating_e = 1;
regs->common.reg026_block_gating_en.mcp_auto_gating_e = 1;
regs->common.reg026_block_gating_en.busifd_auto_gating_e = 0;
regs->common.reg026_block_gating_en.dec_ctrl_auto_gating_e = 1;
regs->common.reg026_block_gating_en.intra_auto_gating_e = 1;
regs->common.reg026_block_gating_en.mc_auto_gating_e = 1;
regs->common.reg026_block_gating_en.transd_auto_gating_e = 1;
regs->common.reg026_block_gating_en.sram_auto_gating_e = 1;
regs->common.reg026_block_gating_en.cru_auto_gating_e = 1;
regs->common.reg026_block_gating_en.reg_cfg_gating_en = 1;
pixels = dst_fmt->height * dst_fmt->width;
if (pixels < RKVDEC_1080P_PIXELS)
regs->common.reg032_timeout_threshold = RKVDEC_TIMEOUT_1080p;
else if (pixels < RKVDEC_4K_PIXELS)
regs->common.reg032_timeout_threshold = RKVDEC_TIMEOUT_4K;
else if (pixels < RKVDEC_8K_PIXELS)
regs->common.reg032_timeout_threshold = RKVDEC_TIMEOUT_8K;
else
regs->common.reg032_timeout_threshold = RKVDEC_TIMEOUT_MAX;
regs->h264_param.reg065_cur_top_poc = dec_params->top_field_order_cnt;
regs->h264_param.reg066_cur_bot_poc = dec_params->bottom_field_order_cnt;
for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
struct vb2_buffer *vb_buf = run->ref_buf[i];
dma_addr_t buf_dma;
if (!vb_buf)
vb_buf = &dst_buf->vb2_buf;
buf_dma = vb2_dma_contig_plane_dma_addr(vb_buf, 0);
regs->h264_addr.reg164_180_ref_base[i] = buf_dma;
regs->h264_addr.reg182_198_colmv_base[i] = buf_dma + ctx->colmv_offset;
struct rkvdec_vdpu381_h264_ref_info *ref_info =
®s->h264_param.reg099_102_ref_info_regs[i / 4].ref_info[i % 4];
ref_info->ref_field =
!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD);
ref_info->ref_colmv_use_flag =
!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE);
ref_info->ref_topfield_used =
!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF);
ref_info->ref_botfield_used =
!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF);
regs->h264_param.reg067_098_ref_poc[i * 2] =
dpb[i].top_field_order_cnt;
regs->h264_param.reg067_098_ref_poc[i * 2 + 1] =
dpb[i].bottom_field_order_cnt;
}
rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
regs->common_addr.rlc_base = rlc_addr;
regs->common_addr.rlcwrite_base = rlc_addr;
dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
regs->common_addr.decout_base = dst_addr;
regs->common_addr.error_ref_base = dst_addr;
regs->common_addr.colmv_cur_base = dst_addr + ctx->colmv_offset;
for (i = 0; i < rkvdec_rcb_buf_count(ctx); i++)
regs->common_addr.rcb_base[i] = rkvdec_rcb_buf_dma_addr(ctx, i);
offset = offsetof(struct rkvdec_h264_priv_tbl, param_set);
regs->h264_addr.reg161_pps_base = priv_start_addr + offset;
offset = offsetof(struct rkvdec_h264_priv_tbl, rps);
regs->h264_addr.reg163_rps_base = priv_start_addr + offset;
offset = offsetof(struct rkvdec_h264_priv_tbl, cabac_table);
regs->h264_addr.reg199_cabactbl_base = priv_start_addr + offset;
offset = offsetof(struct rkvdec_h264_priv_tbl, scaling_list);
regs->h264_addr.reg181_scanlist_addr = priv_start_addr + offset;
rkvdec_write_regs(ctx);
}
static int rkvdec_h264_start(struct rkvdec_ctx *ctx)
{
struct rkvdec_dev *rkvdec = ctx->dev;
struct rkvdec_h264_priv_tbl *priv_tbl;
struct rkvdec_h264_ctx *h264_ctx;
struct v4l2_ctrl *ctrl;
int ret;
ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl,
V4L2_CID_STATELESS_H264_SPS);
if (!ctrl)
return -EINVAL;
ret = rkvdec_h264_validate_sps(ctx, ctrl->p_new.p_h264_sps);
if (ret)
return ret;
h264_ctx = kzalloc_obj(*h264_ctx);
if (!h264_ctx)
return -ENOMEM;
priv_tbl = dma_alloc_coherent(rkvdec->dev, sizeof(*priv_tbl),
&h264_ctx->priv_tbl.dma, GFP_KERNEL);
if (!priv_tbl) {
ret = -ENOMEM;
goto err_free_ctx;
}
h264_ctx->priv_tbl.size = sizeof(*priv_tbl);
h264_ctx->priv_tbl.cpu = priv_tbl;
memcpy(priv_tbl->cabac_table, rkvdec_h264_cabac_table,
sizeof(rkvdec_h264_cabac_table));
ctx->priv = h264_ctx;
return 0;
err_free_ctx:
kfree(h264_ctx);
return ret;
}
static void rkvdec_h264_stop(struct rkvdec_ctx *ctx)
{
struct rkvdec_h264_ctx *h264_ctx = ctx->priv;
struct rkvdec_dev *rkvdec = ctx->dev;
dma_free_coherent(rkvdec->dev, h264_ctx->priv_tbl.size,
h264_ctx->priv_tbl.cpu, h264_ctx->priv_tbl.dma);
kfree(h264_ctx);
}
static int rkvdec_h264_run(struct rkvdec_ctx *ctx)
{
struct v4l2_h264_reflist_builder reflist_builder;
struct rkvdec_dev *rkvdec = ctx->dev;
struct rkvdec_h264_ctx *h264_ctx = ctx->priv;
struct rkvdec_h264_priv_tbl *tbl = h264_ctx->priv_tbl.cpu;
struct rkvdec_h264_run run;
rkvdec_h264_run_preamble(ctx, &run);
v4l2_h264_init_reflist_builder(&reflist_builder, run.decode_params,
run.sps, run.decode_params->dpb);
v4l2_h264_build_p_ref_list(&reflist_builder, h264_ctx->reflists.p);
v4l2_h264_build_b_ref_lists(&reflist_builder, h264_ctx->reflists.b0,
h264_ctx->reflists.b1);
assemble_hw_scaling_list(&run, &tbl->scaling_list);
assemble_hw_pps(ctx, &run);
lookup_ref_buf_idx(ctx, &run);
assemble_hw_rps(&reflist_builder, &run, &h264_ctx->reflists, &tbl->rps);
config_registers(ctx, &run);
rkvdec_run_postamble(ctx, &run.base);
rkvdec_schedule_watchdog(rkvdec, h264_ctx->regs.common.reg032_timeout_threshold);
writel(VDPU381_DEC_E_BIT, rkvdec->regs + VDPU381_REG_DEC_E);
return 0;
}
static int rkvdec_h264_try_ctrl(struct rkvdec_ctx *ctx, struct v4l2_ctrl *ctrl)
{
if (ctrl->id == V4L2_CID_STATELESS_H264_SPS)
return rkvdec_h264_validate_sps(ctx, ctrl->p_new.p_h264_sps);
return 0;
}
const struct rkvdec_coded_fmt_ops rkvdec_vdpu381_h264_fmt_ops = {
.adjust_fmt = rkvdec_h264_adjust_fmt,
.get_image_fmt = rkvdec_h264_get_image_fmt,
.start = rkvdec_h264_start,
.stop = rkvdec_h264_stop,
.run = rkvdec_h264_run,
.try_ctrl = rkvdec_h264_try_ctrl,
};