MLK-17257-1: drm: imx: dcss: Add WRSCL/RDSRC modules
author Laurentiu Palcu <laurentiu.palcu@nxp.com>
Tue, 19 Dec 2017 11:33:32 +0000 (13:33 +0200)
committer Leonard Crestez <leonard.crestez@nxp.com>
Wed, 17 Apr 2019 23:51:34 +0000 (02:51 +0300)
The WRSCL and RDSRC modules are needed for downscaling ratios from 3:1 up
to 7:1. If the usual scaling path were used for these ratios, the required
DRAM bandwidth would be too high and performance would be affected.

Signed-off-by: Laurentiu Palcu <laurentiu.palcu@nxp.com>
drivers/gpu/imx/dcss/Makefile
drivers/gpu/imx/dcss/dcss-prv.h
drivers/gpu/imx/dcss/dcss-rdsrc.c [new file with mode: 0644]
drivers/gpu/imx/dcss/dcss-wrscl.c [new file with mode: 0644]

index 822aa7b..70d05d3 100644 (file)
@@ -1,6 +1,6 @@
 obj-$(CONFIG_IMX_DCSS_CORE) += imx-dcss-core.o
 
 imx-dcss-core-objs := dcss-common.o dcss-blkctl.o dcss-ctxld.o \
-                     dcss-dpr.o dcss-dtg.o dcss-ss.o \
-                     dcss-hdr10.o dcss-scaler.o \
-                     dcss-dtrc.o dcss-dec400d.o
+                     dcss-dpr.o dcss-dtg.o dcss-ss.o dcss-hdr10.o \
+                     dcss-scaler.o dcss-dtrc.o dcss-dec400d.o dcss-wrscl.o \
+                     dcss-rdsrc.o
index d2b739d..f0c4baf 100644 (file)
@@ -45,6 +45,8 @@ struct dcss_soc {
        struct dcss_scaler_priv *scaler_priv;
        struct dcss_dtrc_priv *dtrc_priv;
        struct dcss_dec400d_priv *dec400d_priv;
+       struct dcss_wrscl_priv *wrscl_priv;
+       struct dcss_rdsrc_priv *rdsrc_priv;
 
        struct clk *apb_clk;
        struct clk *axi_clk;
@@ -107,6 +109,54 @@ void dcss_dtrc_exit(struct dcss_soc *dcss);
 int dcss_dec400d_init(struct dcss_soc *dcss, unsigned long dec400d_base);
 void dcss_dec400d_exit(struct dcss_soc *dcss);
 
+/* enums common to both WRSCL and RDSRC */
+/*
+ * P_SIZE field encodings. The enumerator value itself is shifted into the
+ * CTRL_STATUS register, so the numeric values are spelled out and must not
+ * be changed.
+ */
+enum dcss_wrscl_rdsrc_psize {
+       PSIZE_64   = 0,
+       PSIZE_128  = 1,
+       PSIZE_256  = 2,
+       PSIZE_512  = 3,
+       PSIZE_1024 = 4,
+       PSIZE_2048 = 5,
+       PSIZE_4096 = 6,
+};
+
+/*
+ * T_SIZE field encodings; the enumerator value is shifted into the
+ * CTRL_STATUS register as is.
+ */
+enum dcss_wrscl_rdsrc_tsize {
+       TSIZE_64  = 0,
+       TSIZE_128 = 1,
+       TSIZE_256 = 2,
+       TSIZE_512 = 3,
+};
+
+/*
+ * FIFO_SIZE field encodings; the enumerator value is shifted into the
+ * CTRL_STATUS register as is.
+ */
+enum dcss_wrscl_rdsrc_fifo_size {
+       FIFO_512  = 0,
+       FIFO_1024 = 1,
+       FIFO_2048 = 2,
+       FIFO_4096 = 3,
+};
+
+/*
+ * BPP field encodings; the enumerator value is shifted into the
+ * CTRL_STATUS register as is.
+ */
+enum dcss_wrscl_rdsrc_bpp {
+       BPP_38              = 0, /* 38 bit unpacked components */
+       BPP_32_UPCONVERT    = 1,
+       BPP_32_10BIT_OUTPUT = 2,
+       BPP_20              = 3, /* 10-bit YUV422 */
+       BPP_16              = 4, /* 8-bit YUV422 */
+};
+
+/*
+ * WRSCL
+ *
+ * dcss_wrscl_init():   allocate the WRSCL private data and map its registers
+ *                      at @wrscl_base; returns 0 or -ENOMEM.
+ * dcss_wrscl_exit():   counterpart of dcss_wrscl_init().
+ * dcss_wrscl_setup():  allocate the intermediate buffer for a
+ *                      @dst_xres x @dst_yres frame and program BASE_ADDR and
+ *                      PITCH. @vrefresh_hz is the refresh rate in Hz (it is
+ *                      multiplied with the frame size to get the write
+ *                      bandwidth), not a pixel clock. Returns the buffer
+ *                      address, or 0 when the buffer cannot be allocated.
+ * dcss_wrscl_enable(): write CTRL_STATUS with the enable/repeat bits set or
+ *                      cleared; disabling also frees the intermediate buffer.
+ */
+int dcss_wrscl_init(struct dcss_soc *dcss, unsigned long wrscl_base);
+void dcss_wrscl_exit(struct dcss_soc *dcss);
+u32 dcss_wrscl_setup(struct dcss_soc *dcss, u32 pix_format, u32 vrefresh_hz,
+                    u32 dst_xres, u32 dst_yres);
+void dcss_wrscl_enable(struct dcss_soc *dcss, bool en);
+
+/*
+ * RDSRC
+ *
+ * dcss_rdsrc_init():   allocate the RDSRC private data and map its registers
+ *                      at @rdsrc_base; returns 0 or -ENOMEM.
+ * dcss_rdsrc_exit():   counterpart of dcss_rdsrc_init().
+ * dcss_rdsrc_setup():  program BASE_ADDR (@base_addr), PITCH, WIDTH
+ *                      (@dst_xres) and HEIGHT (@dst_yres) and prepare the
+ *                      CTRL_STATUS value. @pix_format is currently not used
+ *                      by the implementation.
+ * dcss_rdsrc_enable(): write the prepared CTRL_STATUS value; with @en false
+ *                      WIDTH and HEIGHT are zeroed first.
+ */
+int dcss_rdsrc_init(struct dcss_soc *dcss, unsigned long rdsrc_base);
+void dcss_rdsrc_exit(struct dcss_soc *dcss);
+void dcss_rdsrc_setup(struct dcss_soc *dcss, u32 pix_format, u32 dst_xres,
+                     u32 dst_yres, u32 base_addr);
+void dcss_rdsrc_enable(struct dcss_soc *dcss, bool en);
+
+/* debug: dump registers routines */
 void dcss_blkctl_dump_regs(struct seq_file *s, void *data);
 void dcss_dtrc_dump_regs(struct seq_file *s, void *data);
 void dcss_dpr_dump_regs(struct seq_file *s, void *data);
@@ -115,4 +165,7 @@ void dcss_ss_dump_regs(struct seq_file *s, void *data);
 void dcss_scaler_dump_regs(struct seq_file *s, void *data);
 void dcss_ctxld_dump_regs(struct seq_file *s, void *data);
 void dcss_hdr10_dump_regs(struct seq_file *s, void *data);
+/* WRSCL/RDSRC dumpers; both are only defined when CONFIG_DEBUG_FS is set */
+void dcss_wrscl_dump_regs(struct seq_file *s, void *data);
+void dcss_rdsrc_dump_regs(struct seq_file *s, void *data);
+
 #endif /* __DCSS_PRV_H__ */
diff --git a/drivers/gpu/imx/dcss/dcss-rdsrc.c b/drivers/gpu/imx/dcss/dcss-rdsrc.c
new file mode 100644 (file)
index 0000000..d011e33
--- /dev/null
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+
+#include <video/imx-dcss.h>
+#include "dcss-prv.h"
+
+/*
+ * When defined, dcss_rdsrc_write() hands register writes to
+ * dcss_ctxld_write() instead of writing them directly with dcss_writel().
+ */
+#define USE_CTXLD
+
+/* CTRL_STATUS register and its fields (offsets are relative to the RDSRC base) */
+#define DCSS_RDSRC_CTRL_STATUS                 0x00
+#define   RDSRC_RD_ERR                         BIT(31)
+#define   RDSRC_FRAME_COMP                     BIT(30)
+#define   RDSRC_FIFO_SIZE_POS                  16
+#define   RDSRC_FIFO_SIZE_MASK                 GENMASK(22, 16)
+#define   RDSRC_RD_ERR_EN                      BIT(15)
+#define   RDSRC_FRAME_COMP_EN                  BIT(14)
+#define   RDSRC_P_SIZE_POS                     7
+#define   RDSRC_P_SIZE_MASK                    GENMASK(9, 7)
+#define   RDSRC_T_SIZE_POS                     5
+#define   RDSRC_T_SIZE_MASK                    GENMASK(6, 5)
+#define   RDSRC_BPP_POS                                2
+#define   RDSRC_BPP_MASK                       GENMASK(4, 2)
+#define   RDSRC_ENABLE                         BIT(0)
+/* source buffer address and geometry registers */
+#define DCSS_RDSRC_BASE_ADDR                   0x10
+#define DCSS_RDSRC_PITCH                       0x14
+#define DCSS_RDSRC_WIDTH                       0x18
+#define DCSS_RDSRC_HEIGHT                      0x1C
+
+/* Per-device RDSRC state, allocated by dcss_rdsrc_init(). */
+struct dcss_rdsrc_priv {
+       /* ioremapped register window (SZ_4K), used for direct register access */
+       void __iomem *base_reg;
+       /* rdsrc_base as passed to dcss_rdsrc_init(); base for ctxld writes */
+       u32 base_ofs;
+       /* back pointer to the owning DCSS device */
+       struct dcss_soc *dcss;
+
+       /* context id passed to dcss_ctxld_write() (CTX_SB_HP) */
+       u32 ctx_id;
+
+       /* address last programmed into DCSS_RDSRC_BASE_ADDR */
+       u32 buf_addr;
+
+       /* CTRL_STATUS value built by dcss_rdsrc_setup(), written on enable */
+       u32 ctrl_status;
+};
+
+#ifdef CONFIG_DEBUG_FS
+/* Registers printed by dcss_rdsrc_dump_regs(); the table is never modified. */
+static const struct dcss_debug_reg rdsrc_debug_reg[] = {
+       DCSS_DBG_REG(DCSS_RDSRC_CTRL_STATUS),
+       DCSS_DBG_REG(DCSS_RDSRC_BASE_ADDR),
+       DCSS_DBG_REG(DCSS_RDSRC_PITCH),
+       DCSS_DBG_REG(DCSS_RDSRC_WIDTH),
+       DCSS_DBG_REG(DCSS_RDSRC_HEIGHT),
+};
+
+/*
+ * dcss_rdsrc_dump_regs() - print the RDSRC registers into a seq_file
+ * @s:    seq_file to print into
+ * @data: the struct dcss_soc whose RDSRC registers are dumped
+ *
+ * Every register listed in rdsrc_debug_reg is read from the ioremapped
+ * register window and printed as "name(offset) -> value".
+ */
+void dcss_rdsrc_dump_regs(struct seq_file *s, void *data)
+{
+       struct dcss_soc *dcss = data;
+       void __iomem *base = dcss->rdsrc_priv->base_reg;
+       unsigned int i; /* unsigned: compared against ARRAY_SIZE() */
+
+       seq_puts(s, ">> Dumping RD_SRC:\n");
+       for (i = 0; i < ARRAY_SIZE(rdsrc_debug_reg); i++) {
+               const struct dcss_debug_reg *reg = &rdsrc_debug_reg[i];
+
+               seq_printf(s, "%-35s(0x%04x) -> 0x%08x\n",
+                          reg->name, reg->ofs,
+                          dcss_readl(base + reg->ofs));
+       }
+}
+#endif
+
+/*
+ * Write @val to the RDSRC register at offset @ofs. With USE_CTXLD defined
+ * the write is handed to dcss_ctxld_write() for context rdsrc->ctx_id,
+ * otherwise it goes straight to the mapped register via dcss_writel().
+ */
+static void dcss_rdsrc_write(struct dcss_rdsrc_priv *rdsrc, u32 val, u32 ofs)
+{
+#ifdef USE_CTXLD
+       u32 reg_addr = rdsrc->base_ofs + ofs;
+
+       dcss_ctxld_write(rdsrc->dcss, rdsrc->ctx_id, val, reg_addr);
+#else
+       dcss_writel(val, rdsrc->base_reg + ofs);
+#endif
+}
+
+/*
+ * dcss_rdsrc_init() - set up the RDSRC private data
+ * @dcss:       DCSS device the RDSRC instance belongs to
+ * @rdsrc_base: base address of the RDSRC register block
+ *
+ * Allocates the private structure, maps SZ_4K of registers at @rdsrc_base
+ * and publishes the result in dcss->rdsrc_priv. Both resources are
+ * device-managed.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation or the mapping fails.
+ */
+int dcss_rdsrc_init(struct dcss_soc *dcss, unsigned long rdsrc_base)
+{
+       struct dcss_rdsrc_priv *priv;
+       void __iomem *regs;
+
+       priv = devm_kzalloc(dcss->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       regs = devm_ioremap(dcss->dev, rdsrc_base, SZ_4K);
+       if (!regs) {
+               dev_err(dcss->dev, "rdsrc: unable to remap base\n");
+               return -ENOMEM;
+       }
+
+       priv->base_reg = regs;
+       priv->base_ofs = rdsrc_base;
+       priv->dcss = dcss;
+#ifdef USE_CTXLD
+       priv->ctx_id = CTX_SB_HP;
+#endif
+
+       /* publish only once the instance is fully set up */
+       dcss->rdsrc_priv = priv;
+
+       return 0;
+}
+
+/*
+ * Counterpart of dcss_rdsrc_init(). Intentionally empty: init only takes
+ * device-managed resources (devm_kzalloc(), devm_ioremap()).
+ */
+void dcss_rdsrc_exit(struct dcss_soc *dcss)
+{
+}
+
+/*
+ * dcss_rdsrc_setup() - program the RDSRC source buffer and geometry
+ * @dcss:       DCSS device
+ * @pix_format: currently unused, the layout is fixed at 4 bytes per pixel
+ * @dst_xres:   value written to the WIDTH register
+ * @dst_yres:   value written to the HEIGHT register
+ * @base_addr:  buffer address written to the BASE_ADDR register
+ *
+ * The CTRL_STATUS value is only prepared and cached here; it is written to
+ * the hardware by dcss_rdsrc_enable().
+ */
+void dcss_rdsrc_setup(struct dcss_soc *dcss, u32 pix_format, u32 dst_xres,
+                     u32 dst_yres, u32 base_addr)
+{
+       struct dcss_rdsrc_priv *rdsrc = dcss->rdsrc_priv;
+       /* since the scaler output is YUV444, the RDSRC output has to match */
+       const u32 bytes_per_pixel = 4;
+       u32 pitch = dst_xres * bytes_per_pixel;
+
+       rdsrc->ctrl_status = (FIFO_512 << RDSRC_FIFO_SIZE_POS) |
+                            (PSIZE_256 << RDSRC_P_SIZE_POS) |
+                            (TSIZE_256 << RDSRC_T_SIZE_POS) |
+                            (BPP_32_10BIT_OUTPUT << RDSRC_BPP_POS);
+
+       rdsrc->buf_addr = base_addr;
+
+       dcss_rdsrc_write(rdsrc, rdsrc->buf_addr, DCSS_RDSRC_BASE_ADDR);
+       dcss_rdsrc_write(rdsrc, pitch, DCSS_RDSRC_PITCH);
+       dcss_rdsrc_write(rdsrc, dst_xres, DCSS_RDSRC_WIDTH);
+       dcss_rdsrc_write(rdsrc, dst_yres, DCSS_RDSRC_HEIGHT);
+}
+
+/*
+ * dcss_rdsrc_enable() - write the cached CTRL_STATUS value to the hardware
+ * @dcss: DCSS device
+ * @en:   false to additionally zero the WIDTH and HEIGHT registers first
+ *
+ * NOTE(review): RDSRC_ENABLE is defined but never set in ctrl_status by this
+ * file - confirm against the reference manual that this is intended.
+ */
+void dcss_rdsrc_enable(struct dcss_soc *dcss, bool en)
+{
+       struct dcss_rdsrc_priv *priv = dcss->rdsrc_priv;
+       u32 ctrl = priv->ctrl_status;
+
+       if (!en) {
+               /* RDSRC is turned off by setting the width and height to 0 */
+               dcss_rdsrc_write(priv, 0, DCSS_RDSRC_WIDTH);
+               dcss_rdsrc_write(priv, 0, DCSS_RDSRC_HEIGHT);
+       }
+
+       dcss_rdsrc_write(priv, ctrl, DCSS_RDSRC_CTRL_STATUS);
+}
diff --git a/drivers/gpu/imx/dcss/dcss-wrscl.c b/drivers/gpu/imx/dcss/dcss-wrscl.c
new file mode 100644 (file)
index 0000000..8a28597
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+
+#include <video/imx-dcss.h>
+#include "dcss-prv.h"
+
+/*
+ * When defined, dcss_wrscl_write() hands register writes to
+ * dcss_ctxld_write() instead of writing them directly with dcss_writel().
+ */
+#define USE_CTXLD
+
+/* CTRL_STATUS register and its fields (offsets are relative to the WRSCL base) */
+#define DCSS_WRSCL_CTRL_STATUS                 0x00
+#define   WRSCL_ERR                            BIT(31)
+#define   WRSCL_ERR_EN                         BIT(30)
+#define   WRSCL_FRAME_COMP                     BIT(29)
+#define   WRSCL_FRAME_COMP_EN                  BIT(28)
+#define   WRSCL_FIFO_SIZE_POS                  18
+#define   WRSCL_FIFO_SIZE_MASK                 GENMASK(24, 18)
+#define   WRSCL_P_FREQ_POS                     10
+#define   WRSCL_P_FREQ_MASK                    GENMASK(17, 10)
+#define   WRSCL_P_SIZE_POS                     7
+#define   WRSCL_P_SIZE_MASK                    GENMASK(9, 7)
+#define   WRSCL_T_SIZE_POS                     5
+#define   WRSCL_T_SIZE_MASK                    GENMASK(6, 5)
+#define   WRSCL_BPP_POS                                2
+#define   WRSCL_BPP_MASK                       GENMASK(4, 2)
+#define   WRSCL_REPEAT                         BIT(1)
+#define   WRSCL_ENABLE                         BIT(0)
+/* destination buffer address and line pitch registers */
+#define DCSS_WRSCL_BASE_ADDR                   0x10
+#define DCSS_WRSCL_PITCH                       0x14
+
+/* Per-device WRSCL state, allocated by dcss_wrscl_init(). */
+struct dcss_wrscl_priv {
+       /* ioremapped register window (SZ_4K), used for direct register access */
+       void __iomem *base_reg;
+       /* wrscl_base as passed to dcss_wrscl_init(); base for ctxld writes */
+       u32 base_ofs;
+       /* back pointer to the owning DCSS device */
+       struct dcss_soc *dcss;
+
+       /* context id passed to dcss_ctxld_write() (CTX_SB_HP) */
+       u32 ctx_id;
+
+       /* size of the buffer allocated by dcss_wrscl_setup() */
+       u32 buf_size;
+       /* DMA address of that buffer; 0 means no buffer is allocated */
+       u32 buf_addr;
+       /* CPU address returned by dmam_alloc_coherent() for that buffer */
+       void *buf_vaddr;
+
+       /* CTRL_STATUS value built by dcss_wrscl_setup(), written on enable */
+       u32 ctrl_status;
+};
+
+#ifdef CONFIG_DEBUG_FS
+/* Registers printed by dcss_wrscl_dump_regs(); the table is never modified. */
+static const struct dcss_debug_reg wrscl_debug_reg[] = {
+       DCSS_DBG_REG(DCSS_WRSCL_CTRL_STATUS),
+       DCSS_DBG_REG(DCSS_WRSCL_BASE_ADDR),
+       DCSS_DBG_REG(DCSS_WRSCL_PITCH),
+};
+
+/*
+ * dcss_wrscl_dump_regs() - print the WRSCL registers into a seq_file
+ * @s:    seq_file to print into
+ * @data: the struct dcss_soc whose WRSCL registers are dumped
+ *
+ * Every register listed in wrscl_debug_reg is read from the ioremapped
+ * register window and printed as "name(offset) -> value".
+ */
+void dcss_wrscl_dump_regs(struct seq_file *s, void *data)
+{
+       struct dcss_soc *dcss = data;
+       void __iomem *base = dcss->wrscl_priv->base_reg;
+       unsigned int i; /* unsigned: compared against ARRAY_SIZE() */
+
+       seq_puts(s, ">> Dumping WR_SCL:\n");
+       for (i = 0; i < ARRAY_SIZE(wrscl_debug_reg); i++) {
+               const struct dcss_debug_reg *reg = &wrscl_debug_reg[i];
+
+               seq_printf(s, "%-35s(0x%04x) -> 0x%08x\n",
+                          reg->name, reg->ofs,
+                          dcss_readl(base + reg->ofs));
+       }
+}
+#endif
+
+/*
+ * Write @val to the WRSCL register at offset @ofs. With USE_CTXLD defined
+ * the write is handed to dcss_ctxld_write() for context wrscl->ctx_id,
+ * otherwise it goes straight to the mapped register via dcss_writel().
+ */
+static void dcss_wrscl_write(struct dcss_wrscl_priv *wrscl, u32 val, u32 ofs)
+{
+#ifdef USE_CTXLD
+       u32 reg_addr = wrscl->base_ofs + ofs;
+
+       dcss_ctxld_write(wrscl->dcss, wrscl->ctx_id, val, reg_addr);
+#else
+       dcss_writel(val, wrscl->base_reg + ofs);
+#endif
+}
+
+/*
+ * dcss_wrscl_init() - set up the WRSCL private data
+ * @dcss:       DCSS device the WRSCL instance belongs to
+ * @wrscl_base: base address of the WRSCL register block
+ *
+ * Allocates the private structure, maps SZ_4K of registers at @wrscl_base
+ * and publishes the result in dcss->wrscl_priv. Both resources are
+ * device-managed.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation or the mapping fails.
+ */
+int dcss_wrscl_init(struct dcss_soc *dcss, unsigned long wrscl_base)
+{
+       struct dcss_wrscl_priv *priv;
+       void __iomem *regs;
+
+       priv = devm_kzalloc(dcss->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       regs = devm_ioremap(dcss->dev, wrscl_base, SZ_4K);
+       if (!regs) {
+               dev_err(dcss->dev, "wrscl: unable to remap base\n");
+               return -ENOMEM;
+       }
+
+       priv->base_reg = regs;
+       priv->base_ofs = wrscl_base;
+       priv->dcss = dcss;
+#ifdef USE_CTXLD
+       priv->ctx_id = CTX_SB_HP;
+#endif
+
+       /* publish only once the instance is fully set up */
+       dcss->wrscl_priv = priv;
+
+       return 0;
+}
+
+/*
+ * Counterpart of dcss_wrscl_init(). Intentionally empty: the private data,
+ * the register mapping and the intermediate buffer are all obtained through
+ * device-managed calls (devm_kzalloc(), devm_ioremap(),
+ * dmam_alloc_coherent()).
+ */
+void dcss_wrscl_exit(struct dcss_soc *dcss)
+{
+}
+
+/* size used in the P_FREQ computation for each PSIZE_* encoding */
+static const u16 dcss_wrscl_psize_map[] = {64, 128, 256, 512, 1024, 2048, 4096};
+
+/*
+ * dcss_wrscl_setup() - allocate the intermediate buffer and program WRSCL
+ * @dcss:        DCSS device
+ * @pix_format:  currently unused, the layout is fixed at 4 bytes per pixel
+ * @vrefresh_hz: refresh rate, in Hz, of the frames written out
+ * @dst_xres:    width of the frame written out
+ * @dst_yres:    height of the frame written out
+ *
+ * Any buffer left over from a previous call is released first. The
+ * CTRL_STATUS value is only prepared and cached here; it is written to the
+ * hardware by dcss_wrscl_enable().
+ *
+ * Return: the DMA address of the new buffer, or 0 if the mode is invalid or
+ * no usable buffer could be allocated. On failure no buffer is held.
+ */
+u32 dcss_wrscl_setup(struct dcss_soc *dcss, u32 pix_format, u32 vrefresh_hz,
+                    u32 dst_xres, u32 dst_yres)
+{
+       struct dcss_wrscl_priv *wrscl = dcss->wrscl_priv;
+       /* largest value the P_FREQ field can hold */
+       const u32 p_freq_max = WRSCL_P_FREQ_MASK >> WRSCL_P_FREQ_POS;
+       const u32 p_size = PSIZE_256;
+       /* scaler output is YUV444 */
+       const u32 bpp = 4;
+       u32 b_clk = clk_get_rate(dcss->axi_clk);
+       dma_addr_t dma_handle;
+       u32 pitch, p_freq;
+       u64 interval;
+
+       /* we'd better release the old buffer */
+       if (wrscl->buf_addr) {
+               dmam_free_coherent(dcss->dev, wrscl->buf_size,
+                                  wrscl->buf_vaddr, wrscl->buf_addr);
+               /* forget it, so a failure below cannot cause a second free */
+               wrscl->buf_addr = 0;
+               wrscl->buf_vaddr = NULL;
+       }
+
+       /* an empty frame or a zero refresh rate would divide by zero below */
+       if (!dst_xres || !dst_yres || !vrefresh_hz) {
+               dev_err(dcss->dev, "wrscl: invalid mode %ux%u@%u\n",
+                       dst_xres, dst_yres, vrefresh_hz);
+               return 0;
+       }
+
+       /* spread the load over the entire frame */
+       interval = ((u64)b_clk * dcss_wrscl_psize_map[p_size]) /
+                  ((u64)dst_xres * dst_yres * vrefresh_hz * bpp * 8);
+
+       /*
+        * choose a slightly smaller p_freq; saturate at both ends of the
+        * field instead of letting the unsigned subtraction wrap around
+        */
+       if (interval <= 3)
+               p_freq = 0;
+       else if (interval - 3 > p_freq_max)
+               p_freq = p_freq_max;
+       else
+               p_freq = interval - 3;
+
+       wrscl->ctrl_status = (FIFO_512 << WRSCL_FIFO_SIZE_POS) |
+                            (p_size << WRSCL_P_SIZE_POS) |
+                            (TSIZE_256 << WRSCL_T_SIZE_POS) |
+                            (BPP_32_10BIT_OUTPUT << WRSCL_BPP_POS) |
+                            (p_freq << WRSCL_P_FREQ_POS);
+
+       wrscl->buf_size = dst_xres * dst_yres * bpp;
+       pitch = dst_xres * bpp;
+
+       wrscl->buf_vaddr = dmam_alloc_coherent(dcss->dev, wrscl->buf_size,
+                                   &dma_handle, GFP_KERNEL);
+       if (!wrscl->buf_vaddr) {
+               dev_err(dcss->dev, "wrscl: cannot alloc buf mem\n");
+               return 0;
+       }
+
+       /* buf_addr, the register write and the return value are all 32 bit */
+       if ((u32)dma_handle != dma_handle) {
+               dev_err(dcss->dev, "wrscl: buf mem not 32-bit addressable\n");
+               dmam_free_coherent(dcss->dev, wrscl->buf_size,
+                                  wrscl->buf_vaddr, dma_handle);
+               wrscl->buf_vaddr = NULL;
+               return 0;
+       }
+
+       wrscl->buf_addr = dma_handle;
+
+       dcss_wrscl_write(wrscl, wrscl->buf_addr, DCSS_WRSCL_BASE_ADDR);
+       dcss_wrscl_write(wrscl, pitch, DCSS_WRSCL_PITCH);
+
+       return wrscl->buf_addr;
+}
+
+/*
+ * dcss_wrscl_enable() - start or stop WRSCL
+ * @dcss: DCSS device
+ * @en:   true to set, false to clear the ENABLE and REPEAT bits
+ *
+ * The updated CTRL_STATUS value is cached and written out. When disabling,
+ * the intermediate buffer (if any) is released afterwards.
+ *
+ * NOTE(review): with USE_CTXLD the disable write goes through
+ * dcss_ctxld_write(); if that write is not applied immediately, the buffer
+ * could be freed while the hardware still uses it - confirm.
+ */
+void dcss_wrscl_enable(struct dcss_soc *dcss, bool en)
+{
+       struct dcss_wrscl_priv *priv = dcss->wrscl_priv;
+       const u32 run_bits = WRSCL_ENABLE | WRSCL_REPEAT;
+
+       if (!en)
+               priv->ctrl_status &= ~run_bits;
+       else
+               priv->ctrl_status |= run_bits;
+
+       dcss_wrscl_write(priv, priv->ctrl_status, DCSS_WRSCL_CTRL_STATUS);
+
+       /* the buffer is only dropped when disabling, and only if there is one */
+       if (en || !priv->buf_addr)
+               return;
+
+       dmam_free_coherent(dcss->dev, priv->buf_size,
+                          priv->buf_vaddr, priv->buf_addr);
+       priv->buf_addr = 0;
+}
+