drm/msm: Allow a5xx to mark the RPTR shadow as privileged

author Jordan Crouse <jcrouse@codeaurora.org>

Mon, 14 Sep 2020 22:40:21 +0000 (16:40 -0600)

committer Rob Clark <robdclark@chromium.org>

Tue, 15 Sep 2020 17:47:44 +0000 (10:47 -0700)
author Jordan Crouse <jcrouse@codeaurora.org>
Mon, 14 Sep 2020 22:40:21 +0000 (16:40 -0600)
committer Rob Clark <robdclark@chromium.org>
Tue, 15 Sep 2020 17:47:44 +0000 (10:47 -0700)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c

index 616d9e7..835aaef 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -18,13 +18,24 @@ static void a5xx_dump(struct msm_gpu *gpu);
  
  #define GPU_PAS_ID 13
  
-static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+               bool sync)
  {
         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
         uint32_t wptr;
         unsigned long flags;
  
+       /*
+        * Most flush operations need to issue a WHERE_AM_I opcode to sync up
+        * the rptr shadow
+        */
+       if (a5xx_gpu->has_whereami && sync) {
+               OUT_PKT7(ring, CP_WHERE_AM_I, 2);
+               OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
+               OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
+       }
+
         spin_lock_irqsave(&ring->lock, flags);
  
         /* Copy the shadow to the actual register */
@@ -90,7 +101,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit
                 }
         }
  
-       a5xx_flush(gpu, ring);
+       a5xx_flush(gpu, ring, true);
         a5xx_preempt_trigger(gpu);
  
         /* we might not necessarily have a cmd from userspace to
@@ -204,7 +215,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
         /* Set bit 0 to trigger an interrupt on preempt complete */
         OUT_RING(ring, 0x01);
  
-       a5xx_flush(gpu, ring);
+       /* A WHERE_AM_I packet is not needed after a YIELD */
+       a5xx_flush(gpu, ring, false);
  
         /* Check to see if we need to start preemption */
         a5xx_preempt_trigger(gpu);
@@ -363,7 +375,7 @@ static int a5xx_me_init(struct msm_gpu *gpu)
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
  
-       gpu->funcs->flush(gpu, ring);
+       a5xx_flush(gpu, ring, true);
         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
  }
  
@@ -405,11 +417,31 @@ static int a5xx_preempt_start(struct msm_gpu *gpu)
         OUT_RING(ring, 0x01);
         OUT_RING(ring, 0x01);
  
-       gpu->funcs->flush(gpu, ring);
+       /* The WHERE_AM_I packet is not needed after a YIELD is issued */
+       a5xx_flush(gpu, ring, false);
  
         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
  }
  
+static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
+               struct drm_gem_object *obj)
+{
+       u32 *buf = msm_gem_get_vaddr_active(obj);
+
+       if (IS_ERR(buf))
+               return;
+
+       /*
+        * If the lowest nibble is 0xa that is an indication that this microcode
+        * has been patched. The actual version is in the third dword (buf[2])
+        * but we only care about the patchlevel, which is its lowest nibble
+        */
+       if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
+               a5xx_gpu->has_whereami = true;
+
+       msm_gem_put_vaddr(obj);
+}
+
  static int a5xx_ucode_init(struct msm_gpu *gpu)
  {
         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -445,6 +477,7 @@ static int a5xx_ucode_init(struct msm_gpu *gpu)
                 }
  
                 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
+               a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
         }
  
         gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
@@ -504,6 +537,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu)
  static int a5xx_hw_init(struct msm_gpu *gpu)
  {
         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+       struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
         int ret;
  
         gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
@@ -712,9 +746,36 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
         gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
                 gpu->rb[0]->iova);
  
+       /*
+        * If the microcode supports the WHERE_AM_I opcode then we can use that
+        * in lieu of the RPTR shadow and enable preemption. Otherwise, we
+        * can't safely use the RPTR shadow or preemption. In either case, the
+        * RPTR shadow should be disabled in hardware.
+        */
         gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
                 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
  
+       /* Disable preemption if WHERE_AM_I isn't available */
+       if (!a5xx_gpu->has_whereami && gpu->nr_rings > 1) {
+               a5xx_preempt_fini(gpu);
+               gpu->nr_rings = 1;
+       } else {
+               /* Create a privileged buffer for the RPTR shadow */
+               if (!a5xx_gpu->shadow_bo) {
+                       a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
+                               sizeof(u32) * gpu->nr_rings,
+                               MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
+                               gpu->aspace, &a5xx_gpu->shadow_bo,
+                               &a5xx_gpu->shadow_iova);
+
+                       if (IS_ERR(a5xx_gpu->shadow))
+                               return PTR_ERR(a5xx_gpu->shadow);
+               }
+
+               gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
+                       REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
+       }
+
         a5xx_preempt_hw_init(gpu);
  
         /* Disable the interrupts through the initial bringup stage */
@@ -738,7 +799,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
                 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
                 OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
  
-               gpu->funcs->flush(gpu, gpu->rb[0]);
+               a5xx_flush(gpu, gpu->rb[0], true);
                 if (!a5xx_idle(gpu, gpu->rb[0]))
                         return -EINVAL;
         }
@@ -756,7 +817,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
                 OUT_RING(gpu->rb[0], 0x00000000);
  
-               gpu->funcs->flush(gpu, gpu->rb[0]);
+               a5xx_flush(gpu, gpu->rb[0], true);
                 if (!a5xx_idle(gpu, gpu->rb[0]))
                         return -EINVAL;
         } else if (ret == -ENODEV) {
@@ -823,6 +884,11 @@ static void a5xx_destroy(struct msm_gpu *gpu)
                 drm_gem_object_put(a5xx_gpu->gpmu_bo);
         }
  
+       if (a5xx_gpu->shadow_bo) {
+               msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
+               drm_gem_object_put(a5xx_gpu->shadow_bo);
+       }
+
         adreno_gpu_cleanup(adreno_gpu);
         kfree(a5xx_gpu);
  }
@@ -1430,6 +1496,17 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
         return (unsigned long)busy_time;
  }
  
+static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+       struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+
+       if (a5xx_gpu->has_whereami)
+               return a5xx_gpu->shadow[ring->id];
+
+       return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
+}
+
  static const struct adreno_gpu_funcs funcs = {
         .base = {
                 .get_param = adreno_get_param,
@@ -1438,7 +1515,6 @@ static const struct adreno_gpu_funcs funcs = {
                 .pm_resume = a5xx_pm_resume,
                 .recover = a5xx_recover,
                 .submit = a5xx_submit,
-               .flush = a5xx_flush,
                 .active_ring = a5xx_active_ring,
                 .irq = a5xx_irq,
                 .destroy = a5xx_destroy,
@@ -1452,6 +1528,7 @@ static const struct adreno_gpu_funcs funcs = {
                 .gpu_state_get = a5xx_gpu_state_get,
                 .gpu_state_put = a5xx_gpu_state_put,
                 .create_address_space = adreno_iommu_create_address_space,
+               .get_rptr = a5xx_get_rptr,
         },
         .get_timestamp = a5xx_get_timestamp,
  };
@@ -1516,8 +1593,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
  
         check_speed_bin(&pdev->dev);
  
-       /* Restricting nr_rings to 1 to temporarily disable preemption */
-       ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
+       ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
         if (ret) {
                 a5xx_destroy(&(a5xx_gpu->base.base));
                 return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h

index 1e5b1a1..c7187bc 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -37,6 +37,13 @@ struct a5xx_gpu {
  
         atomic_t preempt_state;
         struct timer_list preempt_timer;
+
+       struct drm_gem_object *shadow_bo;
+       uint64_t shadow_iova;
+       uint32_t *shadow;
+
+       /* True if the microcode supports the WHERE_AM_I opcode */
+       bool has_whereami;
  };
  
  #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)
@@ -141,6 +148,9 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs,
         return -ETIMEDOUT;
  }
  
+#define shadowptr(a5xx_gpu, ring) ((a5xx_gpu)->shadow_iova + \
+               ((ring)->id * sizeof(uint32_t)))
+
  bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
  void a5xx_set_hwcg(struct msm_gpu *gpu, bool state);
  
@@ -150,6 +160,8 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu);
  void a5xx_preempt_irq(struct msm_gpu *gpu);
  void a5xx_preempt_fini(struct msm_gpu *gpu);
  
+void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, bool sync);
+
  /* Return true if we are in a preempt state */
  static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu)
  {
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c

index 321a806..f176a6f 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a5xx_power.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c
@@ -240,7 +240,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu)
         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
         OUT_RING(ring, 1);
  
-       gpu->funcs->flush(gpu, ring);
+       a5xx_flush(gpu, ring, true);
  
         if (!a5xx_idle(gpu, ring)) {
                 DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n",
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c

index 9f3fe17..7e04509 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
@@ -259,8 +259,9 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu,
         ptr->magic = A5XX_PREEMPT_RECORD_MAGIC;
         ptr->info = 0;
         ptr->data = 0;
-       ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT;
-       ptr->rptr_addr = rbmemptr(ring, rptr);
+       ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE;
+
+       ptr->rptr_addr = shadowptr(a5xx_gpu, ring);
         ptr->counter = counters_iova;
  
         return 0;
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c

index a833dd0..11635e3 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -422,6 +422,11 @@ int adreno_hw_init(struct msm_gpu *gpu)
  static uint32_t get_rptr(struct adreno_gpu *adreno_gpu,
                 struct msm_ringbuffer *ring)
  {
+       struct msm_gpu *gpu = &adreno_gpu->base;
+
+       if (gpu->funcs->get_rptr)
+               return gpu->funcs->get_rptr(gpu, ring);
+
         return ring->memptrs->rptr = adreno_gpu_read(
                 adreno_gpu, REG_ADRENO_CP_RB_RPTR);
  }
diff --git a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h

index 3931eec..59bb8c1 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h
@@ -298,6 +298,7 @@ enum adreno_pm4_type3_packets {
         CP_SET_BIN_DATA5_OFFSET = 46,
         CP_SET_CTXSWITCH_IB = 85,
         CP_REG_WRITE = 109,
+       CP_WHERE_AM_I = 98,
  };
  
  enum adreno_state_block {
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h

index 5ee358b..6c9e1fd 100644 (file)
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -68,6 +68,7 @@ struct msm_gpu_funcs {
                 (struct msm_gpu *gpu, struct platform_device *pdev);
         struct msm_gem_address_space *(*create_private_address_space)
                 (struct msm_gpu *gpu);
+       uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
  };
  
  struct msm_gpu {
author	Jordan Crouse <jcrouse@codeaurora.org>
	Mon, 14 Sep 2020 22:40:21 +0000 (16:40 -0600)
committer	Rob Clark <robdclark@chromium.org>
	Tue, 15 Sep 2020 17:47:44 +0000 (10:47 -0700)
drivers/gpu/drm/msm/adreno/a5xx_gpu.c		patch \| blob \| history
drivers/gpu/drm/msm/adreno/a5xx_gpu.h		patch \| blob \| history
drivers/gpu/drm/msm/adreno/a5xx_power.c		patch \| blob \| history
drivers/gpu/drm/msm/adreno/a5xx_preempt.c		patch \| blob \| history
drivers/gpu/drm/msm/adreno/adreno_gpu.c		patch \| blob \| history
drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h		patch \| blob \| history
drivers/gpu/drm/msm/msm_gpu.h		patch \| blob \| history