habanalabs: flush only at the end of the map/unmap
author		Pawel Piskorski <ppiskorski@habana.ai>
Fri, 6 Dec 2019 15:32:38 +0000 (17:32 +0200)
committer	Oded Gabbay <oded.gabbay@gmail.com>
Tue, 24 Mar 2020 08:54:16 +0000 (10:54 +0200)
Optimize hl_mmu_map and hl_mmu_unmap by not calling flush(ctx) within
the per-page loop. Instead, each call takes a new flush_pte argument,
and the flush is issued once, after the last page of the mapped or
unmapped range.
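
A minimal sketch of the resulting calling convention, drawn from the
map_phys_pg_pack() hunk below (declarations omitted and error handling
simplified, so this is illustrative rather than compilable in
isolation): the caller requests the PCI flush only when it reaches the
last page, so the whole range is covered by a single flush(ctx) inside
hl_mmu_map().

	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
		paddr = phys_pg_pack->pages[i];

		/* flush_pte is true only for the final page of the range */
		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
				(i + 1) == phys_pg_pack->npages);
		if (rc)
			break;

		next_vaddr += page_size;
	}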

Signed-off-by: Pawel Piskorski <ppiskorski@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/memory.c
drivers/misc/habanalabs/mmu.c

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index b8a8de2..3c67948 100644
@@ -4776,7 +4776,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 
        for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
                rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
-                               prop->dram_base_address + off, PAGE_SIZE_2MB);
+                               prop->dram_base_address + off, PAGE_SIZE_2MB,
+                               (off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
                if (rc) {
                        dev_err(hdev->dev, "Map failed for address 0x%llx\n",
                                prop->dram_base_address + off);
@@ -4786,7 +4787,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 
        if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
                rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-                       hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
+                       hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
 
                if (rc) {
                        dev_err(hdev->dev,
@@ -4799,7 +4800,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
                        rc = hl_mmu_map(hdev->kernel_ctx,
                                VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
                                hdev->cpu_accessible_dma_address + cpu_off,
-                               PAGE_SIZE_4KB);
+                               PAGE_SIZE_4KB, true);
                        if (rc) {
                                dev_err(hdev->dev,
                                        "Map failed for CPU accessible memory\n");
@@ -4825,14 +4826,15 @@ unmap_cpu:
        for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
                if (hl_mmu_unmap(hdev->kernel_ctx,
                                VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
-                               PAGE_SIZE_4KB))
+                               PAGE_SIZE_4KB, true))
                        dev_warn_ratelimited(hdev->dev,
                                "failed to unmap address 0x%llx\n",
                                VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
 unmap:
        for (; off >= 0 ; off -= PAGE_SIZE_2MB)
                if (hl_mmu_unmap(hdev->kernel_ctx,
-                               prop->dram_base_address + off, PAGE_SIZE_2MB))
+                               prop->dram_base_address + off, PAGE_SIZE_2MB,
+                               true))
                        dev_warn_ratelimited(hdev->dev,
                                "failed to unmap address 0x%llx\n",
                                prop->dram_base_address + off);
@@ -4857,14 +4859,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 
        if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
                if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-                               PAGE_SIZE_2MB))
+                               PAGE_SIZE_2MB, true))
                        dev_warn(hdev->dev,
                                "Failed to unmap CPU accessible memory\n");
        } else {
                for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
                        if (hl_mmu_unmap(hdev->kernel_ctx,
                                        VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
-                                       PAGE_SIZE_4KB))
+                                       PAGE_SIZE_4KB,
+                                       (cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
                                dev_warn_ratelimited(hdev->dev,
                                        "failed to unmap address 0x%llx\n",
                                        VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
@@ -4872,7 +4875,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 
        for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
                if (hl_mmu_unmap(hdev->kernel_ctx,
-                               prop->dram_base_address + off, PAGE_SIZE_2MB))
+                               prop->dram_base_address + off, PAGE_SIZE_2MB,
+                               (off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
                        dev_warn_ratelimited(hdev->dev,
                                        "Failed to unmap address 0x%llx\n",
                                        prop->dram_base_address + off);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 00c949f..df34227 100644
@@ -1573,8 +1573,10 @@ int hl_mmu_init(struct hl_device *hdev);
 void hl_mmu_fini(struct hl_device *hdev);
 int hl_mmu_ctx_init(struct hl_ctx *ctx);
 void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+               u32 page_size, bool flush_pte);
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+               bool flush_pte);
 void hl_mmu_swap_out(struct hl_ctx *ctx);
 void hl_mmu_swap_in(struct hl_ctx *ctx);
 
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 6c72cb4..b612b1a 100644
@@ -747,7 +747,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
        for (i = 0 ; i < phys_pg_pack->npages ; i++) {
                paddr = phys_pg_pack->pages[i];
 
-               rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
+               rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+                               (i + 1) == phys_pg_pack->npages);
                if (rc) {
                        dev_err(hdev->dev,
                                "map failed for handle %u, npages: %llu, mapped: %llu",
@@ -765,7 +766,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 err:
        next_vaddr = vaddr;
        for (i = 0 ; i < mapped_pg_cnt ; i++) {
-               if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+               if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+                                       (i + 1) == mapped_pg_cnt))
                        dev_warn_ratelimited(hdev->dev,
                                "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
                                        phys_pg_pack->handle, next_vaddr,
@@ -794,7 +796,8 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
        next_vaddr = vaddr;
 
        for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-               if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+               if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+                                      (i + 1) == phys_pg_pack->npages))
                        dev_warn_ratelimited(hdev->dev,
                        "unmap failed for vaddr: 0x%llx\n", next_vaddr);
 
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 6262b26..006eee4 100644
@@ -637,29 +637,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
                        clear_hop3 = true;
 
                if (!clear_hop3)
-                       goto flush;
+                       goto mapped;
 
                clear_pte(ctx, hop3_pte_addr);
 
                if (put_pte(ctx, hop3_addr))
-                       goto flush;
+                       goto mapped;
 
                clear_pte(ctx, hop2_pte_addr);
 
                if (put_pte(ctx, hop2_addr))
-                       goto flush;
+                       goto mapped;
 
                clear_pte(ctx, hop1_pte_addr);
 
                if (put_pte(ctx, hop1_addr))
-                       goto flush;
+                       goto mapped;
 
                clear_pte(ctx, hop0_pte_addr);
        }
 
-flush:
-       flush(ctx);
-
+mapped:
        return 0;
 
 not_mapped:
@@ -675,6 +673,7 @@ not_mapped:
  * @ctx: pointer to the context structure
  * @virt_addr: virt addr to map from
  * @page_size: size of the page to unmap
+ * @flush_pte: whether to do a PCI flush
  *
  * This function does the following:
  * - Check that the virt addr is mapped
@@ -685,15 +684,19 @@ not_mapped:
  * changes the MMU hash, it must be protected by a lock.
  * However, because it maps only a single page, the lock should be implemented
  * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after unmapping of
+ * large area.
  */
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+               bool flush_pte)
 {
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_mmu_properties *mmu_prop;
        u64 real_virt_addr;
        u32 real_page_size, npages;
-       int i, rc;
+       int i, rc = 0;
        bool is_dram_addr;
 
        if (!hdev->mmu_enable)
@@ -729,12 +732,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
        for (i = 0 ; i < npages ; i++) {
                rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
                if (rc)
-                       return rc;
+                       break;
 
                real_virt_addr += real_page_size;
        }
 
-       return 0;
+       if (flush_pte)
+               flush(ctx);
+
+       return rc;
 }
 
 static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
@@ -885,8 +891,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                get_pte(ctx, hop3_addr);
        }
 
-       flush(ctx);
-
        return 0;
 
 err:
@@ -909,6 +913,7 @@ err:
  * @virt_addr: virt addr to map from
  * @phys_addr: phys addr to map to
  * @page_size: physical page size
+ * @flush_pte: whether to do a PCI flush
  *
  * This function does the following:
  * - Check that the virt addr is not mapped
@@ -919,8 +924,12 @@ err:
  * changes the MMU hash, it must be protected by a lock.
  * However, because it maps only a single page, the lock should be implemented
  * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after mapping of
+ * large area.
  */
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
+               bool flush_pte)
 {
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -976,6 +985,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
                mapped_cnt++;
        }
 
+       if (flush_pte)
+               flush(ctx);
+
        return 0;
 
 err:
@@ -988,6 +1000,8 @@ err:
                real_virt_addr += real_page_size;
        }
 
+       flush(ctx);
+
        return rc;
 }
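
The unmap side follows the same pattern; a hedged sketch mirroring the
unmap_phys_pg_pack() hunk above (declarations omitted, illustrative
only):

	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size)
		/* flush only after the last page of the unmapped range */
		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
					(i + 1) == phys_pg_pack->npages))
			dev_warn_ratelimited(hdev->dev,
				"unmap failed for vaddr: 0x%llx\n", next_vaddr);

Note that the error path of hl_mmu_map() (the final hunk) still calls
flush(ctx) unconditionally after rolling back the partially mapped
range, so the device MMU is left coherent even when the caller deferred
the flush.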