Rate This Document
Findability
Accuracy
Completeness
Readability

Running and Verification

Run and verify TVM after it is compiled and installed.

To verify the performance improvement brought by integrating BiSheng Compiler and applying the softmax operator optimization patch in the open source TVM, this section provides a softmax operator test script to compare the running performance of the softmax operators compiled in the open source TVM 0.9.0 environment and in the TVM environment set in this document based on the new Kunpeng 920 processor model. Therefore, to complete the performance comparison, prepare an open source TVM 0.9.0 environment in addition to the TVM environment described in this document. For details, see the TVM official document.

  1. Create the /test directory in the /home directory and create the softmax test script test_softmax.py in that directory.
    1. Create the test_softmax.py file.
      1
      2
      3
      4
      cd /home
      mkdir test
      cd test
      vi test_softmax.py
      
    2. Press i to enter the insert mode and add the following content to the test_softmax.py file:
       1
       2
       3
       4
       5
       6
       7
       8
       9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      79
      80
      81
      82
      83
      84
      85
      86
      87
      88
      89
      90
      91
      92
      93
      94
      95
      import pytest
      import tvm
      from tvm import relay
      import numpy as np
      from tvm.contrib import graph_executor
      from tvm.contrib.debugger import debug_executor
      import os
      import argparse
      # NOTE(review): TVM_BIND_THREADS=0 disables TVM's internal CPU-core
      # binding — presumably so external affinity control (taskset, step 3)
      # and TVM_NUM_THREADS govern thread placement; confirm against TVM docs.
      os.environ["TVM_BIND_THREADS"] = "0"
      def test_softmax(opt_level="3", repeat_time=50):
          """Build, run, validate and profile a softmax operator in TVM.

          Compiles a (100, 120, 50) float32 softmax for an AArch64 LLVM
          target, checks the result against a NumPy reference, and profiles
          the compiled module ``repeat_time`` times, printing the average
          time spent in the softmax op and in the whole inference.

          Args:
              opt_level: Relay optimization level as a string (e.g. "3").
                  Used both for ``PassContext`` and in the exported .so name.
              repeat_time: Number of profiling runs to average over.

          Raises:
              AssertionError: If the compiled output does not match the
                  NumPy softmax reference within atol=1e-5.
          """
          # Use a fairly large 3-D input so the operator cost is measurable.
          shape = (100, 120, 50)
          data = np.random.rand(*shape).astype(np.float32)
          # Build a Relay module containing a single softmax over the last axis.
          data_var = relay.var("data", shape=shape, dtype="float32")
          softmax_expr = relay.nn.softmax(data_var, axis=-1)
          func = relay.Function([data_var], softmax_expr)
          mod = tvm.IRModule.from_expr(func)
          target = tvm.target.Target("llvm -mtriple=aarch64-linux-gnu -mattr=+neon")
          # Bug fix: the original passed int(3) here, silently ignoring the
          # opt_level argument; honor the caller-supplied level instead.
          with tvm.transform.PassContext(opt_level=int(opt_level)):
              lib = relay.build(mod, target=target)
              # Depending on the lowering, softmax may be fused as the "fast"
              # variant; print whichever function metadata entry exists.
              try:
                  print(lib.function_metadata["tvmgen_default_fused_nn_softmax"])
              except KeyError:
                  print(lib.function_metadata["tvmgen_default_fused_nn_fast_softmax"])
          # Round-trip through an exported shared library, as a deployment would.
          model_path = "/home/test/test_softmax_%s.so" % (opt_level)
          lib.export_library(model_path)
          lib = tvm.runtime.load_module(model_path)
          # Run once on CPU to capture the output for the correctness check.
          dev = tvm.cpu()
          data = tvm.nd.array(data, device=dev)
          m = graph_executor.create(lib["get_graph_json"](), lib, dev)
          m.set_input("data", data)
          m.run()
          output = m.get_output(0).numpy()

          # Debug executor, used purely for per-op profiling.
          m_debug = debug_executor.create(
              lib["get_graph_json"](),
              lib,
              dev,
              dump_root="./tvmdbg_onnx"
          )

          results = [0] * repeat_time   # whole-inference time per run
          ops_sums = [0] * repeat_time  # softmax-op time per run
          for i in range(repeat_time):
              report = m_debug.profile(**{"data": data})
              # Parse the whitespace-separated profile table: the token after
              # an op name ending in "softmax" is its duration, and the token
              # after "Total" is the whole-graph duration. Values are divided
              # by 1000 below, so they are presumably microseconds — confirm
              # against the tvm.runtime.profiling report format.
              r_t = report.table().split()
              for ind, tmp in enumerate(r_t):
                  if tmp.endswith("softmax"):
                      ops_sums[i] = float(r_t[ind + 1].replace(",", ""))
                  if tmp == "Total":
                      results[i] = float(r_t[ind + 1].replace(",", ""))
          print("Average time consuming in softmaxOp is %f ms" % (sum(ops_sums) / repeat_time / 1000))
          print("Average time consuming in profiling is %f ms" % (sum(results) / repeat_time / 1000))
          # Validate the compiled output against a NumPy softmax reference.
          data = data.numpy()
          expected_output = np.exp(data) / np.sum(np.exp(data), axis=-1, keepdims=True)
          np.testing.assert_allclose(output, expected_output, atol=1e-5)
      if __name__ == "__main__":
          # Command-line options controlling the benchmark run.
          parser = argparse.ArgumentParser()
          parser.add_argument("--num_threads", type=str, default="4")
          parser.add_argument("--opt_level", type=str, default="1")
          parser.add_argument("--epoch", type=int, default=100)
          parser.add_argument("--repeat_time", type=int, default=50)
          args = parser.parse_args()
          # TVM reads TVM_NUM_THREADS when its runtime thread pool starts,
          # so export it before any inference happens.
          os.environ["TVM_NUM_THREADS"] = args.num_threads
          print("num of threads is :%d" % (tvm.runtime.num_threads()))
          print("opt level is :%s" % args.opt_level)
          print("epoch is %d" % args.epoch)
          print("repeat time is %d" % args.repeat_time)

          # Verify correctness and report softmax timing, then hand the
          # file to pytest for test discovery.
          test_softmax(opt_level=args.opt_level, repeat_time=args.repeat_time)
          pytest.main([__file__])
      
    3. Press Esc, type :wq and press Enter to save the file and exit.
  2. Activate the Python environment of the TVM and set the TVM environment variables. (To activate the open source TVM 0.9.0, replace /path/to/TVM with its installation path.)
    1
    2
    3
    conda activate tvm
    export TVM_HOME=/path/to/TVM/apache-tvm-src-v0.9.0
    export PYTHONPATH=$TVM_HOME/python:${PYTHONPATH}
    
  3. Run the test_softmax.py test script.
    1
    taskset -c 0-15 python -u test_softmax.py --num_threads 4
    

    The test_softmax.py script prints the bottom-layer intermediate representation (IR) generated by the TVM for the softmax operator and the operator time consumption.

    Figure 1 and Figure 2 show the test script execution results. The result analysis is as follows:

    • For the bottom-layer IRs of the softmax operator, vectorization and parallelization are added to the operator computation in the TVM environment set in this document, and the division operation is extracted out of the final regularization operation loop.
    • For the time consumed by the softmax operator, the time consumed by the softmax operator in the TVM environment set in this document is shorter than that in the open source TVM 0.9.0 environment.
    Figure 1 Execution result of test_softmax.py in the open source TVM 0.9.0 environment
    Figure 2 Execution result of test_softmax.py in the TVM environment set in this document