본문 바로가기

HPC

AOCC Clang Performance 옵션 확인

우선 test.c 파일 작성

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define N 500  // Matrix dimension: every matrix in this file is N x N doubles

/*
 * Dense matrix product C = A * B for N x N matrices of doubles.
 *
 * Every element of C is first set to 0.0 and then accumulated in place,
 * summing A[row][inner] * B[inner][col] over inner = 0 .. N-1 in increasing
 * order. Any previous contents of C are overwritten.
 */
void matmul(double A[N][N], double B[N][N], double C[N][N]) {
    for (int row = 0; row < N; ++row) {
        double *out_row = C[row];
        for (int col = 0; col < N; ++col) {
            double *cell = &out_row[col];
            *cell = 0.0;
            for (int inner = 0; inner < N; ++inner) {
                // Accumulate directly into C, exactly as written to memory.
                *cell += A[row][inner] * B[inner][col];
            }
        }
    }
}

/*
 * Fills all N x N elements of M, in row-major order, with pseudo-random
 * values taken from rand(). Each value is k / 10.0 for an integer k in
 * 0..99, i.e. it lies in the range 0.0 .. 9.9.
 */
void init_matrix(double M[N][N]) {
    for (int row = 0; row < N; ++row) {
        for (int col = 0; col < N; ++col) {
            int tenths = rand() % 100;    // integer in 0..99
            M[row][col] = tenths / 10.0;  // scaled to 0.0..9.9
        }
    }
}

/*
 * Benchmark driver.
 *
 * Fills A and B with pseudo-random values, times a single matmul(A, B, C)
 * call with clock() (processor time used by the program), and prints the
 * elapsed time in seconds. Returns 0.
 */
int main(void) {
    // static keeps the three N x N matrices out of main's stack frame.
    static double A[N][N], B[N][N], C[N][N];

    srand((unsigned)time(NULL));  // Seed rand() from the current time
    init_matrix(A);
    init_matrix(B);

    printf("Starting matrix multiplication...\n");
    clock_t start = clock();

    matmul(A, B, C);  // The work being timed

    clock_t end = clock();
    printf("Completed in %.3f seconds\n", (double)(end - start) / CLOCKS_PER_SEC);

    // Consume the result. C is otherwise written but never read, so an
    // optimizing compiler would be free to discard the multiplication as
    // dead stores and the benchmark would time nothing. The volatile store
    // forces the sum (and therefore C) to actually be computed; it happens
    // after the timed region and produces no output.
    double checksum = 0.0;
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            checksum += C[i][j];
        }
    }
    volatile double sink = checksum;
    (void)sink;

    return 0;
}

 

결과

$ clang -### -c test.c
AMD clang version 17.0.6 (CLANG: AOCC_5.0.0-Build#1377 2024_09_24)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/AMD/aocc-compiler-5.0.0/bin
 (in-process)
 "/opt/AMD/aocc-compiler-5.0.0/bin/clang-17" "-cc1" "-triple" "x86_64-unknown-linux-gnu" "-emit-obj" "-disable-free" "-clear-ast-before-backend" "-main-file-name" "test.c" "-mrelocation-model" "pic" "-pic-level" "2" "-pic-is-pie" "-mframe-pointer=none" "-fmath-errno" "-ffp-contract=on" "-fno-rounding-math" "-mconstructor-aliases" "-funwind-tables=2" "-target-cpu" "x86-64" "-tune-cpu" "generic" "-debugger-tuning=gdb" "-fcoverage-compilation-dir=/home/dell7875/Downloads" "-resource-dir" "/opt/AMD/aocc-compiler-5.0.0/lib/clang/17" "-c-isystem" "/opt/AMD/aocc-compiler-5.0.0/include" "-cxx-isystem" "/opt/AMD/aocc-compiler-5.0.0/include" "-internal-isystem" "/opt/AMD/aocc-compiler-5.0.0/lib/clang/17/include" "-internal-isystem" "/usr/local/include" "-internal-isystem" "/usr/lib/gcc/x86_64-linux-gnu/13/../../../../x86_64-linux-gnu/include" "-internal-externc-isystem" "/usr/include/x86_64-linux-gnu" "-internal-externc-isystem" "/include" "-internal-externc-isystem" "/usr/include" "-O2" "-fdebug-compilation-dir=/home/dell7875/Downloads" "-ferror-limit" "19" "-fgnuc-version=4.2.1" "-fcolor-diagnostics" "-vectorize-loops" "-vectorize-slp" "-itodcalls" "-itodcallsbyclone" "-faddrsig" "-D__GCC_HAVE_DWARF2_CFI_ASM=1" "-o" "test.o" "-x" "c" "test.c"

 

설명

Option Explanation

Architecture x86_64-unknown-linux-gnu (64-bit Linux)

Optimization -O2 (default, moderate optimization)

CPU Target -target-cpu x86-64 (not tuned for AMD Zen)

Vectorization -vectorize-loops & -vectorize-slp (SIMD optimizations)

Position Independence -pic-level 2 with -pic-is-pie (position-independent executable, PIE)

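Math Settings -ffp-contract=on (fused multiply-add contraction is allowed within a single expression; no FMA instructions are actually emitted for the baseline x86-64 target), -fmath-errno (math library functions report errors by setting errno)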

Debugging -funwind-tables=2, -fdebug-compilation-dir=~/Downloads

 

'HPC' 카테고리의 다른 글

Tmux on NURION  (0) 2025.04.10
시스템 메모리 체크  (0) 2025.03.05
AOCC Command-line options  (0) 2025.02.28
NUMA Pinning in IntelOneAPI  (0) 2025.02.05
NUMA Check  (1) 2025.02.05