ex1 LLVM

以下のCのSource codeがコンパイラによってどう変換されるかを調べる

    extern int printf(char *,...);
    #define TYPE int
    TYPE f(TYPE a, TYPE b) {
        return a + b;
    }
    int main() 
    {
        TYPE a = 1;
        TYPE b = 2;
        printf("%x = %x + %x \n",f(a,b),a,b);
        return 0;
    }

cpp

clang -E での出力を調べる
変換されている部分はどこか。 printf を prototype ではなく #include で定義した時はどうなるか。

解説

clangの -E オプションはmanによると -E Run the preprocessor stage.

つまりプリプロセッサまで処理を行う。普通のコンパイルの時はあくまで内部的に行われている。

(参考:プリプロセッサ)

Report on Compiler construction Lecture Exercise 1.1

clangのEオプションはプリプロセッサの展開結果のみを表示するオプションである。まずはexternしている状態の出力を見てみる

 $clang -E report.c

# 1 "report.c"
# 1 "<built-in>" 1
# 1 "<built-in>" 3
# 330 "<built-in>" 3
# 1 "<command line>" 1
# 1 "<built-in>" 2
# 1 "report.c" 2
extern int printf(char *,...);

int f(int a, int b) {
    return a + b;
}
int main()
{
    int a = 1;
    int b = 2;
    printf("%x = %x + %x \n",f(a,b),a,b);
    return 0;
}

最初の#から始まる数行はコンパイルエラーとソースコードを対応付ける為の出力である。プリプロセッサではコメントの除去や単語の置換などを行っており,実際に#defineの行が除去され,関数fのTYPEが単純なint型に置換されている。つまりCompilerにとって解釈しやすい形のCコードへと変換されている。

次に#include で定義した場合の状態を見てみる。

 # 1 "report.c"
 # 1 "<built-in>" 1
 # 1 "<built-in>" 3
 # 330 "<built-in>" 3
 # 1 "<command line>" 1
 # 1 "<built-in>" 2
 # 1 "report.c" 2
 # 1 "/usr/include/stdio.h" 1 3 4
 # 64 "/usr/include/stdio.h" 3 4
 # 1 "/usr/include/sys/cdefs.h" 1 3 4
 # 587 "/usr/include/sys/cdefs.h" 3 4
 # 1 "/usr/include/sys/_symbol_aliasing.h" 1 3 4
 # 588 "/usr/include/sys/cdefs.h" 2 3 4
 # 653 "/usr/include/sys/cdefs.h" 3 4
 # 1 "/usr/include/sys/_posix_availability.h" 1 3 4
 # 654 "/usr/include/sys/cdefs.h" 2 3 4
 # 65 "/usr/include/stdio.h" 2 3 4
 # 1 "/usr/include/Availability.h" 1 3 4
 # 190 "/usr/include/Availability.h" 3 4
 # 1 "/usr/include/AvailabilityInternal.h" 1 3 4
 # 191 "/usr/include/Availability.h" 2 3 4
 # 66 "/usr/include/stdio.h" 2 3 4

 # 1 "/usr/include/_types.h" 1 3 4
 # 27 "/usr/include/_types.h" 3 4
 # 1 "/usr/include/sys/_types.h" 1 3 4
 # 33 "/usr/include/sys/_types.h" 3 4
 # 1 "/usr/include/machine/_types.h" 1 3 4
 # 32 "/usr/include/machine/_types.h" 3 4
 # 1 "/usr/include/i386/_types.h" 1 3 4
 # 37 "/usr/include/i386/_types.h" 3 4
 typedef signed char __int8_t;



 typedef unsigned char __uint8_t;
 typedef short __int16_t;
 typedef unsigned short __uint16_t;
 typedef int __int32_t;
 typedef unsigned int __uint32_t;
 typedef long long __int64_t;
 typedef unsigned long long __uint64_t;

 typedef long __darwin_intptr_t;
 typedef unsigned int __darwin_natural_t;
 # 70 "/usr/include/i386/_types.h" 3 4
 typedef int __darwin_ct_rune_t;





 typedef union {
  char __mbstate8[128];
  long long _mbstateL;
 } __mbstate_t;

 typedef __mbstate_t __darwin_mbstate_t;


 typedef long int __darwin_ptrdiff_t;







 typedef long unsigned int __darwin_size_t;


 ** 中略**

 }
 # 379 "/usr/include/stdio.h" 3 4
 void flockfile(FILE *);
 int ftrylockfile(FILE *);
 void funlockfile(FILE *);
 int getc_unlocked(FILE *);
 int getchar_unlocked(void);
 int putc_unlocked(int, FILE *);
 int putchar_unlocked(int);



 int getw(FILE *);
 int putw(int, FILE *);


 __attribute__((__availability__(swift, unavailable, message="Use mkstemp(3) instead.")))

 __attribute__((deprecated("This function is provided for compatibility reasons only.  Due to security concerns inherent in the design of tempnam(3), it is highly recommended that you use mkstemp(3) instead.")))

 char *tempnam(const char *__dir, const char *__prefix) __asm("_" "tempnam" );
 # 417 "/usr/include/stdio.h" 3 4
 # 1 "/usr/include/sys/_types/_off_t.h" 1 3 4
 # 30 "/usr/include/sys/_types/_off_t.h" 3 4
 typedef __darwin_off_t off_t;
 # 418 "/usr/include/stdio.h" 2 3 4


 int fseeko(FILE * __stream, off_t __offset, int __whence);
 off_t ftello(FILE * __stream);





 int snprintf(char * restrict __str, size_t __size, const char * restrict __format, ...) __attribute__((__format__ (__printf__, 3, 4)));
 int vfscanf(FILE * restrict __stream, const char * restrict __format, va_list) __attribute__((__format__ (__scanf__, 2, 0)));
 int vscanf(const char * restrict __format, va_list) __attribute__((__format__ (__scanf__, 1, 0)));
 int vsnprintf(char * restrict __str, size_t __size, const char * restrict __format, va_list) __attribute__((__format__ (__printf__, 3, 0)));
 int vsscanf(const char * restrict __str, const char * restrict __format, va_list) __attribute__((__format__ (__scanf__, 2, 0)));
 # 442 "/usr/include/stdio.h" 3 4
 # 1 "/usr/include/sys/_types/_ssize_t.h" 1 3 4
 # 30 "/usr/include/sys/_types/_ssize_t.h" 3 4
 typedef __darwin_ssize_t ssize_t;
 # 443 "/usr/include/stdio.h" 2 3 4


 int dprintf(int, const char * restrict, ...) __attribute__((__format__ (__printf__, 2, 3))) __attribute__((availability(macosx,introduced=10.7)));
 int vdprintf(int, const char * restrict, va_list) __attribute__((__format__ (__printf__, 2, 0))) __attribute__((availability(macosx,introduced=10.7)));
 ssize_t getdelim(char ** restrict __linep, size_t * restrict __linecapp, int __delimiter, FILE * restrict __stream) __attribute__((availability(macosx,introduced=10.7)));
 ssize_t getline(char ** restrict __linep, size_t * restrict __linecapp, FILE * restrict __stream) __attribute__((availability(macosx,introduced=10.7)));
 # 458 "/usr/include/stdio.h" 3 4
 extern const int sys_nerr;
 extern const char *const sys_errlist[];

 int asprintf(char ** restrict, const char * restrict, ...) __attribute__((__format__ (__printf__, 2, 3)));
 char *ctermid_r(char *);
 char *fgetln(FILE *, size_t *);
 const char *fmtcheck(const char *, const char *);
 int fpurge(FILE *);
 void setbuffer(FILE *, char *, int);
 int setlinebuf(FILE *);
 int vasprintf(char ** restrict, const char * restrict, va_list) __attribute__((__format__ (__printf__, 2, 0)));
 FILE *zopen(const char *, const char *, int);





 FILE *funopen(const void *,
                  int (* _Nullable)(void *, char *, int),
                  int (* _Nullable)(void *, const char *, int),
                  fpos_t (* _Nullable)(void *, fpos_t, int),
                  int (* _Nullable)(void *));
 # 498 "/usr/include/stdio.h" 3 4
 # 1 "/usr/include/secure/_stdio.h" 1 3 4
 # 31 "/usr/include/secure/_stdio.h" 3 4
 # 1 "/usr/include/secure/_common.h" 1 3 4
 # 32 "/usr/include/secure/_stdio.h" 2 3 4
 # 42 "/usr/include/secure/_stdio.h" 3 4
 extern int __sprintf_chk (char * restrict, int, size_t,
      const char * restrict, ...);
 # 52 "/usr/include/secure/_stdio.h" 3 4
 extern int __snprintf_chk (char * restrict, size_t, int, size_t,
       const char * restrict, ...);







 extern int __vsprintf_chk (char * restrict, int, size_t,
       const char * restrict, va_list);







 extern int __vsnprintf_chk (char * restrict, size_t, int, size_t,
        const char * restrict, va_list);
 # 499 "/usr/include/stdio.h" 2 3 4
 # 2 "report.c" 2

 int f(int a, int b) {
     return a + b;
 }
 int main()
 {
     int a = 1;
     int b = 2;
     printf("%x = %x + %x \n",f(a,b),a,b);
     return 0;
 }

展開されたコード自体は同様であるが,#includeするとヘッダーファイルをプリプロセッサ時点で全て読み込み展開する。その為externしている状態と比較するとstdio.h内の今回のコードとは直接関係ない関数や構造体の宣言も行われている。

アセンブラ

clang -S -O0 で出力されるアセンブラについて調べる

O0なので最適化されていない状態のアセンブラが吐かれる。

 $clang -S -O0 report.c
 $cat report.s | pbcopy

     .section    __TEXT,__text,regular,pure_instructions
     .macosx_version_min 10, 12
     .globl    _f
     .align    4, 0x90
 _f:                                     ## @f
     .cfi_startproc
 ## BB#0:
     pushq    %rbp
 Ltmp0:
     .cfi_def_cfa_offset 16
 Ltmp1:
     .cfi_offset %rbp, -16
     movq    %rsp, %rbp
 Ltmp2:
     .cfi_def_cfa_register %rbp
     movl    %edi, -4(%rbp)
     movl    %esi, -8(%rbp)
     movl    -4(%rbp), %esi
     addl    -8(%rbp), %esi
     movl    %esi, %eax
     popq    %rbp
     retq
     .cfi_endproc

     .globl    _main
     .align    4, 0x90
 _main:                                  ## @main
     .cfi_startproc
 ## BB#0:
     pushq    %rbp
 Ltmp3:
     .cfi_def_cfa_offset 16
 Ltmp4:
     .cfi_offset %rbp, -16
     movq    %rsp, %rbp
 Ltmp5:
     .cfi_def_cfa_register %rbp
     subq    $16, %rsp
     movl    $0, -4(%rbp)
     movl    $1, -8(%rbp)
     movl    $2, -12(%rbp)
     movl    -8(%rbp), %edi
     movl    -12(%rbp), %esi
     callq    _f
     leaq    L_.str(%rip), %rdi
     movl    -8(%rbp), %edx
     movl    -12(%rbp), %ecx
     movl    %eax, %esi
     movb    $0, %al
     callq    _printf
     xorl    %ecx, %ecx
     movl    %eax, -16(%rbp)         ## 4-byte Spill
     movl    %ecx, %eax
     addq    $16, %rsp
     popq    %rbp
     retq
     .cfi_endproc

     .section    __TEXT,__cstring,cstring_literals
 L_.str:                                 ## @.str
     .asciz    "%x = %x + %x \n"


 .subsections_via_symbols

main関数から関数fを呼んでいる箇所は callq _f の部分である。今回は関数fに(1,2)を引数として与えている為に

    movl    $1, -8(%rbp)
    movl    $2, -12(%rbp)
    movl    -8(%rbp), %edi
    movl    -12(%rbp), %esi

これらの部分でそれぞれ1,2を%edi,%esiに設定している。

関数fはアセンブラ結果の5行目から宣言されている。実際に関数fが計算している箇所を確認すると

     movl    %edi, -4(%rbp)
     movl    %esi, -8(%rbp)
     movl    -4(%rbp), %esi
     addl    -8(%rbp), %esi
     movl    %esi, %eax
     popq    %rbp

と先程設定した%ediと%esiの値を%rbp基準のスタック領域にそれぞれ格納した後 %esiに第1引数を読み戻し,%esiに第2引数を直接addlし,結果を戻り値用の%eaxに移した後にpopしている事が分かる。またaddl命令なのでこれらは32bitで演算されている。

次に最適化した状態のアセンブラを確認する

$ clang -S -O report.c
$ cat report.s

     .section    __TEXT,__text,regular,pure_instructions
     .macosx_version_min 10, 12
     .globl    _f
     .align    4, 0x90
 _f:                                     ## @f
     .cfi_startproc
 ## BB#0:
     pushq    %rbp
 Ltmp0:
     .cfi_def_cfa_offset 16
 Ltmp1:
     .cfi_offset %rbp, -16
     movq    %rsp, %rbp
 Ltmp2:
     .cfi_def_cfa_register %rbp
     leal    (%rdi,%rsi), %eax
     popq    %rbp
     retq
     .cfi_endproc

     .globl    _main
     .align    4, 0x90
 _main:                                  ## @main
     .cfi_startproc
 ## BB#0:
     pushq    %rbp
 Ltmp3:
     .cfi_def_cfa_offset 16
 Ltmp4:
     .cfi_offset %rbp, -16
     movq    %rsp, %rbp
 Ltmp5:
     .cfi_def_cfa_register %rbp
     leaq    L_.str(%rip), %rdi
     movl    $3, %esi
     movl    $1, %edx
     movl    $2, %ecx
     xorl    %eax, %eax
     callq    _printf
     xorl    %eax, %eax
     popq    %rbp
     retq
     .cfi_endproc

     .section    __TEXT,__cstring,cstring_literals
 L_.str:                                 ## @.str
     .asciz    "%x = %x + %x \n"


 .subsections_via_symbols

最適化していない状態と見比べると関数fはスタックへの格納を行わず,leal命令1つで加算するだけになっている。 mainルーチンを確認すると,最初に出力用のフォーマット文字列のアドレスを%rdiに設定し,既に1+2の結果である3を%esiに格納しているのがわかる。

つまりコンパイラが最適化として関数fが2変数(1と2)をaddしているだけであり,出力さえすれば良いと解釈した為 1+2の結果である3を関数fを呼ばずに静的に設定し,出力をしているのが理解できる。その為add命令が書かれておらず,callqでprintされるまではひたすらmov命令で数値をレジスタに格納しているのみである。

LLVM

$ clang -emit-llvm -S

LLVM バイトコードの出力が得られることを確認せよ。アセンブラとの対応を示せ。

$ clang -emit-llvm -S report.c

上記のコマンドを実行するとreport.llというファイルが生成され,これがllvmの中間木ファイルである。

$ cat report.ll | pbcopy

> ; ModuleID = 'report.c'
> source_filename = "report.c"
> target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-apple-macosx10.12.0"
> 
> @.str = private unnamed_addr constant [15 x i8] c"%x = %x + %x \0A\00", align 1
> 
> ; Function Attrs: nounwind ssp uwtable
> define i32 @f(i32, i32) #0 {
>   %3 = alloca i32, align 4
>   %4 = alloca i32, align 4
>   store i32 %0, i32* %3, align 4
>   store i32 %1, i32* %4, align 4
>   %5 = load i32, i32* %3, align 4
>   %6 = load i32, i32* %4, align 4
>   %7 = add nsw i32 %5, %6
>   ret i32 %7
> }
> 
> ; Function Attrs: nounwind ssp uwtable
> define i32 @main() #0 {
>   %1 = alloca i32, align 4
>   %2 = alloca i32, align 4
>   %3 = alloca i32, align 4
>   store i32 0, i32* %1, align 4
>   store i32 1, i32* %2, align 4
>   store i32 2, i32* %3, align 4
>   %4 = load i32, i32* %2, align 4
>   %5 = load i32, i32* %3, align 4
>   %6 = call i32 @f(i32 %4, i32 %5)
>   %7 = load i32, i32* %2, align 4
>   %8 = load i32, i32* %3, align 4
>   %9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i32 0, i32 0), i32 %6, i32 %7, i32 %8)
>   ret i32 0
> }
> 
> declare i32 @printf(i8*, ...) #1
> 
> attributes #0 = { nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
> attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
> 
> !llvm.module.flags = !{!0}
> !llvm.ident = !{!1}
> 
> !0 = !{i32 1, !"PIC Level", i32 2}
> !1 = !{!"Apple LLVM version 8.0.0 (clang-800.0.42.1)"}
>

今回のLLVMの中間表現を確認していく。まず1行目はコメントである。

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

三行目はDataLayoutを定義しており,まずeで始まっていることからターゲットのデータレイアウトがリトルエンディアンであることを示している。次のm:oの部分はMach-O形式の名前修飾(マングリング)で出力するということを示している。以降はビットサイズの指定などを行っている。

target triple = "x86_64-apple-macosx10.12.0" この部分はtargetとなるアーキテクチャを記述している。

@.str = private unnamed_addr constant [15 x i8] c"%x = %x + %x \0A\00", align 1

@がついているものはllvmの中でグローバル変数として解釈されるが,これは後のprintfで書き出されるものである。アセンブラの.section部分と対応している。

>     .section    __TEXT,__cstring,cstring_literals
> L_.str:                                 ## @.str
>     .asciz    "%x = %x + %x \n"

次のブロックでは関数fについて記述されている。引数2つはそれぞれ%0,%1に保存されている。この時点で%0,%1はおそらくmain関数で代入された値のレジスタそのものであるため一旦別レジスタに格納し,それを再度loadすることで値の中身をコピーし使っている。

アセンブラではaddlなど命令の後にlが付くことで32bitであると読み取れたが llvmの場合i32とbit長で幾つかのものを指定しているという対応がある。

関数fの最後では

%7 = add nsw i32 %5, %6 とadd命令で%5と%6の値を足して%7レジスタに格納している。 addの後に続くnswはNo Signed Wrapであり,計算上オーバーフローなどが発生した場合は llvmのPoison Valueとして処理されるという意味である。

main関数ではalloca命令でスタックフレーム上に利用する値の場所を確保した後に store命令で一旦そのスタックに値を書き込み,load命令でレジスタに入れて演算している。

>   store i32 0, i32* %1, align 4
>   store i32 1, i32* %2, align 4
>   store i32 2, i32* %3, align 4
>   %4 = load i32, i32* %2, align 4
>   %5 = load i32, i32* %3, align 4

ここで%4,%5にはそれぞれ1,2が32bitで入っている.

%6 = call i32 @f(i32 %4, i32 %5)

この部分で返り値32bitの関数fが%4,%5を引数として呼び出され結果が%6に代入されている。

出力場所はこの部分であり,既に%4,%5は使っている為別のレジスタに値を入れている。

>   %9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i32 0, i32 0), i32 %6, i32 %7, i32 %8)

またprintf関数はライブラリ関数であるのでdeclare命令で関数宣言を行っているのみである。

llvmのバイトコードが最適化によってどう変化するかも確認する。

$ clang -emit-llvm -O -S report.c

; ModuleID = 'report.c'
source_filename = "report.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"

@.str = private unnamed_addr constant [15 x i8] c"%x = %x + %x \0A\00", align 1

; Function Attrs: norecurse nounwind readnone ssp uwtable
define i32 @f(i32, i32) #0 {
  %3 = add nsw i32 %1, %0
  ret i32 %3
}

; Function Attrs: nounwind ssp uwtable
define i32 @main() #1 {
  %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i64 0, i64 0), i32 3, i32 1, i32 2)
  ret i32 0
}

; Function Attrs: nounwind
declare i32 @printf(i8* nocapture readonly, ...) #2

attributes #0 = { norecurse nounwind readnone ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"Apple LLVM version 8.0.0 (clang-800.0.42.1)"}

結果を見ると関数fは先程のアセンブラと同様にほぼ何もしない様に置き換わっている。定義自体は2つのレジスタの中身をaddするとなっているが今回は呼び出されていない事が分かる。

ここでmain関数を確認するとわずか2行になっており

>  %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i64 0, i64 0), i32 3, i32 1, i32 2)

この部分で既に計算された 3,1,2が先程宣言した出力フォーマットに流れている事が分かる。

これはアセンブラの

>     leaq    L_.str(%rip), %rdi
>     movl    $3, %esi
>     movl    $1, %edx
>     movl    $2, %ecx
>     xorl    %eax, %eax
>     callq    _printf

この場所に対応している。つまりllvmの中間木の時点で簡単な計算と使わなくても良い関数は,それぞれ最適化されて処理しているという事が分かる。

aout

出力される a.out を otool を使って調べる。

otoolはOS Xのバイナリ表示ツールである。今回出力したa.outを確認してみる。

otoolは幾つかのoptionがあるので,試していく。まずLオプションで使用されている共有ライブラリのバージョンなどがわかる。

$ otool -L a.out                                                            

> a.out:
>         /usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1238.60.2)

この結果このバイナリは/usr/lib/libSystem.B.dylibというCライブラリを使用している事が分かる。 dylib拡張子がついているものはMach-O特有のものである。

Mach-OはHeader,Load Commands,Dataの3ブロックで構成されている。さらに先頭にはFat Headerが付随している。headerを確認するにはhオプションを使う

$otool -h a.out

> Mach header
>       magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
>  0xfeedfacf 16777223          3  0x80           2    15       1200 0x00200085

またtオプションをつけるとTEXTsectionの部分のバイナリが出る。これにvをつけると逆アセンブルも行われる。

$ otool -t a.out
a.out:
Contents of (__TEXT,__text) section
0000000100000f10        55 48 89 e5 89 7d fc 89 75 f8 8b 75 fc 03 75 f8
0000000100000f20        89 f0 5d c3 66 66 66 2e 0f 1f 84 00 00 00 00 00
0000000100000f30        55 48 89 e5 48 83 ec 10 c7 45 fc 00 00 00 00 c7
0000000100000f40        45 f8 01 00 00 00 c7 45 f4 02 00 00 00 8b 7d f8
0000000100000f50        8b 75 f4 e8 b8 ff ff ff 48 8d 3d 3f 00 00 00 8b
0000000100000f60        55 f8 8b 4d f4 89 c6 b0 00 e8 0e 00 00 00 31 c9
0000000100000f70        89 45 f0 89 c8 48 83 c4 10 5d c3

$otool -tv a.out
>　a.out:
>　(__TEXT,__text) section
>　_f:
>　0000000100000f10    pushq    %rbp
>　0000000100000f11    movq    %rsp, %rbp
>　0000000100000f14    movl    %edi, -0x4(%rbp)
>　0000000100000f17    movl    %esi, -0x8(%rbp)
>　0000000100000f1a    movl    -0x4(%rbp), %esi
>　0000000100000f1d    addl    -0x8(%rbp), %esi
>　0000000100000f20    movl    %esi, %eax
>　0000000100000f22    popq    %rbp
>　0000000100000f23    retq
>　0000000100000f24    nopw    %cs:(%rax,%rax)
>　_main:
>　0000000100000f30    pushq    %rbp
>　0000000100000f31    movq    %rsp, %rbp
>　0000000100000f34    subq    $0x10, %rsp
>　0000000100000f38    movl    $0x0, -0x4(%rbp)
>　0000000100000f3f    movl    $0x1, -0x8(%rbp)
>　0000000100000f46    movl    $0x2, -0xc(%rbp)
>　0000000100000f4d    movl    -0x8(%rbp), %edi
>　0000000100000f50    movl    -0xc(%rbp), %esi
>　0000000100000f53    callq    0x100000f10
>　0000000100000f58    leaq    0x3f(%rip), %rdi
>　0000000100000f5f    movl    -0x8(%rbp), %edx
>　0000000100000f62    movl    -0xc(%rbp), %ecx
>　0000000100000f65    movl    %eax, %esi
>　0000000100000f67    movb    $0x0, %al
>　0000000100000f69    callq    0x100000f7c
>　0000000100000f6e    xorl    %ecx, %ecx
>　0000000100000f70    movl    %eax, -0x10(%rbp)
>　0000000100000f73    movl    %ecx, %eax
>　0000000100000f75    addq    $0x10, %rsp
>　0000000100000f79    popq    %rbp
>　0000000100000f7a    retq

書式にやや違いがあるが,ほぼアセンブラが再現できている。

(5) ARMアセンブラ

-arch arm を付けて、ARMのアセンブラの出力を調べよ

clangのarchオプションの後にアーキテクチャを指定するとそのアセンブラが出力される。

$ clang -S -O0 -arch arm report.c
$ cat report.s

>     .section    __TEXT,__text,regular,pure_instructions
>     .macosx_version_min 10, 12
>     .syntax unified
>     .globl    _f
>     .align    2
> _f:                                     @ @f
> @ BB#0:
>     sub    sp, sp, #16
>     mov    r2, r1
>     mov    r3, r0
>     str    r0, [sp, #12]
>     str    r1, [sp, #8]
>     ldr    r0, [sp, #12]
>     add    r0, r0, r1
>     str    r2, [sp, #4]            @ 4-byte Spill
>     str    r3, [sp]                @ 4-byte Spill
>     add    sp, sp, #16
>     bx    lr
> 
>     .globl    _main
>     .align    2
> _main:                                  @ @main
> @ BB#0:
>     push    {r7, lr}
>     mov    r7, sp
>     sub    sp, sp, #24
>     mov    r0, #0
>     str    r0, [r7, #-4]
>     mov    r1, #1
>     str    r1, [r7, #-8]
>     mov    r1, #2
>     str    r1, [sp, #12]
>     ldr    r2, [r7, #-8]
>     str    r0, [sp, #8]            @ 4-byte Spill
>     mov    r0, r2
>     bl    _f
>     ldr    r2, [r7, #-8]
>     ldr    r3, [sp, #12]
>     ldr    r1, LCPI1_0
> LPC1_0:
>     add    r1, pc, r1
>     str    r0, [sp, #4]            @ 4-byte Spill
>     mov    r0, r1
>     ldr    r1, [sp, #4]            @ 4-byte Reload
>     bl    _printf
>     ldr    r1, [sp, #8]            @ 4-byte Reload
>     str    r0, [sp]                @ 4-byte Spill
>     mov    r0, r1
>     mov    sp, r7
>     pop    {r7, lr}
>     bx    lr
>     .align    2
> @ BB#1:
>     .data_region
> LCPI1_0:
>     .long    L_.str-(LPC1_0+8)
>     .end_data_region
> 
>     .section    __TEXT,__cstring,cstring_literals
> L_.str:                                 @ @.str
>     .asciz    "%x = %x + %x \n"
> 
> 
> .subsections_via_symbols
>

ここで関数fを見てみる.

_f:                                     @ @f
@ BB#0:
    sub    sp, sp, #16
    mov    r2, r1
    mov    r3, r0
    str    r0, [sp, #12]
    str    r1, [sp, #8]

x86アセンブラの場合はmovをした後にaddlを行っていたが
armの場合はstr命令が走っている.

    ldr    r0, [sp, #12]
    add    r0, r0, r1

またadd命令などを見るとアドレッシングモードを利用している事がわかる。

    str    r2, [sp, #4]            @ 4-byte Spill
    str    r3, [sp]                @ 4-byte Spill
    add    sp, sp, #16
    bx    lr

@ 4-byte Spillのコメントを見るとarmの場合str命令で書くbyteごと確保領域を変更することで使用するレジスタのビットサイズを操作している。

コンパイラ

cpp

解説

アセンブラ

LLVM

llvmのバイトコードが最適化によってどう変化するかも確認する。

aout

results matching ""

No results matching ""