I am quite new to this domain. I am currently working on an academic research project that involves understanding "when" a system call is made from C code. I am using the Clang/LLVM toolchain to accomplish this task. Here is an example program:
#include <stdio.h>
/* Minimal example: opens "input.txt" for reading and exits.
 * The returned stream is never read from or explicitly closed. */
int main(int argc, char *argv[])
{
    FILE *fp = fopen("input.txt", "r");

    (void)fp; /* handle is intentionally unused */
    return 0;
}
This code should make one syscall inside the fopen function, namely in __open. Therefore, if we disassemble the __open function, we should see a syscall instruction in the assembly code.
(lldb) disassemble --name __open
a2_c`__open:
-> 0x446ed0 <+0>: endbr64
0x446ed4 <+4>: pushq %r12
0x446ed6 <+6>: movl %esi, %r10d
0x446ed9 <+9>: movl %esi, %r12d
0x446edc <+12>: pushq %rbp
0x446edd <+13>: movq %rdi, %rbp
0x446ee0 <+16>: subq $0x68, %rsp
0x446ee4 <+20>: movq %rdx, 0x40(%rsp)
0x446ee9 <+25>: movq %fs:0x28, %rax
0x446ef2 <+34>: movq %rax, 0x28(%rsp)
0x446ef7 <+39>: xorl %eax, %eax
0x446ef9 <+41>: andl $0x40, %r10d
0x446efd <+45>: jne 0x446f58 ; <+136>
0x446eff <+47>: movl %esi, %eax
0x446f01 <+49>: andl $0x410000, %eax ; imm = 0x410000
0x446f06 <+54>: cmpl $0x410000, %eax ; imm = 0x410000
0x446f0b <+59>: je 0x446f58 ; <+136>
0x446f0d <+61>: movl %fs:0x18, %eax
0x446f15 <+69>: testl %eax, %eax
0x446f17 <+71>: jne 0x446f80 ; <+176>
0x446f19 <+73>: movl %r12d, %edx
0x446f1c <+76>: movq %rbp, %rsi
0x446f1f <+79>: movl $0xffffff9c, %edi ; imm = 0xFFFFFF9C
0x446f24 <+84>: movl $0x101, %eax ; imm = 0x101
0x446f29 <+89>: syscall
0x446f2b <+91>: cmpq $-0x1000, %rax ; imm = 0xF000
0x446f31 <+97>: ja 0x446fc8 ; <+248>
0x446f37 <+103>: movq 0x28(%rsp), %rdx
0x446f3c <+108>: subq %fs:0x28, %rdx
0x446f45 <+117>: jne 0x446ff3 ; <+291>
0x446f4b <+123>: addq $0x68, %rsp
0x446f4f <+127>: popq %rbp
0x446f50 <+128>: popq %r12
0x446f52 <+130>: retq
0x446f53 <+131>: nopl (%rax,%rax)
0x446f58 <+136>: leaq 0x80(%rsp), %rax
0x446f60 <+144>: movl $0x10, 0x10(%rsp)
0x446f68 <+152>: movl 0x40(%rsp), %r10d
0x446f6d <+157>: movq %rax, 0x18(%rsp)
0x446f72 <+162>: leaq 0x30(%rsp), %rax
0x446f77 <+167>: movq %rax, 0x20(%rsp)
0x446f7c <+172>: jmp 0x446f0d ; <+61>
0x446f7e <+174>: nop
0x446f80 <+176>: movl %r10d, 0xc(%rsp)
0x446f85 <+181>: callq 0x46b640 ; __pthread_enable_asynccancel
0x446f8a <+186>: movl 0xc(%rsp), %r10d
0x446f8f <+191>: movl %r12d, %edx
0x446f92 <+194>: movq %rbp, %rsi
0x446f95 <+197>: movl %eax, %r8d
0x446f98 <+200>: movl $0xffffff9c, %edi ; imm = 0xFFFFFF9C
0x446f9d <+205>: movl $0x101, %eax ; imm = 0x101
0x446fa2 <+210>: syscall
0x446fa4 <+212>: cmpq $-0x1000, %rax ; imm = 0xF000
0x446faa <+218>: ja 0x446fe0 ; <+272>
0x446fac <+220>: movl %r8d, %edi
0x446faf <+223>: movl %eax, 0xc(%rsp)
0x446fb3 <+227>: callq 0x46b6b0 ; __pthread_disable_asynccancel
0x446fb8 <+232>: movl 0xc(%rsp), %eax
0x446fbc <+236>: jmp 0x446f37 ; <+103>
0x446fc1 <+241>: nopl (%rax)
0x446fc8 <+248>: movq $-0x48, %rdx
0x446fcf <+255>: negl %eax
0x446fd1 <+257>: movl %eax, %fs:(%rdx)
0x446fd4 <+260>: movl $0xffffffff, %eax ; imm = 0xFFFFFFFF
0x446fd9 <+265>: jmp 0x446f37 ; <+103>
0x446fde <+270>: nop
0x446fe0 <+272>: movq $-0x48, %rdx
0x446fe7 <+279>: negl %eax
0x446fe9 <+281>: movl %eax, %fs:(%rdx)
0x446fec <+284>: movl $0xffffffff, %eax ; imm = 0xFFFFFFFF
0x446ff1 <+289>: jmp 0x446fac ; <+220>
0x446ff3 <+291>: callq 0x449ab0 ; __stack_chk_fail_local
(lldb)
Up to this point, things go as expected. The deviation happens in the eax register. My understanding is that the operating system executes a system call when the syscall instruction is invoked, and that the nature of the executed system call depends on the value in the eax register.
Here is the instruction that moves the value 0x101 into the eax register just before triggering the syscall: 0x446f24 <+84>: movl $0x101, %eax. As far as I know, the value in eax for an open syscall should be 0x02 (described here for Linux).
But in this snippet, it looks like the value being loaded into eax is whatever is stored at memory location 0x101. My question is: why are we transferring a memory location's value into eax? Shouldn't the value moved into eax be 0x02 before the syscall is triggered? What am I missing here?