0

I am quite new to this domain. I am currently working on an academic research project which involves understanding "when" a system call is made from C code. I am using the Clang/LLVM toolchain to accomplish this task. Here is an example code:

#include <stdio.h>

/*
 * Minimal example: performs a single fopen() of "input.txt" in read mode
 * so that the system call issued underneath can be inspected.
 * The returned stream is neither checked nor closed, and argc/argv are
 * unused. Always returns 0.
 */
int main(int argc, char *argv[])
{
    FILE *fp = fopen("input.txt","r");

    return 0;
}

This code should have one syscall in the fopen function, which is __open. Therefore, if we disassemble the __open function, we should see a syscall instruction in the assembly code.

(lldb) disassemble --name __open
a2_c`__open:
    ; lldb disassembly (AT&T syntax: source operand first, `$` marks an immediate) of the
    ; libc wrapper reached from fopen. On entry %rdi, %esi and %rdx carry the first three
    ; arguments (presumably path, flags and mode -- confirm against the libc source).
    ; Both syscall sites load the immediate 0x101 (decimal 257) into %eax; per the comments
    ; on this question that number selects the openat system call rather than open.
->  0x446ed0 <+0>:   endbr64 
    ; Save callee-saved %r12/%rbp, then keep copies of the second argument (%r10d, %r12d)
    ; and of the first argument (%rbp).
    0x446ed4 <+4>:   pushq  %r12
    0x446ed6 <+6>:   movl   %esi, %r10d
    0x446ed9 <+9>:   movl   %esi, %r12d
    0x446edc <+12>:  pushq  %rbp
    0x446edd <+13>:  movq   %rdi, %rbp
    0x446ee0 <+16>:  subq   $0x68, %rsp
    ; Spill the third argument; it is reloaded into %r10d at <+152> when needed.
    0x446ee4 <+20>:  movq   %rdx, 0x40(%rsp)
    ; Copy the value at %fs:0x28 into the frame. It is compared again at <+108>, and a
    ; mismatch ends in the call to __stack_chk_fail_local at <+291>.
    0x446ee9 <+25>:  movq   %fs:0x28, %rax
    0x446ef2 <+34>:  movq   %rax, 0x28(%rsp)
    0x446ef7 <+39>:  xorl   %eax, %eax
    ; %r10d = second argument & 0x40. If that bit is set (presumably O_CREAT -- confirm),
    ; detour through <+136>; otherwise %r10d stays 0.
    0x446ef9 <+41>:  andl   $0x40, %r10d
    0x446efd <+45>:  jne    0x446f58                  ; <+136>
    ; Same detour when every bit of 0x410000 is set (presumably O_TMPFILE -- confirm).
    0x446eff <+47>:  movl   %esi, %eax
    0x446f01 <+49>:  andl   $0x410000, %eax           ; imm = 0x410000 
    0x446f06 <+54>:  cmpl   $0x410000, %eax           ; imm = 0x410000 
    0x446f0b <+59>:  je     0x446f58                  ; <+136>
    ; A non-zero 32-bit value at %fs:0x18 selects the variant at <+176>, which brackets
    ; the syscall with __pthread_enable_asynccancel / __pthread_disable_asynccancel.
    0x446f0d <+61>:  movl   %fs:0x18, %eax
    0x446f15 <+69>:  testl  %eax, %eax
    0x446f17 <+71>:  jne    0x446f80                  ; <+176>
    ; Plain path. Syscall arguments: %edi = 0xffffff9c (-100 as a signed 32-bit value;
    ; presumably AT_FDCWD -- confirm), %rsi = first argument, %edx = second argument,
    ; %r10d = 0 or the reloaded third argument.
    ; `movl $0x101, %eax` writes the constant 0x101 itself into %eax (the syscall number);
    ; nothing is read from memory address 0x101.
    0x446f19 <+73>:  movl   %r12d, %edx
    0x446f1c <+76>:  movq   %rbp, %rsi
    0x446f1f <+79>:  movl   $0xffffff9c, %edi         ; imm = 0xFFFFFF9C 
    0x446f24 <+84>:  movl   $0x101, %eax              ; imm = 0x101 
    0x446f29 <+89>:  syscall 
    ; An unsigned result above -0x1000 is a negated error code: handled at <+248>.
    0x446f2b <+91>:  cmpq   $-0x1000, %rax            ; imm = 0xF000 
    0x446f31 <+97>:  ja     0x446fc8                  ; <+248>
    ; Common exit: re-check the saved %fs:0x28 copy, release the frame, restore the
    ; saved registers and return with the result in %rax.
    0x446f37 <+103>: movq   0x28(%rsp), %rdx
    0x446f3c <+108>: subq   %fs:0x28, %rdx
    0x446f45 <+117>: jne    0x446ff3                  ; <+291>
    0x446f4b <+123>: addq   $0x68, %rsp
    0x446f4f <+127>: popq   %rbp
    0x446f50 <+128>: popq   %r12
    0x446f52 <+130>: retq   
    0x446f53 <+131>: nopl   (%rax,%rax)
    ; Reached when one of the flag tests above matched: fills three stack slots (looks
    ; like va_list bookkeeping -- confirm), reloads the spilled third argument into
    ; %r10d, then rejoins the main flow at <+61>.
    0x446f58 <+136>: leaq   0x80(%rsp), %rax
    0x446f60 <+144>: movl   $0x10, 0x10(%rsp)
    0x446f68 <+152>: movl   0x40(%rsp), %r10d
    0x446f6d <+157>: movq   %rax, 0x18(%rsp)
    0x446f72 <+162>: leaq   0x30(%rsp), %rax
    0x446f77 <+167>: movq   %rax, 0x20(%rsp)
    0x446f7c <+172>: jmp    0x446f0d                  ; <+61>
    0x446f7e <+174>: nop    
    ; Variant taken when %fs:0x18 is non-zero. %r10d is preserved across the call to
    ; __pthread_enable_asynccancel, whose return value is kept in %r8d; the same syscall
    ; (number 0x101, same argument registers) is then issued.
    0x446f80 <+176>: movl   %r10d, 0xc(%rsp)
    0x446f85 <+181>: callq  0x46b640                  ; __pthread_enable_asynccancel
    0x446f8a <+186>: movl   0xc(%rsp), %r10d
    0x446f8f <+191>: movl   %r12d, %edx
    0x446f92 <+194>: movq   %rbp, %rsi
    0x446f95 <+197>: movl   %eax, %r8d
    0x446f98 <+200>: movl   $0xffffff9c, %edi         ; imm = 0xFFFFFF9C 
    0x446f9d <+205>: movl   $0x101, %eax              ; imm = 0x101 
    0x446fa2 <+210>: syscall 
    0x446fa4 <+212>: cmpq   $-0x1000, %rax            ; imm = 0xF000 
    0x446faa <+218>: ja     0x446fe0                  ; <+272>
    ; Pass the saved %r8d value to __pthread_disable_asynccancel, keeping the result
    ; in a stack slot across the call, then join the common exit.
    0x446fac <+220>: movl   %r8d, %edi
    0x446faf <+223>: movl   %eax, 0xc(%rsp)
    0x446fb3 <+227>: callq  0x46b6b0                  ; __pthread_disable_asynccancel
    0x446fb8 <+232>: movl   0xc(%rsp), %eax
    0x446fbc <+236>: jmp    0x446f37                  ; <+103>
    0x446fc1 <+241>: nopl   (%rax)
    ; Error exit for the plain path: store the negated %eax at %fs:-0x48 (presumably the
    ; thread-local errno slot -- confirm) and return -1.
    0x446fc8 <+248>: movq   $-0x48, %rdx
    0x446fcf <+255>: negl   %eax
    0x446fd1 <+257>: movl   %eax, %fs:(%rdx)
    0x446fd4 <+260>: movl   $0xffffffff, %eax         ; imm = 0xFFFFFFFF 
    0x446fd9 <+265>: jmp    0x446f37                  ; <+103>
    0x446fde <+270>: nop    
    ; Same error handling for the variant at <+176>, continuing at <+220> so that
    ; __pthread_disable_asynccancel is still called.
    0x446fe0 <+272>: movq   $-0x48, %rdx
    0x446fe7 <+279>: negl   %eax
    0x446fe9 <+281>: movl   %eax, %fs:(%rdx)
    0x446fec <+284>: movl   $0xffffffff, %eax         ; imm = 0xFFFFFFFF 
    0x446ff1 <+289>: jmp    0x446fac                  ; <+220>
    ; Target of the %fs:0x28 mismatch branch at <+117>.
    0x446ff3 <+291>: callq  0x449ab0                  ; __stack_chk_fail_local
(lldb) 


Up to this point things go as expected. The deviation happens in the eax register. My understanding is that the operating system executes a syscall when the syscall command is invoked. The nature of the executed syscall depends on the value in eax register.

Here is the instruction which moves the value 0x101 into the eax register just before triggering the syscall: `0x446f24 <+84>: movl $0x101, %eax`. As far as I knew, the value in eax for an open syscall should be 0x02 (described here for Linux).

But in this snippet, it looks like the value being set in eax is whatever is stored at memory location 0x101. My question is: why are we transferring some memory location's value into eax? Shouldn't the value being transferred to eax be 0x02, and then the syscall should be triggered? What am I missing here?

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
Monowar Anjum
  • 41
  • 1
  • 7
  • 2
    Your libc implementation seems to be using the [openat](https://man7.org/linux/man-pages/man2/open.2.html) system call. – Jester Dec 10 '22 at 22:10
  • See also [glibc source](https://github.com/bminor/glibc/blob/master/sysdeps/unix/sysv/linux/open.c) if that applies. – Jester Dec 10 '22 at 22:17
  • `mov $0x101, %eax` sets RAX=0x101. It doesn't load from memory, that would be `mov 0x101, %eax`, note the lack of `$`. [What's difference between number with $ or without $ symbol in at&t assembly syntax?](https://stackoverflow.com/q/18996870) – Peter Cordes Dec 10 '22 at 22:52
  • Duplicate of [Uneven behavior in different system calls hooking](https://stackoverflow.com/q/52580645) which shows the glibc patch that changed to using the `openat` system call for everything, never `open`. – Peter Cordes Dec 10 '22 at 23:05

0 Answers0