This is an archived copy of a previous semester's site.
Please see the current semester's site.
If we compile mystrlen.c, we get a binary file. If we inspect the contents of that binary file with objdump -d we find the following bytes and their corresponding machine code meaning in assembly.
mystrlen.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <strlen1>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
b: 80 7c 07 01 00 cmpb $0x0,0x1(%rdi,%rax,1)
10: 48 8d 40 01 lea 0x1(%rax),%rax
14: 75 f5 jne b <strlen1+0xb>
16: 5d pop %rbp
17: c3 ret
0000000000000018 <strlen2>:
18: 55 push %rbp
19: 48 89 e5 mov %rsp,%rbp
1c: 80 3f 00 cmpb $0x0,(%rdi)
1f: 74 12 je 33 <strlen2+0x1b>
21: 31 c9 xor %ecx,%ecx
23: 48 8d 41 01 lea 0x1(%rcx),%rax
27: 80 7c 0f 01 00 cmpb $0x0,0x1(%rdi,%rcx,1)
2c: 48 89 c1 mov %rax,%rcx
2f: 75 f2 jne 23 <strlen2+0xb>
31: eb 02 jmp 35 <strlen2+0x1d>
33: 31 c0 xor %eax,%eax
35: 5d pop %rbp
36: c3 ret
0000000000000037 <strlen3>:
37: 55 push %rbp
38: 48 89 e5 mov %rsp,%rbp
3b: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
42: 80 7c 07 01 00 cmpb $0x0,0x1(%rdi,%rax,1)
47: 48 8d 40 01 lea 0x1(%rax),%rax
4b: 75 f5 jne 42 <strlen3+0xb>
4d: 5d pop %rbp
4e: c3 ret
000000000000004f <main>:
4f: 55 push %rbp
50: 48 89 e5 mov %rsp,%rbp
53: bf 00 00 00 00 mov $0x0,%edi
58: be 00 00 00 00 mov $0x0,%esi
5d: ba 12 00 00 00 mov $0x12,%edx
62: 31 c0 xor %eax,%eax
64: e8 00 00 00 00 call 69 <main+0x1a>
69: bf 00 00 00 00 mov $0x0,%edi
6e: be 00 00 00 00 mov $0x0,%esi
73: ba 12 00 00 00 mov $0x12,%edx
78: 31 c0 xor %eax,%eax
7a: e8 00 00 00 00 call 7f <main+0x30>
7f: bf 00 00 00 00 mov $0x0,%edi
84: be 00 00 00 00 mov $0x0,%esi
89: ba 12 00 00 00 mov $0x12,%edx
8e: 31 c0 xor %eax,%eax
90: e8 00 00 00 00 call 95 <main+0x46>
95: bf 00 00 00 00 mov $0x0,%edi
9a: be 00 00 00 00 mov $0x0,%esi
9f: ba 12 00 00 00 mov $0x12,%edx
a4: 31 c0 xor %eax,%eax
a6: e8 00 00 00 00 call ab <main+0x5c>
ab: 31 c0 xor %eax,%eax
ad: 5d pop %rbp
ae: c3 ret
Comments:
- Registers are prefixed with % and have strange names like %rdi and %ecx left over from the 1970s when they expected people to code directly in assembly and gave the registers names that suggested a recommended purpose for each.
- Constants, called immediates in machine code, have different syntax when they are values themselves, like $0x0, or being used as an address, like 33.
- %rsp is special: it’s the stack pointer and is modified by instructions like call, ret, push, and pop.
- Memory is accessed with complicated addressing like 0x1(%rdi,%rax,1), which does some adding and some shifting to create the address.
- Conditionals are implemented with an instruction like cmpb that subtracts two values and compares the result to 0, and then a conditional jump like jne that only jumps if the last comparison was (in this case) not equal to zero.
- Compilers often use strange instructions like xor %ecx,%ecx to zero out the cx register or the complicated lea instruction to perform addition because these require fewer bytes than other versions of the same operation.
mystrlen.o: file format elf32-littlearm
Disassembly of section .text:
00000000 <strlen1>:
0: e3a01000 mov r1, #0
4: e7d02001 ldrb r2, [r0, r1]
8: e2811001 add r1, r1, #1
c: e3520000 cmp r2, #0
10: 1afffffb bne 4 <strlen1+0x4>
14: e2410001 sub r0, r1, #1
18: e12fff1e bx lr
0000001c <strlen2>:
1c: e5d01000 ldrb r1, [r0]
20: e3510000 cmp r1, #0
24: 03a00000 moveq r0, #0
28: 012fff1e bxeq lr
2c: e2801001 add r1, r0, #1
30: e3a00000 mov r0, #0
34: e7d12000 ldrb r2, [r1, r0]
38: e2800001 add r0, r0, #1
3c: e3520000 cmp r2, #0
40: 1afffffb bne 34 <strlen2+0x18>
44: e12fff1e bx lr
00000048 <strlen3>:
48: e3a01000 mov r1, #0
4c: e7d02001 ldrb r2, [r0, r1]
50: e2811001 add r1, r1, #1
54: e3520000 cmp r2, #0
58: 1afffffb bne 4c <strlen3+0x4>
5c: e2410001 sub r0, r1, #1
60: e12fff1e bx lr
00000064 <main>:
64: e92d4c10 push {r4, sl, fp, lr}
68: e28db008 add fp, sp, #8
6c: e59f404c ldr r4, [pc, #76] @ c0 <main+0x5c>
70: e59f0044 ldr r0, [pc, #68] @ bc <main+0x58>
74: e3a02012 mov r2, #18
78: e1a01004 mov r1, r4
7c: ebfffffe bl 0 <printf>
80: e59f003c ldr r0, [pc, #60] @ c4 <main+0x60>
84: e1a01004 mov r1, r4
88: e3a02012 mov r2, #18
8c: ebfffffe bl 0 <printf>
90: e59f0030 ldr r0, [pc, #48] @ c8 <main+0x64>
94: e1a01004 mov r1, r4
98: e3a02012 mov r2, #18
9c: ebfffffe bl 0 <printf>
a0: e59f0024 ldr r0, [pc, #36] @ cc <main+0x68>
a4: e1a01004 mov r1, r4
a8: e3a02012 mov r2, #18
ac: ebfffffe bl 0 <printf>
b0: e3a00000 mov r0, #0
b4: e8bd4c10 pop {r4, sl, fp, lr}
b8: e12fff1e bx lr
bc: 00000013 andeq r0, r0, r3, lsl r0
c0: 00000000 andeq r0, r0, r0
c4: 00000027 andeq r0, r0, r7, lsr #32
c8: 0000003c andeq r0, r0, ip, lsr r0
cc: 00000051 andeq r0, r0, r1, asr r0
Comments:
- Registers have simple names like r0, r1 and so on; plus a few special-purpose ones like fp, sp, and so on, that are manipulated by some instructions like push and pop.
- Constants, called immediates in machine code, have different syntax when they are values themselves or the address of data, like #0; or being used as an address of code, like 34.
- Memory is accessed with load and store instructions like ldr, which can do some arithmetic in the address.
- Conditionals are implemented with an instruction like cmp, or the flag-setting form of most math instructions like add (written adds), that compares its result to 0, and then a conditional modifier on any instruction, like moveq for a conditional move (only if the comparison was equal to zero) or bne for a conditional jump (only branch if the comparison was not equal to zero). Note that the b in ldrb means "byte", not a condition: ldrb is an ordinary, unconditional one-byte load.