Mastodon

Assembly Language String Library So Far

I wanted to share my progress on Assignment 1 (string processing library) of the Low-Level Programming book. See code below.

I've been working on this assignment in my free time for the last 3 months, and I've invested many hours in it. It was slow going at first, but I got faster, and after print_uint (which took me the longest) I had a good understanding of how to manipulate the stack to create the memory workspaces I needed.

Even after 3 months, I still have a few methods to complete, but I have to move on to other things. I simply don't have the time to dedicate to assembly language. I'll be moving on in the book to the further chapters and onto C. Compared to dealing with Assembly, C is heavenly.

I have learned so much, and I feel very comfortable with Assembly. But I have to invest my time wisely. Other than gaining an appreciation of the low-level details of how programs are processed, I don't have a use for Assembly language in my day job. So I need to put my time into other learning pursuits.

; NASM (Intel syntax), x86-64. The routines below invoke the kernel
; directly through the `syscall` instruction.

; No initialized data is defined in this listing; the section is empty.
section .data 

; All routines below are assembled into the code section.
section .text

; exit
;   Terminates the process through syscall 60 (exit) with status 0.
;   Any value the caller left in rdi is discarded.
exit:
    xor edi, edi              ; status = 0 (32-bit write zero-extends into rdi)
    mov eax, 60               ; syscall number: exit
    syscall

; string_length
;   In:  rdi = address of a NUL-terminated string
;   Out: rax = number of bytes that precede the terminating NUL
;   Only rax and the flags are modified; rdi is left untouched.
string_length:
    mov rax, -1               ; start one below zero so the first inc lands on index 0

.length_scan:
    inc rax                   ; move to the next candidate index
    cmp byte [rdi + rax], 0   ; is this byte the terminator?
    jne .length_scan          ; no: keep scanning
    ret                       ; yes: rax already holds the length

; print_string
;   In:  rdi = address of a NUL-terminated string
;   Writes the bytes before the NUL to stdout (fd 1) with the write syscall.
;   Out: rax = whatever the write syscall returned
print_string:
    push rdi                  ; keep the string address across the call
    call string_length        ; rax = byte count
    mov rdx, rax              ; write arg 3: number of bytes
    pop rsi                   ; write arg 2: buffer = the saved string address
    mov edi, 1                ; write arg 1: stdout file descriptor
    mov eax, 1                ; syscall number: write
    syscall
    ret

; print_char
;   In:  rdi = character code (only the low byte, dil, is printed)
;   Writes exactly one byte to stdout (fd 1).
;   Out: rax = whatever the write syscall returned; rdi is restored.
;
;   The character is spilled to the stack only to give it an address.
;   The byte count is passed explicitly, so the output does not depend
;   on the upper bytes of rdi being zero, and a code of 0 is written as
;   a single NUL byte.
print_char:
    push rdi                  ; spill the character so it has a memory address
    mov rsi, rsp              ; write arg 2: low byte of the saved rdi (little-endian)
    mov edx, 1                ; write arg 3: exactly one byte
    mov edi, 1                ; write arg 1: stdout file descriptor
    mov eax, 1                ; syscall number: write
    syscall
    pop rdi                   ; release the slot and restore the caller's rdi
    ret

; print_newline
;   Prints a single line feed (character code 10) through print_char.
;   The code is passed by value in rdi because print_char expects a
;   character, not the address of one.
print_newline:
    mov edi, 10               ; line feed (32-bit write zero-extends into rdi)
    jmp print_char            ; tail call: print_char's ret returns to our caller

; print_uint
;   In:  rdi = 64-bit unsigned integer
;   Prints its decimal representation to stdout via print_string.
;
;   Digits are produced least-significant first, so they are stored
;   from the end of a stack buffer towards its start. The buffer is
;   24 bytes: up to 20 digits plus a NUL terminator.
;   Assuming rsp is 8 mod 16 on entry (SysV), the 24-byte reservation
;   leaves it 16-byte aligned for the call.
;
;   The loop body runs before the zero test so that an input of 0
;   still emits the digit '0' rather than an empty string.
print_uint:
    mov rax, rdi              ; rax = value still to be converted (dividend)
    mov ecx, 10               ; divisor
    sub rsp, 24               ; reserve the digit buffer
    lea rsi, [rsp + 23]       ; rsi -> last byte of the buffer
    mov byte [rsi], 0         ; NUL terminator goes in first

.print_uint_loop:
    xor edx, edx              ; DIV divides rdx:rax, so the high half must be zero
    div rcx                   ; rax = quotient, rdx = remainder (0..9)
    add dl, '0'               ; remainder -> ASCII digit
    dec rsi                   ; step towards the front of the buffer
    mov [rsi], dl
    test rax, rax             ; anything left to convert?
    jnz .print_uint_loop

    mov rdi, rsi              ; rdi -> most significant digit
    call print_string
    add rsp, 24               ; release the digit buffer
    ret

; print_int
;   In:  rdi = 64-bit signed integer
;   Prints its decimal representation to stdout: a '-' via print_char
;   when the value is negative, then the magnitude via print_uint.
print_int:
    test rdi, rdi             ; sets SF from the sign bit
    jns print_uint            ; non-negative: print_uint handles it as-is (tail call)

    push rdi                  ; keep the value while the sign is printed
    mov edi, '-'
    call print_char
    pop rdi
    neg rdi                   ; two's-complement negate -> magnitude
    jmp print_uint            ; tail call: print_uint returns to our caller

; read_char
;   Reads one byte from stdin (fd 0) with the read syscall (number 0).
;   Out: rax = the byte read, zero-extended.
;        The 8-byte stack slot used as the buffer is zeroed beforehand,
;        so rax is 0 if read stores nothing into it (e.g. end of input).
read_char:
    push 0                    ; zeroed 8-byte slot: read buffer and default result
    xor eax, eax              ; syscall number: read (0)
    xor edi, edi              ; read arg 1: stdin file descriptor (0)
    mov rsi, rsp              ; read arg 2: buffer = the slot just pushed
    mov edx, 1                ; read arg 3: one byte
    syscall
    pop rax                   ; result: the slot's contents
    ret

; read_word
;   Reads one whitespace-delimited word from stdin into a caller buffer.
;   In:  rdi = address of buffer
;        rsi = size of buffer in bytes (must include room for the NUL)
;   Out: rax = buffer address, rdx = word length     (word fits)
;        rax = 0, rdx unspecified                    (word too long, or size is 0)
;
;   Leading tab / line feed / carriage return / space bytes are skipped.
;   The word ends at the next such byte, or when read_char returns 0.
;   On success the buffer is NUL-terminated.
;
;   State is kept in r12-r14, which are saved here and restored before
;   returning, so no callee-saved register is lost. This relies on
;   read_char leaving r12-r14 untouched.
;   Assuming rsp is 8 mod 16 on entry (SysV), the three pushes leave it
;   16-byte aligned for the calls to read_char.
read_word:
    push r12
    push r13
    push r14

    test rsi, rsi
    jz .read_word_too_long    ; size 0: no room even for the terminator

    mov r12, rdi              ; r12 = buffer address
    lea r13, [rsi - 1]        ; r13 = max characters (one byte reserved for NUL)
    xor r14d, r14d            ; r14 = characters stored so far

.read_word_skip_whitespace:
    call read_char            ; al = next input byte, 0 when nothing was read
    cmp al, 0x20              ; space
    je .read_word_skip_whitespace
    cmp al, 0x09              ; tab
    je .read_word_skip_whitespace
    cmp al, 0x0A              ; line feed
    je .read_word_skip_whitespace
    cmp al, 0x0D              ; carriage return
    je .read_word_skip_whitespace
    test al, al               ; input ended before any word: return empty word
    jz .read_word_done

.read_word_store:
    cmp r14, r13              ; check BEFORE storing so the buffer is never overrun
    jae .read_word_too_long
    mov [r12 + r14], al
    inc r14

    call read_char
    cmp al, 0x20              ; space
    je .read_word_done
    cmp al, 0x09              ; tab
    je .read_word_done
    cmp al, 0x0A              ; line feed
    je .read_word_done
    cmp al, 0x0D              ; carriage return
    je .read_word_done
    test al, al               ; 0 from read_char also ends the word
    jnz .read_word_store

.read_word_done:
    mov byte [r12 + r14], 0   ; r14 <= size - 1, so the terminator is in bounds
    mov rax, r12
    mov rdx, r14
    pop r14
    pop r13
    pop r12
    ret

.read_word_too_long:
    xor eax, eax              ; signal failure
    pop r14
    pop r13
    pop r12
    ret

; parse_uint
;   Parses an unsigned decimal number from the start of a string.
;   In:  rdi = address of a NUL-terminated string
;   Out: rax = value of the leading run of digits '0'..'9'
;        rdx = number of digit characters consumed (0 if the string
;              does not start with a digit; rax is then 0)
;   Parsing stops at the first non-digit byte, including the NUL.
;   Values that do not fit in 64 bits wrap around silently.
;   Clobbers: rcx, rsi, flags. No callee-saved register is used.
parse_uint:
    xor eax, eax              ; rax = accumulated value
    xor esi, esi              ; rsi = index of the next byte to examine

.parse_uint_loop:
    movzx ecx, byte [rdi + rsi]
    sub ecx, '0'              ; digit value, or out of range for any non-digit
    cmp ecx, 9
    ja .parse_uint_done       ; unsigned compare also rejects bytes below '0'
    lea rax, [rax + rax*4]    ; rax = rax * 5
    lea rax, [rcx + rax*2]    ; rax = rax * 10 + digit
    inc rsi
    jmp .parse_uint_loop

.parse_uint_done:
    mov rdx, rsi              ; length = digits consumed
    ret

; parse_int
;   Parses a signed decimal number from the start of a string.
;   In:  rdi = address of a NUL-terminated string
;   Out: rax = number as integer
;        rdx = number of characters consumed, including a leading '-'
;              (0 if no digits were found; rax is then 0)
;   Only '-' is recognized as a sign, and it must be followed directly
;   by a digit. The digits are converted by parse_uint, which is
;   expected to return the value in rax and the digit count in rdx.
parse_int:
    cmp byte [rdi], '-'
    jne parse_uint            ; no sign: same result as parse_uint (tail call)

    inc rdi                   ; step over the '-'
    sub rsp, 8                ; keep rsp 16-byte aligned for the call (SysV entry assumed)
    call parse_uint           ; rax = magnitude, rdx = digit count
    add rsp, 8
    test rdx, rdx
    jz .parse_int_done        ; a '-' with no digits is not a number: 0 / 0
    neg rax                   ; apply the sign
    inc rdx                   ; count the '-' in the length

.parse_int_done:
    ret

; string_equals
;   Not implemented: returns immediately without reading any argument
;   or setting a result register.
;   NOTE(review): presumably meant to compare two NUL-terminated strings
;   for the assignment; no contract is defined in this file -- confirm
;   the expected arguments and return value before implementing.
string_equals:  
    ret

; string_copy
;   Not implemented: returns immediately without reading any argument,
;   copying anything, or setting a result register.
;   NOTE(review): presumably meant to copy a NUL-terminated string into
;   a caller buffer for the assignment; no contract is defined in this
;   file -- confirm the expected arguments and return value before
;   implementing.
string_copy:  
    ret
comments powered by Disqus