diff options
author | Your Name <you@example.com> | 2020-07-20 02:54:22 +0200 |
---|---|---|
committer | Your Name <you@example.com> | 2020-07-20 02:54:22 +0200 |
commit | c69f053c29faa47d0600f5b147835e970d9cf654 (patch) | |
tree | d97af2f279114c72c3db81ffb489c316fb3f6dd6 | |
download | AARM64-Disassembler-c69f053c29faa47d0600f5b147835e970d9cf654.tar.gz AARM64-Disassembler-c69f053c29faa47d0600f5b147835e970d9cf654.tar.bz2 AARM64-Disassembler-c69f053c29faa47d0600f5b147835e970d9cf654.zip |
Half-added some basic AARM64 instructions such as ADD, RET, MOV, NOP...
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Makefile | 8 | ||||
-rw-r--r-- | README.md | 16 | ||||
-rwxr-xr-x | build.sh | 19 | ||||
-rw-r--r-- | src/disassemble.cpp | 9 | ||||
-rw-r--r-- | src/examples/1.s | 5 | ||||
-rw-r--r-- | src/include/disassemble.hpp | 13 | ||||
-rw-r--r-- | src/include/instructionhandler.hpp | 14 | ||||
-rw-r--r-- | src/include/instructions.hpp | 35 | ||||
-rw-r--r-- | src/include/utils.hpp | 13 | ||||
-rw-r--r-- | src/instructionhandler.cpp | 49 | ||||
-rw-r--r-- | src/instructions.cpp | 13 | ||||
-rw-r--r-- | src/main.cpp | 38 | ||||
-rw-r--r-- | src/utils.cpp | 36 |
14 files changed, 270 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a667115 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +bin/ +disasm.* diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8f06889 --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +all: ./bin/disasm + +./bin/disasm: ./src/main.cpp + ./build.sh + +clean: ./bin + rm -rf bin + diff --git a/README.md b/README.md new file mode 100644 index 0000000..3860beb --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +# AARM64-Disassembler + +Experimental disassembler for the AARM64 architecture. Not finished. + + +# Compile + +```make``` + +## Usage + +./disasm [AARM64 executable] + +## TODO +Fully implement current instructions to be able to fetch the actual instruction parameters. Implement the full AARM64 instruction set. +https://static.docs.arm.com/ddi0596/a/DDI_0596_ARM_a64_instruction_set_architecture.pdf diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..cedaf10 --- /dev/null +++ b/build.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +out_dir="./bin" + +mkdir $out_dir 2> /dev/null + +for f in $(find ./src -type f \( -iname \*.cpp -o -iname \*.s \)) +do + out_file=$(echo $f | sed 's/.\/src/.\/bin/g' | sed 's/\.cpp/\.o/g' | sed 's/\.s//g') + mkdir -p $(dirname $out_file) + if [[ $f == *.s ]] + then + aarch64-linux-gnu-as $f -o $out_file + else + g++ -g -c $f -o $out_file + fi +done + +g++ -o ./bin/disasm $(find ./bin -type f -name "*.o") -lgcc diff --git a/src/disassemble.cpp b/src/disassemble.cpp new file mode 100644 index 0000000..2a6d6c7 --- /dev/null +++ b/src/disassemble.cpp @@ -0,0 +1,9 @@ +#include "include/disassemble.hpp" + +int Disasm::disassemble(const uint8_t* buffer, uint32_t pos, uint32_t offset) +{ + Instruction* instr = new Instruction(&((uint8_t*)buffer)[pos + offset], offset, pos); + printf("<0x%x:0x%x> - %s - %02x - %s - %d\n", instr->offset, instr->pos, instr->bits, instr->hex, instr->string.c_str(), instr->type); + + return sizeof(uint8_t) * 4; +} diff --git a/src/examples/1.s b/src/examples/1.s new file mode 100644 index 0000000..991f89a --- /dev/null +++ b/src/examples/1.s @@ -0,0 +1,5 @@ +mov X0, #1 +add X0, X0, X0 +add X0, X0, #1 +mov X1, X0 +ret diff --git a/src/include/disassemble.hpp b/src/include/disassemble.hpp new file mode 100644 index 0000000..fde8fb8 --- /dev/null +++ b/src/include/disassemble.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include <stdint.h> +#include <stdio.h> + +#include "utils.hpp" +#include "instructions.hpp" + +class Disasm +{ +public: + static int disassemble(const uint8_t* buffer, uint32_t pos, uint32_t offset); +}; diff --git a/src/include/instructionhandler.hpp b/src/include/instructionhandler.hpp new file mode 100644 index 0000000..c61d3e2 --- /dev/null +++ b/src/include/instructionhandler.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include <tuple> +#include "instructions.hpp" + +class InstructionHanlder +{ +public: + static InstructionType getInstruction(Instruction* instr); + static std::string getInstructionString(Instruction* instr); +private: + static std::tuple<std::string, std::string, InstructionType> getTpl(Instruction* instr); + static std::tuple<std::string, std::string, InstructionType> array[6]; +}; diff --git a/src/include/instructions.hpp b/src/include/instructions.hpp new file mode 100644 index 0000000..ed354b4 --- /dev/null +++ b/src/include/instructions.hpp @@ -0,0 +1,35 @@ +#pragma once + +#include <stdint.h> +#include <string> + +#include "utils.hpp" +#include "disassemble.hpp" + +enum class InstructionType +{ + NUL, + ADD, + ADD_SHIFTED_REG, + SUB, + MOV, + MOVZ, + RET, + NOP +}; + +class Instruction +{ +public: + Instruction(uint8_t* hex, uint32_t offset, uint32_t pos); + uint8_t bits[33]; + uint32_t offset; + uint32_t pos; + uint32_t addr; + InstructionType type; + std::string string; + uint32_t hex; +}; + +#include "instructionhandler.hpp" + diff --git a/src/include/utils.hpp b/src/include/utils.hpp new file mode 100644 index 0000000..5c7cc39 --- /dev/null +++ b/src/include/utils.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include <stdint.h> +#include <string.h> +#include <elf.h> + +class Utils +{ +public: + static void findelf(uint8_t* buffer, uint64_t* textstart, uint64_t* textend); + static void getbinaryrepresentation(uint8_t* bytes, size_t numbytes, uint8_t* buf); + static uint8_t tobit(uint8_t byte, uint8_t pos); +}; diff --git a/src/instructionhandler.cpp b/src/instructionhandler.cpp new file mode 100644 index 0000000..13c6ed8 --- /dev/null +++ b/src/instructionhandler.cpp @@ -0,0 +1,49 @@ +#include "include/instructionhandler.hpp" + +static bool cmpInstruction(uint8_t* bits, std::string mask) +{ + for(int i = 0; i < 32; i++) + { + if(bits[i] != mask.at(i) && mask.at(i) != 'x') + return false; + } + return true; +} + +std::tuple<std::string, std::string, InstructionType> InstructionHanlder::getTpl(Instruction* instr) +{ + for(std::tuple<std::string, std::string, InstructionType> tpl : array) + { + if(cmpInstruction(instr->bits, std::get<0>(tpl))) + return tpl; + } + return {"", "NULL", InstructionType::NUL}; +} + +InstructionType InstructionHanlder::getInstruction(Instruction *instr) +{ + return std::get<2>(getTpl(instr)); +} + +std::string InstructionHanlder::getInstructionString(Instruction *instr) +{ + std::string res; + res.append(std::get<1>(getTpl(instr))); + + return res; +} + +std::tuple<std::string, std::string, InstructionType> InstructionHanlder::array[6] = +{ + { "10010001xxxxxxxxxxxxxxxxxxxxxxxx", "ADD", InstructionType::ADD }, + { "10001011xx0xxxxxxxxxxxxxxxxxxxxx", "ADD", InstructionType::ADD_SHIFTED_REG }, + { "10101010000xxxxx00000011111xxxxx", "MOV", InstructionType::MOV}, + { "110100101xxxxxxxxxxxxxxxxxxxxxxx", "MOV", InstructionType::MOVZ}, + { "1101011001011111000000xxxxx00000", "RET", InstructionType::RET}, + { "11010101000000110010000000011111", "NOP", InstructionType::NOP} +}; + +/* MAYBE MAKE SUBCLASSES oF INSTR TYPES, I THINK THERE ARE 4 + * + * + * THAT WAY WE HAVE ACCESS TO REGS AND SHIT*/ diff --git a/src/instructions.cpp b/src/instructions.cpp new file mode 100644 index 0000000..f32c3e1 --- /dev/null +++ b/src/instructions.cpp @@ -0,0 +1,13 @@ +#include "include/instructions.hpp" + +Instruction::Instruction(uint8_t* hex, uint32_t offset, uint32_t pos) +{ + this->hex = (hex[3] << 24) | (hex[2] << 16)| (hex[1] << 8) | hex[0]; + Utils::getbinaryrepresentation(hex, 4, this->bits); + this->bits[32] = '\0'; + this->offset = offset; + this->pos = pos; + this->addr = this->offset + this->pos; + this->string = InstructionHanlder::getInstructionString(this); + this->type = InstructionHanlder::getInstruction(this); +} diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..c213f22 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,38 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "include/utils.hpp" +#include "include/disassemble.hpp" + +/* AARM64 dissasembler */ + +int main(int argc, char** argv) +{ + (void)argc; + int fd; + uint64_t textstart; + uint64_t textend; + struct stat sb; + uint8_t* buffer; + + fd = open(argv[1], O_RDONLY); + fstat(fd, &sb); + + buffer = (uint8_t*)mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + textstart = 0; + textend = sb.st_size; + Utils::findelf(buffer, &textstart, &textend); + + uint32_t pos = 0; + while((pos + textstart) < textend) + pos += Disasm::disassemble(buffer, pos, textstart); + + munmap(buffer, sb.st_size); + + return 0; +} diff --git a/src/utils.cpp b/src/utils.cpp new file mode 100644 index 0000000..5caac56 --- /dev/null +++ b/src/utils.cpp @@ -0,0 +1,36 @@ +#include "include/utils.hpp" + +void Utils::findelf(uint8_t* buffer, uint64_t* textstart, uint64_t* textend) +{ + Elf64_Ehdr *elf; + Elf64_Shdr *shdr; + char *strtab; + int counter = 1; + + elf = (Elf64_Ehdr *)(buffer); + shdr = (Elf64_Shdr *)((char *)buffer + elf->e_shoff); + strtab = (char *)((char *)buffer + shdr[elf->e_shstrndx].sh_offset); + while(counter < elf->e_shnum) { + if(strcmp(&strtab[shdr[counter].sh_name], ".text")) + { + *textend = shdr[counter].sh_offset; + return; + } + else + *textstart = shdr[counter].sh_offset; + counter++; + } +} + +void Utils::getbinaryrepresentation(uint8_t* bytes, size_t numbytes, uint8_t* buf) +{ + int counter = 0; + for(int i = numbytes - 1; i >= 0; i--) + for(int j = 7; j >= 0; j--) + buf[counter++] = tobit(bytes[i], j) + '0'; +} + +uint8_t Utils::tobit(uint8_t byte, uint8_t pos) +{ + return byte & 1<< pos ? 1 : 0; +} |