about summary refs log tree commit diff
diff options
context:
space:
mode:
authorYour Name <you@example.com>2020-07-20 02:54:22 +0200
committerYour Name <you@example.com>2020-07-20 02:54:22 +0200
commitc69f053c29faa47d0600f5b147835e970d9cf654 (patch)
treed97af2f279114c72c3db81ffb489c316fb3f6dd6
downloadAARM64-Disassembler-c69f053c29faa47d0600f5b147835e970d9cf654.tar.gz
AARM64-Disassembler-c69f053c29faa47d0600f5b147835e970d9cf654.tar.bz2
AARM64-Disassembler-c69f053c29faa47d0600f5b147835e970d9cf654.zip
Initial Commit HEAD master
Half-added some basic AARM64 instructions such as ADD, RET, MOV, NOP...
-rw-r--r--.gitignore2
-rw-r--r--Makefile8
-rw-r--r--README.md16
-rwxr-xr-xbuild.sh19
-rw-r--r--src/disassemble.cpp9
-rw-r--r--src/examples/1.s5
-rw-r--r--src/include/disassemble.hpp13
-rw-r--r--src/include/instructionhandler.hpp14
-rw-r--r--src/include/instructions.hpp35
-rw-r--r--src/include/utils.hpp13
-rw-r--r--src/instructionhandler.cpp49
-rw-r--r--src/instructions.cpp13
-rw-r--r--src/main.cpp38
-rw-r--r--src/utils.cpp36
14 files changed, 270 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a667115
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+bin/
+disasm.*
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8f06889
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+# Neither target names a real file, so never let a stray file shadow them.
+.PHONY: all clean
+
+all: ./bin/disasm
+
+# build.sh compiles everything under ./src, so rebuild when any source,
+# header or the script itself changes, not only main.cpp.
+./bin/disasm: $(wildcard ./src/*.cpp ./src/include/*.hpp ./src/examples/*.s) ./build.sh
+	./build.sh
+
+# No prerequisite: cleaning must also succeed when ./bin does not exist.
+clean:
+	rm -rf bin
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3860beb
--- /dev/null
+++ b/README.md
@@ -0,0 +1,16 @@
+# AARM64-Disassembler
+
+Experimental disassembler for the AARM64 architecture. Not finished.
+
+
+## Compile
+
+```make```
+
+## Usage
+
+./disasm [AARM64 executable]
+
+## TODO
+Fully implement the current instructions so that their actual operands can be decoded, then implement the full AARM64 (AArch64) instruction set.
+https://static.docs.arm.com/ddi0596/a/DDI_0596_ARM_a64_instruction_set_architecture.pdf
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..cedaf10
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+out_dir="./bin"
+
+mkdir $out_dir 2> /dev/null
+
+for f in $(find ./src -type f \( -iname \*.cpp -o -iname \*.s \))
+do
+    out_file=$(echo $f | sed 's/.\/src/.\/bin/g' | sed 's/\.cpp/\.o/g' | sed 's/\.s//g')
+    mkdir -p $(dirname $out_file)
+    if [[ $f == *.s ]]
+    then
+        aarch64-linux-gnu-as $f -o $out_file
+    else
+        g++ -g -c $f -o $out_file
+    fi
+done
+
+g++ -o ./bin/disasm $(find ./bin -type f -name "*.o") -lgcc
diff --git a/src/disassemble.cpp b/src/disassemble.cpp
new file mode 100644
index 0000000..2a6d6c7
--- /dev/null
+++ b/src/disassemble.cpp
@@ -0,0 +1,9 @@
+#include "include/disassemble.hpp"
+
+/*
+ * Decodes and prints the single instruction located at buffer[pos + offset].
+ *
+ * buffer - start of the bytes being disassembled
+ * pos    - position of the instruction relative to offset
+ * offset - offset within buffer at which disassembly started
+ *
+ * Returns the number of bytes consumed, which is always 4.
+ */
+int Disasm::disassemble(const uint8_t* buffer, uint32_t pos, uint32_t offset)
+{
+    // A stack object: nothing is leaked per decoded instruction.
+    // Instruction's constructor takes a non-const pointer, hence the const_cast.
+    Instruction instr(const_cast<uint8_t*>(&buffer[pos + offset]), offset, pos);
+    printf("<0x%x:0x%x> - %s - %02x - %s - %d\n",
+           instr.offset, instr.pos,
+           reinterpret_cast<const char*>(instr.bits), instr.hex,
+           instr.string.c_str(), static_cast<int>(instr.type));
+
+    return sizeof(uint8_t) * 4;
+}
diff --git a/src/examples/1.s b/src/examples/1.s
new file mode 100644
index 0000000..991f89a
--- /dev/null
+++ b/src/examples/1.s
@@ -0,0 +1,5 @@
+// Small sample program; build.sh assembles every *.s file under ./src.
+mov X0, #1          // X0 = 1
+add X0, X0, X0      // X0 = X0 + X0
+add X0, X0, #1      // X0 = X0 + 1
+mov X1, X0          // X1 = X0
+ret                 // return to the caller
diff --git a/src/include/disassemble.hpp b/src/include/disassemble.hpp
new file mode 100644
index 0000000..fde8fb8
--- /dev/null
+++ b/src/include/disassemble.hpp
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "utils.hpp"
+#include "instructions.hpp"
+
+// Stateless holder for the decoding entry point.
+struct Disasm
+{
+    // Decodes and prints the instruction at buffer[pos + offset] and returns
+    // the number of bytes consumed.
+    static int disassemble(const uint8_t* buffer, uint32_t pos, uint32_t offset);
+};
diff --git a/src/include/instructionhandler.hpp b/src/include/instructionhandler.hpp
new file mode 100644
index 0000000..c61d3e2
--- /dev/null
+++ b/src/include/instructionhandler.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <tuple>
+#include "instructions.hpp"
+
+// Looks up the mnemonic and InstructionType that match an Instruction's bits.
+class InstructionHanlder
+{
+    // One lookup-table row: (bit pattern, mnemonic, type).
+    using Entry = std::tuple<std::string, std::string, InstructionType>;
+
+    // Returns the first table row whose pattern matches instr->bits.
+    static Entry getTpl(Instruction* instr);
+    static Entry array[6];
+
+public:
+    // Mnemonic of the matching row ("NULL" when nothing matches).
+    static std::string getInstructionString(Instruction* instr);
+    // Type of the matching row (InstructionType::NUL when nothing matches).
+    static InstructionType getInstruction(Instruction* instr);
+};
diff --git a/src/include/instructions.hpp b/src/include/instructions.hpp
new file mode 100644
index 0000000..ed354b4
--- /dev/null
+++ b/src/include/instructions.hpp
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <stdint.h>
+#include <string>
+
+#include "utils.hpp"
+#include "disassemble.hpp"
+
+// Kinds of instruction the decoder can report. The values are spelled out
+// because Disasm::disassemble prints the type as a number.
+enum class InstructionType
+{
+    NUL = 0,             // no table pattern matched
+    ADD = 1,
+    ADD_SHIFTED_REG = 2, // printed with the "ADD" mnemonic
+    SUB = 3,             // no table pattern produces this yet
+    MOV = 4,
+    MOVZ = 5,            // printed with the "MOV" mnemonic
+    RET = 6,
+    NOP = 7
+};
+
+// One decoded instruction together with where it was found.
+struct Instruction
+{
+    // Decodes the four bytes at hex; offset and pos are stored as given.
+    Instruction(uint8_t* hex, uint32_t offset, uint32_t pos);
+
+    // The word as 32 '0'/'1' characters, most significant bit first,
+    // followed by a terminating '\0'.
+    uint8_t bits[33];
+    uint32_t offset;
+    uint32_t pos;
+    // offset + pos
+    uint32_t addr;
+    InstructionType type;
+    // Mnemonic reported by InstructionHanlder.
+    std::string string;
+    // The four input bytes combined, byte 0 being the least significant.
+    uint32_t hex;
+};
+
+#include "instructionhandler.hpp"
+
diff --git a/src/include/utils.hpp b/src/include/utils.hpp
new file mode 100644
index 0000000..5c7cc39
--- /dev/null
+++ b/src/include/utils.hpp
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <stdint.h>
+#include <string.h>
+#include <elf.h>
+
+// Stateless helpers for ELF lookup and bit formatting.
+struct Utils
+{
+    // Returns 1 when bit number pos of byte is set, otherwise 0.
+    static uint8_t tobit(uint8_t byte, uint8_t pos);
+    // Writes numbytes * 8 characters ('0' or '1') to buf, starting with the
+    // top bit of the last byte of bytes.
+    static void getbinaryrepresentation(uint8_t* bytes, size_t numbytes, uint8_t* buf);
+    // Reports the range to disassemble through textstart / textend.
+    static void findelf(uint8_t* buffer, uint64_t* textstart, uint64_t* textend);
+};
diff --git a/src/instructionhandler.cpp b/src/instructionhandler.cpp
new file mode 100644
index 0000000..13c6ed8
--- /dev/null
+++ b/src/instructionhandler.cpp
@@ -0,0 +1,49 @@
+#include "include/instructionhandler.hpp"
+
+/*
+ * Tests the first 32 entries of bits against mask.
+ *
+ * bits - characters '0' / '1', one per instruction bit
+ * mask - pattern of '0', '1' and 'x', where 'x' accepts either bit value
+ *
+ * Returns true when every non-'x' position of mask equals the character in
+ * bits. A mask shorter than 32 characters never matches.
+ */
+static bool cmpInstruction(uint8_t* bits, const std::string& mask)
+{
+    const std::size_t width = 32;
+
+    // Reject short masks up front rather than throwing out of .at().
+    if(mask.size() < width)
+        return false;
+
+    for(std::size_t i = 0; i < width; i++)
+    {
+        if(mask[i] != 'x' && bits[i] != mask[i])
+            return false;
+    }
+    return true;
+}
+
+/*
+ * Returns the first entry of the lookup table whose bit pattern matches
+ * instr->bits, or {"", "NULL", InstructionType::NUL} when none does.
+ */
+std::tuple<std::string, std::string, InstructionType> InstructionHanlder::getTpl(Instruction* instr)
+{
+    // Bind by reference: a by-value loop variable copies two strings per entry.
+    for(const auto& tpl : array)
+    {
+        if(cmpInstruction(instr->bits, std::get<0>(tpl)))
+            return tpl;
+    }
+    return {"", "NULL", InstructionType::NUL};
+}
+
+// Returns the type stored in the table entry that matches instr.
+InstructionType InstructionHanlder::getInstruction(Instruction *instr)
+{
+    const auto entry = getTpl(instr);
+    return std::get<2>(entry);
+}
+
+// Returns the mnemonic stored in the table entry that matches instr.
+std::string InstructionHanlder::getInstructionString(Instruction *instr)
+{
+    const auto entry = getTpl(instr);
+    return std::string(std::get<1>(entry));
+}
+
+// Lookup table scanned in order by getTpl(); the first matching row wins.
+// Each row is (32-character bit pattern, mnemonic, type). Patterns are
+// written most significant bit first and 'x' matches either bit value.
+// NOTE(review): the patterns presumably follow the A64 encodings in the Arm
+// document linked from the README - confirm before adding rows.
+std::tuple<std::string, std::string, InstructionType> InstructionHanlder::array[6] =
+{
+  { "10010001xxxxxxxxxxxxxxxxxxxxxxxx", "ADD", InstructionType::ADD },
+  { "10001011xx0xxxxxxxxxxxxxxxxxxxxx", "ADD", InstructionType::ADD_SHIFTED_REG },
+  { "10101010000xxxxx00000011111xxxxx", "MOV", InstructionType::MOV},
+  { "110100101xxxxxxxxxxxxxxxxxxxxxxx", "MOV", InstructionType::MOVZ},
+  { "1101011001011111000000xxxxx00000", "RET", InstructionType::RET},
+  { "11010101000000110010000000011111", "NOP", InstructionType::NOP}
+};
+
+/* TODO: maybe make subclasses of Instruction per instruction type (I think
+ * there are 4).
+ *
+ * That way we would have access to the registers and other operands. */
diff --git a/src/instructions.cpp b/src/instructions.cpp
new file mode 100644
index 0000000..f32c3e1
--- /dev/null
+++ b/src/instructions.cpp
@@ -0,0 +1,13 @@
+#include "include/instructions.hpp"
+
+/*
+ * Builds an Instruction from the four bytes at hex and records the offset
+ * and pos it was found at, plus the mnemonic and type it decodes to.
+ */
+Instruction::Instruction(uint8_t* hex, uint32_t offset, uint32_t pos)
+    : offset(offset), pos(pos), addr(offset + pos)
+{
+    // The bytes are combined with hex[0] as the least significant byte.
+    const uint32_t word = static_cast<uint32_t>(hex[0])
+                        | (static_cast<uint32_t>(hex[1]) << 8)
+                        | (static_cast<uint32_t>(hex[2]) << 16)
+                        | (static_cast<uint32_t>(hex[3]) << 24);
+    this->hex = word;
+
+    // bits has to be filled in before the handler lookups below read it.
+    Utils::getbinaryrepresentation(hex, 4, bits);
+    bits[32] = '\0';
+
+    string = InstructionHanlder::getInstructionString(this);
+    type = InstructionHanlder::getInstruction(this);
+}
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..c213f22
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "include/utils.hpp"
+#include "include/disassemble.hpp"
+
+/* AARM64 disassembler */
+
+/*
+ * Maps the file named by argv[1] read-only, asks Utils::findelf() for the
+ * byte range to decode (the whole file by default) and prints one line per
+ * 4-byte instruction in that range.
+ *
+ * Returns 0 on success and EXIT_FAILURE when the argument is missing or the
+ * file cannot be opened, inspected or mapped.
+ */
+int main(int argc, char** argv)
+{
+    if(argc < 2)
+    {
+        fprintf(stderr, "Usage: %s [AARM64 executable]\n", argc > 0 ? argv[0] : "disasm");
+        return EXIT_FAILURE;
+    }
+
+    const int fd = open(argv[1], O_RDONLY);
+    if(fd < 0)
+    {
+        perror("open");
+        return EXIT_FAILURE;
+    }
+
+    struct stat sb;
+    if(fstat(fd, &sb) != 0)
+    {
+        perror("fstat");
+        close(fd);
+        return EXIT_FAILURE;
+    }
+
+    void* mapping = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if(mapping == MAP_FAILED)
+    {
+        perror("mmap");
+        close(fd);
+        return EXIT_FAILURE;
+    }
+    // The mapping stays valid after the descriptor is closed.
+    close(fd);
+
+    uint8_t* buffer = static_cast<uint8_t*>(mapping);
+    uint64_t textstart = 0;
+    uint64_t textend = sb.st_size;
+    Utils::findelf(buffer, &textstart, &textend);
+
+    // Never decode past the mapping, whatever range the lookup reported.
+    if(textend > static_cast<uint64_t>(sb.st_size))
+        textend = sb.st_size;
+
+    // Only complete 4-byte instructions are decoded.
+    uint32_t pos = 0;
+    while((pos + textstart + 4) <= textend)
+        pos += Disasm::disassemble(buffer, pos, textstart);
+
+    munmap(buffer, sb.st_size);
+
+    return 0;
+}
diff --git a/src/utils.cpp b/src/utils.cpp
new file mode 100644
index 0000000..5caac56
--- /dev/null
+++ b/src/utils.cpp
@@ -0,0 +1,36 @@
+#include "include/utils.hpp"
+
+/*
+ * Locates the ".text" section of the 64-bit ELF image that starts at buffer.
+ *
+ * buffer    - start of the file contents
+ * textstart - receives the file offset of the first byte of .text
+ * textend   - receives the file offset one past the last byte of .text
+ *
+ * Both outputs are left untouched when buffer is not a 64-bit ELF image, has
+ * no usable section header table, or has no ".text" section, so the values
+ * the caller stored beforehand stay in effect.
+ */
+void Utils::findelf(uint8_t* buffer, uint64_t* textstart, uint64_t* textend)
+{
+    const Elf64_Ehdr* elf = reinterpret_cast<const Elf64_Ehdr*>(buffer);
+
+    // Anything without the ELF magic, or not 64-bit, is not parsed at all.
+    if(memcmp(elf->e_ident, ELFMAG, SELFMAG) != 0 || elf->e_ident[EI_CLASS] != ELFCLASS64)
+        return;
+    // No section header table, or no valid section-name string table index.
+    if(elf->e_shoff == 0 || elf->e_shstrndx >= elf->e_shnum)
+        return;
+
+    const char* base = reinterpret_cast<const char*>(buffer);
+    const Elf64_Shdr* shdr = reinterpret_cast<const Elf64_Shdr*>(base + elf->e_shoff);
+    const char* strtab = base + shdr[elf->e_shstrndx].sh_offset;
+
+    // The scan starts at index 1; entry 0 is never examined.
+    for(int counter = 1; counter < elf->e_shnum; counter++)
+    {
+        // strcmp() returns 0 when the names are equal.
+        if(strcmp(&strtab[shdr[counter].sh_name], ".text") == 0)
+        {
+            *textstart = shdr[counter].sh_offset;
+            *textend = shdr[counter].sh_offset + shdr[counter].sh_size;
+            return;
+        }
+    }
+}
+
+/*
+ * Writes numbytes * 8 characters ('0' or '1') to buf, walking bytes from its
+ * last element to its first and each byte from bit 7 down to bit 0.
+ * No terminator is written.
+ */
+void Utils::getbinaryrepresentation(uint8_t* bytes, size_t numbytes, uint8_t* buf)
+{
+    uint8_t* out = buf;
+    for(size_t remaining = numbytes; remaining > 0; remaining--)
+    {
+        const uint8_t current = bytes[remaining - 1];
+        for(int shift = 7; shift >= 0; shift--)
+            *out++ = tobit(current, shift) + '0';
+    }
+}
+
+// Returns 1 when bit number pos of byte is set (bit 0 being the least
+// significant bit), otherwise 0.
+uint8_t Utils::tobit(uint8_t byte, uint8_t pos)
+{
+    const bool is_set = ((byte >> pos) & 1) != 0;
+    return is_set ? 1 : 0;
+}