diff options
author | stefan <stefan@s00.xyz> | 2023-05-24 22:29:46 -0400 |
---|---|---|
committer | stefan <stefan@s00.xyz> | 2023-05-24 22:29:46 -0400 |
commit | 20621e073562ee5d423b636fae8b6aa8e38275fa (patch) | |
tree | 4a81bde039e58602e44a213cfb8d91a65f1dadd9 | |
parent | ba2b9c8a1bb1876b6eb4c9783fde798b19de4418 (diff) | |
download | evm-20621e073562ee5d423b636fae8b6aa8e38275fa.tar.gz |
readme and cleanup
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | README.md | 93 | ||||
-rwxr-xr-x | as/as | bin | 2088979 -> 0 bytes | |||
-rw-r--r-- | as/as.go | 20 | ||||
-rw-r--r-- | as/main.go | 3 | ||||
-rw-r--r-- | as/opcodes.go | 14 | ||||
-rw-r--r-- | test/add.easm | 6 | ||||
-rw-r--r-- | vm/instruction.go | 147 | ||||
-rw-r--r-- | vm/instructions.go | 162 | ||||
-rw-r--r-- | vm/main.go | 8 | ||||
-rw-r--r-- | vm/memory.go | 14 | ||||
-rw-r--r-- | vm/opcodes.go | 9 | ||||
-rw-r--r-- | vm/rom.go | 9 | ||||
-rw-r--r-- | vm/stack.go | 5 | ||||
-rw-r--r-- | vm/vm.go | 28 |
15 files changed, 359 insertions, 179 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4beb6f1 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +TEST?=test/add.easm # you can use the environment variable TEST to specify what binary the "test" target will run +BIN=${TEST:%.easm=%.bin} + +build/vm: + mkdir -p build + (cd vm && go build -o ../$@) + +build/as: + (cd as && go build -o ../$@) + +test: build/vm ${BIN} + ./build/vm ${BIN} + +clean: + rm -rf build test/*.bin + +%.bin: %.easm build/as + ./build/as $< -o $@ + +.PHONY: test @@ -1,10 +1,97 @@ evm === -building and running --------------------- +components +---------- + +`vm/`: code for the evm interpreter +`as/`: code for an easm assembler (you can also use `evm compile` if you have geth installed) + +building +-------- + +you can build individual go modules like usual ```sh cd vm & go build -./evm +cd ../as & go build +``` + +you can also use the makefile to build all of the modules, and run the test of your choice + +```sh +TEST=test/add.easm make clean test # don't need to specify add.easm, it is the default test +``` + +as +---- + +you can assemble files by specifying the assembly file, and optionally the name of your output binary using the `-o` flag + +```sh +./as test/add.src -o test/add.bin +``` + +if you want to compare the output with something like the `evm` tool from geth, you can use xxd with the `-p` flag + +```sh +evm compile test/add.src +xxd -p test/add.bin +``` + +vm +--- + +running bytecode: + +```sh +./vm/evm test/add.bin ``` + +programming +----------- + +right now, only a small subset of evm opcodes are supported, and there is no support for calldata, storage, branching, gas fees and much more + +the only instruction that takes a parameter is the `PUSH` instruction. It will take the specified sub 256b number in hex or decimal format, and push it onto the stack. 
+ +``` +PUSH 4 # pushes the value 4 to the stack +``` + +this `PUSH` instruction is then used to pass values into other instructions + +``` +PUSH 4 +PUSH 9 +ADD +``` + +the result of this `ADD` instruction (13) will then be pushed to the stack + +if you need more info on some operations, check the [opcode reference](https://ethereum.org/en/developers/docs/evm/opcodes) + +if you want your program to return some data, you first need to store it in memory. Then you push the offset and length (in bytes) of this memory to the stack, and use the return instruction + +``` +PUSH 9 +PUSH 4 +ADD +PUSH 3 // the offset for MSTORE8 +MSTORE8 // the value we want to store is already on the stack after the ADD instruction +PUSH 1 // the size in bytes of our return data +PUSH 3 // offset again +RETURN +``` + +how does this connect to the ethereum network +--------------------------------------------- + +- nodes on the ethereum network will take in similar bytecode, in addition to persistent contract storage, account state, world state, call data, and much more +- modifications to the state of the ethereum blockchain are verified, usually by comparing the root of hash trees computed by nodes + +more notes +---------- + +- solidity does its own thing, and has its own abi and runtime +- easm is slow and suffers from serious design issues, so ethereum devs want to move to wasm. 
Binary files differ@@ -2,30 +2,44 @@ package main import ( "fmt" + "os" "strings" "github.com/holiman/uint256" - ) func Assemble(code string) []byte { bytecode := make([]byte, 0) t := strings.Fields(code) + push := byte(0) for i := range t { x, found := OpcodeStrings[t[i]] if found { - bytecode = append(bytecode, x) + push = (&x).IsPush() + fmt.Printf("token: %s, opcode: 0x%x, push = %d\n", t[i], int(x), int(push)) + bytecode = append(bytecode, byte(x)) } else { + if push == 0 { + fmt.Fprintf(os.Stderr, "unexpected token: %s\n", t[i]) + os.Exit(1) + } var n *uint256.Int = nil if len(t[i]) > 1 && t[i][1] == 'x' { n, _ = uint256.FromHex(t[i]) } else { n, _ = uint256.FromDecimal(t[i]) } + + if (t[i] == "PUSH") { + push = byte(n.ByteLen()) + } + + b := n.Bytes32() - bytecode = append(bytecode, b[:]...) + fmt.Printf("token: NUMBER, value: %s, push = %d\n", t[i], int(push), b[32 - push:]) + bytecode = append(bytecode, b[32-push:]...) } } return bytecode @@ -6,7 +6,7 @@ import ( ) func usage(arg0 string) { - fmt.Fprintf(os.Stderr, "usage: %s [-o file] file.easm\n", os.Args[0]) + fmt.Fprintf(os.Stderr, "usage: %s [-o file] file.easm\n", arg0) os.Exit(1) } @@ -20,7 +20,6 @@ func main() { var ain int = 0 for i := 0; i < len(os.Args); i++ { - fmt.Println(os.Args[i]) switch os.Args[i] { case "-o": i = i + 1 diff --git a/as/opcodes.go b/as/opcodes.go index 94c2e2b..be04d19 100644 --- a/as/opcodes.go +++ b/as/opcodes.go @@ -49,9 +49,10 @@ const ( PUSH30 = 0x7d PUSH31 = 0x7e PUSH32 = 0x7f + RETURN = 0xf3 ) -var OpcodeStrings = map[string]byte { +var OpcodeStrings = map[string]Opcode { "STOP": STOP, "ADD": ADD, "MUL": MUL, @@ -66,6 +67,7 @@ var OpcodeStrings = map[string]byte { "MLOAD": MLOAD, "MSTORE": MSTORE, "MSTORE8": MSTORE8, + "PUSH": PUSH1, "PUSH1": PUSH1, "PUSH2": PUSH2, "PUSH3": PUSH3, @@ -98,4 +100,14 @@ var OpcodeStrings = map[string]byte { "PUSH30": PUSH30, "PUSH31": PUSH31, "PUSH32": PUSH32, + "RETURN": RETURN, +} + +// returns 0 if not push, otherwise returns number 
of bytes +func (o *Opcode) IsPush() byte { + d := *o - PUSH1 + if d >= 0 && d < 31 { + return byte(d) + 1 + } + return 0 } diff --git a/test/add.easm b/test/add.easm index 4876b0b..610b0b9 100644 --- a/test/add.easm +++ b/test/add.easm @@ -1,4 +1,8 @@ PUSH 9 PUSH 4 ADD -STOP +PUSH 3 +MSTORE8 +PUSH 1 +PUSH 3 +RETURN diff --git a/vm/instruction.go b/vm/instruction.go deleted file mode 100644 index 5cd56e6..0000000 --- a/vm/instruction.go +++ /dev/null @@ -1,147 +0,0 @@ -package main - -import ( - "github.com/holiman/uint256" -) - -type Handler func(vm *Evm) - -type Instruction struct { - name string - n int // number of params - handler Handler -} - -type Inst Instruction // shorthand - -// i miss c macros :( -var Instructions = [256]Instruction{ - STOP: { - name: "STOP", - n: 0, - handler: func(vm *Evm) { vm.stopped = true }, - } , - ADD: { - name: "ADD", - n: 2, - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.Add(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - MUL: { - n: 2, - name: "MUL", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.Mul(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - SUB: { - n: 2, - name: "SUB", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.Sub(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - DIV: { - n: 2, - name: "DIV", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.Div(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - SDIV: { - n: 2, - name: "SDIV", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.SDiv(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - MOD: { - n: 2, - name: "MOD", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.Mod(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - SMOD: { - n: 2, - name: "SMOD", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.SMod(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - ADDMOD: { - n: 3, - name: "ADDMOD", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - 
vm.stack.Push(o.AddMod(vm.stack.Pop(), vm.stack.Pop(), vm.stack.Pop())) - }, - }, - MULMOD: { - n: 3, - name: "MULMOD", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.MulMod(vm.stack.Pop(), vm.stack.Pop(), vm.stack.Pop())) - }, - }, - POP: { - n: 1, - name: "POP", - handler: func(vm *Evm) { - vm.stack.Pop() - }, - }, - MLOAD: { - n: 1, - name: "MLOAD", - }, - MSTORE: { - n: 2, - name: "MSTORE", - }, - MSTORE8: { - n: 2, - name: "MSTORE8", - }, - PUSH1: { name: "PUSH1" }, - PUSH2: { name: "PUSH2" }, - PUSH3: { name: "PUSH3" }, - PUSH4: { name: "PUSH4" }, - PUSH5: { name: "PUSH5" }, - PUSH6: { name: "PUSH6" }, - PUSH7: { name: "PUSH7" }, - PUSH8: { name: "PUSH8" }, - PUSH9: { name: "PUSH9" }, - PUSH10: { name: "PUSH0" }, - PUSH11: { name: "PUSH11" }, - PUSH12: { name: "PUSH12" }, - PUSH13: { name: "PUSH13" }, - PUSH14: { name: "PUSH14" }, - PUSH15: { name: "PUSH15" }, - PUSH16: { name: "PUSH16" }, - PUSH17: { name: "PUSH17" }, - PUSH18: { name: "PUSH18" }, - PUSH19: { name: "PUSH19" }, - PUSH20: { name: "PUSH20" }, - PUSH21: { name: "PUSH21" }, - PUSH22: { name: "PUSH22" }, - PUSH23: { name: "PUSH23" }, - PUSH24: { name: "PUSH24" }, - PUSH25: { name: "PUSH25" }, - PUSH26: { name: "PUSH26" }, - PUSH27: { name: "PUSH27" }, - PUSH28: { name: "PUSH28" }, - PUSH29: { name: "PUSH29" }, - PUSH30: { name: "PUSH30" }, - PUSH31: { name: "PUSH31" }, - PUSH32: { name: "PUSH32" }, -} diff --git a/vm/instructions.go b/vm/instructions.go index 5a1220b..8b8b2e2 100644 --- a/vm/instructions.go +++ b/vm/instructions.go @@ -1,3 +1,163 @@ package main -type InstructionHandler func (vm *Evm) +import ( + "github.com/holiman/uint256" +) + +type Handler func(vm *Evm) + +type Instruction struct { + name string + n int // number of params + handler Handler +} + +type Inst Instruction // shorthand + +// i miss c macros :( +var Instructions = [256]Instruction{ + STOP: { + name: "STOP", + n: 0, + handler: func(vm *Evm) { vm.stopped = true }, + } , + ADD: { + name: "ADD", + 
n: 2, + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.Add(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + MUL: { + n: 2, + name: "MUL", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.Mul(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + SUB: { + n: 2, + name: "SUB", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.Sub(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + DIV: { + n: 2, + name: "DIV", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.Div(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + SDIV: { + n: 2, + name: "SDIV", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.SDiv(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + MOD: { + n: 2, + name: "MOD", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.Mod(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + SMOD: { + n: 2, + name: "SMOD", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.SMod(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + ADDMOD: { + n: 3, + name: "ADDMOD", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.AddMod(vm.stack.Pop(), vm.stack.Pop(), vm.stack.Pop())) + }, + }, + MULMOD: { + n: 3, + name: "MULMOD", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.MulMod(vm.stack.Pop(), vm.stack.Pop(), vm.stack.Pop())) + }, + }, + POP: { + n: 1, + name: "POP", + handler: func(vm *Evm) { + vm.stack.Pop() + }, + }, + MLOAD: { + n: 1, + name: "MLOAD", + }, + MSTORE: { + n: 2, + name: "MSTORE", + handler: func(vm *Evm) { + vm.memory.sw(vm.stack.Pop().Uint64(), vm.stack.Pop()) + }, + }, + MSTORE8: { + n: 2, + name: "MSTORE8", + handler: func(vm *Evm) { + ost := vm.stack.Pop().Uint64() + x := vm.stack.Pop().Bytes() + n := x[len(x)-1] + vm.memory.sb(ost, n) + }, + }, + PUSH1: { name: "PUSH1" }, + PUSH2: { name: "PUSH2" }, + PUSH3: { name: "PUSH3" }, + PUSH4: { name: "PUSH4" }, + PUSH5: { name: "PUSH5" }, + PUSH6: { name: "PUSH6" }, + PUSH7: { name: 
"PUSH7" }, + PUSH8: { name: "PUSH8" }, + PUSH9: { name: "PUSH9" }, + PUSH10: { name: "PUSH0" }, + PUSH11: { name: "PUSH11" }, + PUSH12: { name: "PUSH12" }, + PUSH13: { name: "PUSH13" }, + PUSH14: { name: "PUSH14" }, + PUSH15: { name: "PUSH15" }, + PUSH16: { name: "PUSH16" }, + PUSH17: { name: "PUSH17" }, + PUSH18: { name: "PUSH18" }, + PUSH19: { name: "PUSH19" }, + PUSH20: { name: "PUSH20" }, + PUSH21: { name: "PUSH21" }, + PUSH22: { name: "PUSH22" }, + PUSH23: { name: "PUSH23" }, + PUSH24: { name: "PUSH24" }, + PUSH25: { name: "PUSH25" }, + PUSH26: { name: "PUSH26" }, + PUSH27: { name: "PUSH27" }, + PUSH28: { name: "PUSH28" }, + PUSH29: { name: "PUSH29" }, + PUSH30: { name: "PUSH30" }, + PUSH31: { name: "PUSH31" }, + PUSH32: { name: "PUSH32" }, + RETURN: { + n: 2, + name: "RETURN", + handler: func(vm *Evm) { + vm.returndata = vm.memory.ld(vm.stack.Pop().Uint64(), vm.stack.Pop().Uint64()) + }, + }, +} @@ -21,6 +21,12 @@ func main() { vm := NewEvm(b) - vm.Start() + returndata := vm.Run() + + fmt.Printf("finished execution.\n") + + if (returndata != nil) { + fmt.Printf("callee returned: %v\n", returndata) + } } diff --git a/vm/memory.go b/vm/memory.go index 0a9dd2c..a0c610a 100644 --- a/vm/memory.go +++ b/vm/memory.go @@ -26,14 +26,18 @@ func (m *Memory) sb(ost uint64, val byte) { (*m)[ost] = val } -// loads 256b word fron offset in memory -func (m *Memory) ld(ost uint64) []byte { - if ost + 32 > uint64(len(*m)) { +func (m *Memory) ld(ost uint64, n uint64) []byte { + if ost + n > uint64(len(*m)) { log.Fatal("trying to load out of memory") } - r := make([]byte, 32) - copy(r, (*m)[ost:ost+32]) + r := make([]byte, n) + copy(r, (*m)[ost:ost+n]) + return r +} + +func (m *Memory) lw(ost uint64) []byte { + r := m.ld(ost, 32) return r } diff --git a/vm/opcodes.go b/vm/opcodes.go index b3d4229..d1f6905 100644 --- a/vm/opcodes.go +++ b/vm/opcodes.go @@ -49,9 +49,18 @@ const ( PUSH30 = 0x7d PUSH31 = 0x7e PUSH32 = 0x7f + RETURN = 0xF3 //... 
) func (op *Opcode) String() string { return Instructions[*op].name } + +func (o *Opcode) IsPush() byte { + d := *o - PUSH1 + if d >= 0 && d < 31 { + return byte(d) + 1 + } + return 0 +} @@ -1,16 +1,15 @@ package main import ( - "os" - "fmt" +// "os" +// "fmt" ) type Rom []byte func (r *Rom) Fetch(pc *uint64, n uint64) []byte { - if (*pc + n > uint64(len(*r)) - 1) { - fmt.Fprintf(os.Stderr, "error: trying to read outside of rom\n") - os.Exit(1) + if (*pc + n > uint64(len(*r))) { + return nil } x := make([]byte, n) copy(x, (*r)[*pc:*pc + n]) diff --git a/vm/stack.go b/vm/stack.go index fc1cbd4..ec2a738 100644 --- a/vm/stack.go +++ b/vm/stack.go @@ -2,8 +2,8 @@ package main import ( "log" - - "github.com/holiman/uint256" + "fmt" + "github.com/holiman/uint256" ) const STACK_CAP = (1 << 10) @@ -11,6 +11,7 @@ const STACK_CAP = (1 << 10) type Stack []uint256.Int func (s *Stack) Push(x *uint256.Int) { + fmt.Printf("pushing %s to stack\n", x.String()) *s = append(*s, *x) if len(*s) + 1 > STACK_CAP { log.Fatal("stack overflow") @@ -11,6 +11,7 @@ type Evm struct { memory Memory pc uint64 stopped bool + returndata []byte } func NewEvm(_code []byte) *Evm { @@ -19,25 +20,36 @@ func NewEvm(_code []byte) *Evm { stopped: true, stack : Stack{}, code: _code, + returndata: nil, } } -func (vm *Evm) Start() { +func (vm *Evm) Run() []byte { vm.stopped = false + fmt.Printf("code: ", vm.code) for !(vm.stopped) { - op := vm.code.Fetch(&(vm.pc), 1)[0] - fmt.Printf("pc: %d | opcode: %x -> string: %s\n", vm.pc, op, Instructions[op].name) - if op >= PUSH1 && op <= PUSH32 { - nb := op - PUSH1 + 1 - fmt.Printf("pushing %d byte value to the stack!\n", vm.pc, nb) - b := vm.code.Fetch(&(vm.pc), uint64(nb)) + s := vm.code.Fetch(&(vm.pc), 1) + op := Opcode(STOP) + if (s != nil) { + op = Opcode(s[0]) + } + + fmt.Printf("pc: %d | opcode: 0x%x -> string: %s\n", vm.pc, op, Instructions[op].name) + push := (&op).IsPush() + if (push != 0) { + b := vm.code.Fetch(&(vm.pc), uint64(push)) x := 
uint256.NewInt(0) x = x.SetBytes(b) vm.stack.Push(x) } else { - vm.Execute(op) + vm.Execute(byte(op)) } + + if (op == RETURN) { + return vm.returndata + } } + return nil } func (vm *Evm) Execute(op byte) { |