From 20621e073562ee5d423b636fae8b6aa8e38275fa Mon Sep 17 00:00:00 2001 From: stefan Date: Wed, 24 May 2023 22:29:46 -0400 Subject: readme and cleanup --- Makefile | 20 +++++++ README.md | 93 +++++++++++++++++++++++++++++- as/as | Bin 2088979 -> 0 bytes as/as.go | 20 ++++++- as/main.go | 3 +- as/opcodes.go | 14 ++++- test/add.easm | 6 +- vm/instruction.go | 147 ------------------------------------------------ vm/instructions.go | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++- vm/main.go | 8 ++- vm/memory.go | 14 +++-- vm/opcodes.go | 9 +++ vm/rom.go | 9 ++- vm/stack.go | 5 +- vm/vm.go | 28 ++++++--- 15 files changed, 359 insertions(+), 179 deletions(-) create mode 100644 Makefile delete mode 100755 as/as delete mode 100644 vm/instruction.go diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4beb6f1 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +TEST?=test/add.easm # you can use the enviornment variable TEST to specify what binary the "test" target will run +BIN=${TEST:%.easm=%.bin} + +build/vm: + mkdir -p build + (cd vm && go build -o ../$@) + +build/as: + (cd as && go build -o ../$@) + +test: build/vm ${BIN} + ./build/vm ${BIN} + +clean: + rm -rf build test/*.bin + +%.bin: %.easm build/as + ./build/as $< -o $@ + +.PHONY: test diff --git a/README.md b/README.md index ab60e06..9975517 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,97 @@ evm === -building and running --------------------- +components +---------- + +`vm/`: code for the evm interpreter +`as/`: code for an easm assembler (you can also use `evm compile` if you have geth installed) + +building +-------- + +you can build individual go modules like usual ```sh cd vm & go build -./evm +cd ../as & go build +``` + +you can also use the makefile to build all of the modules, and run the test of your choice + +```sh +TEST=test/add.easm make clean test # don't need to specify add.easm, it is the default test +``` + +as +---- + +you can assemble files by specifying the assembly file, 
and optionally the name of your output binary using the `-o` flag + +```sh +./as test/add.src -o test/add.bin +``` + +if you want to compare the output with something like the `evm` tool from geth, you can use xxd with the `-p` flag + +```sh +evm compile test/add.src +xxd -p test/add.bin +``` + +vm +--- + +running bytecode: + +```sh +./vm/evm test/add.bin ``` + +programming +----------- + +right now, only a small subset of evm opcodes are supported, and there is no support for calldata, storage, branching, gas fees and much more + +the only instruction that takes a parameter is the `PUSH` instruction. It will take the specified sub 256b number in hex or decimal format, and push it onto the stack. + +``` +PUSH 4 # pushes the value 4 to the stack +``` + +this `PUSH` instruction is then used to pass values into other instructions + +``` +PUSH 4 +PUSH 9 +ADD +``` + +the result of this `ADD` instruction (13) will then be pushed to the stack + +if you need more info on some operations, check the [opcode reference](https://ethereum.org/en/developers/docs/evm/opcodes) + +if you want your program to return some data, you first need to store it in memory. 
Then you push the offset and length (in bytes) of this memory to the stack, and use the return instruction + +``` +PUSH 9 +PUSH 4 +ADD +PUSH 3 // the offset for MSTORE8 +MSTORE8 // the value we want to store is already on the stack after the ADD instruction +PUSH 1 // the size in bytes of our return data +PUSH 3 // offset again +RETURN +``` + +how does this connect to the ethereum network +--------------------------------------------- + +- nodes on the ethereum network will take in similar bytecode, in addition to persistent contract storage, account state, world state, call data, and much more +- modifications to the state of the ethereum blockchain are verified, usually by comparing the root of hash trees computed by nodes + +more notes +---------- + +- solidity does its own thing, and has its own abi and runtime +- easm is slow and suffers from serious design issues, so ethereum devs want to move to wasm. diff --git a/as/as b/as/as deleted file mode 100755 index 3c80b92..0000000 Binary files a/as/as and /dev/null differ diff --git a/as/as.go b/as/as.go index bcc7cf8..9a2cc11 100644 --- a/as/as.go +++ b/as/as.go @@ -2,30 +2,44 @@ package main import ( "fmt" + "os" "strings" "github.com/holiman/uint256" - ) func Assemble(code string) []byte { bytecode := make([]byte, 0) t := strings.Fields(code) + push := byte(0) for i := range t { x, found := OpcodeStrings[t[i]] if found { - bytecode = append(bytecode, x) + push = (&x).IsPush() + fmt.Printf("token: %s, opcode: 0x%x, push = %d\n", t[i], int(x), int(push)) + bytecode = append(bytecode, byte(x)) } else { + if push == 0 { + fmt.Fprintf(os.Stderr, "unexpected token: %s\n", t[i]) + os.Exit(1) + } var n *uint256.Int = nil if len(t[i]) > 1 && t[i][1] == 'x' { n, _ = uint256.FromHex(t[i]) } else { n, _ = uint256.FromDecimal(t[i]) } + + if (t[i] == "PUSH") { + push = byte(n.ByteLen()) + } + + b := n.Bytes32() - bytecode = append(bytecode, b[:]...) 
+ fmt.Printf("token: NUMBER, value: %s, push = %d\n", t[i], int(push), b[32 - push:]) + bytecode = append(bytecode, b[32-push:]...) } } return bytecode diff --git a/as/main.go b/as/main.go index 95aff46..6476dc2 100644 --- a/as/main.go +++ b/as/main.go @@ -6,7 +6,7 @@ import ( ) func usage(arg0 string) { - fmt.Fprintf(os.Stderr, "usage: %s [-o file] file.easm\n", os.Args[0]) + fmt.Fprintf(os.Stderr, "usage: %s [-o file] file.easm\n", arg0) os.Exit(1) } @@ -20,7 +20,6 @@ func main() { var ain int = 0 for i := 0; i < len(os.Args); i++ { - fmt.Println(os.Args[i]) switch os.Args[i] { case "-o": i = i + 1 diff --git a/as/opcodes.go b/as/opcodes.go index 94c2e2b..be04d19 100644 --- a/as/opcodes.go +++ b/as/opcodes.go @@ -49,9 +49,10 @@ const ( PUSH30 = 0x7d PUSH31 = 0x7e PUSH32 = 0x7f + RETURN = 0xf3 ) -var OpcodeStrings = map[string]byte { +var OpcodeStrings = map[string]Opcode { "STOP": STOP, "ADD": ADD, "MUL": MUL, @@ -66,6 +67,7 @@ var OpcodeStrings = map[string]byte { "MLOAD": MLOAD, "MSTORE": MSTORE, "MSTORE8": MSTORE8, + "PUSH": PUSH1, "PUSH1": PUSH1, "PUSH2": PUSH2, "PUSH3": PUSH3, @@ -98,4 +100,14 @@ var OpcodeStrings = map[string]byte { "PUSH30": PUSH30, "PUSH31": PUSH31, "PUSH32": PUSH32, + "RETURN": RETURN, +} + +// returns 0 if not push, otherwise returns number of bytes +func (o *Opcode) IsPush() byte { + d := *o - PUSH1 + if d >= 0 && d < 31 { + return byte(d) + 1 + } + return 0 } diff --git a/test/add.easm b/test/add.easm index 4876b0b..610b0b9 100644 --- a/test/add.easm +++ b/test/add.easm @@ -1,4 +1,8 @@ PUSH 9 PUSH 4 ADD -STOP +PUSH 3 +MSTORE8 +PUSH 1 +PUSH 3 +RETURN diff --git a/vm/instruction.go b/vm/instruction.go deleted file mode 100644 index 5cd56e6..0000000 --- a/vm/instruction.go +++ /dev/null @@ -1,147 +0,0 @@ -package main - -import ( - "github.com/holiman/uint256" -) - -type Handler func(vm *Evm) - -type Instruction struct { - name string - n int // number of params - handler Handler -} - -type Inst Instruction // shorthand - -// i miss c 
macros :( -var Instructions = [256]Instruction{ - STOP: { - name: "STOP", - n: 0, - handler: func(vm *Evm) { vm.stopped = true }, - } , - ADD: { - name: "ADD", - n: 2, - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.Add(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - MUL: { - n: 2, - name: "MUL", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.Mul(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - SUB: { - n: 2, - name: "SUB", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.Sub(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - DIV: { - n: 2, - name: "DIV", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.Div(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - SDIV: { - n: 2, - name: "SDIV", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.SDiv(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - MOD: { - n: 2, - name: "MOD", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.Mod(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - SMOD: { - n: 2, - name: "SMOD", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.SMod(vm.stack.Pop(), vm.stack.Pop())) - }, - }, - ADDMOD: { - n: 3, - name: "ADDMOD", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.AddMod(vm.stack.Pop(), vm.stack.Pop(), vm.stack.Pop())) - }, - }, - MULMOD: { - n: 3, - name: "MULMOD", - handler: func(vm *Evm) { - o := uint256.NewInt(0) - vm.stack.Push(o.MulMod(vm.stack.Pop(), vm.stack.Pop(), vm.stack.Pop())) - }, - }, - POP: { - n: 1, - name: "POP", - handler: func(vm *Evm) { - vm.stack.Pop() - }, - }, - MLOAD: { - n: 1, - name: "MLOAD", - }, - MSTORE: { - n: 2, - name: "MSTORE", - }, - MSTORE8: { - n: 2, - name: "MSTORE8", - }, - PUSH1: { name: "PUSH1" }, - PUSH2: { name: "PUSH2" }, - PUSH3: { name: "PUSH3" }, - PUSH4: { name: "PUSH4" }, - PUSH5: { name: "PUSH5" }, - PUSH6: { name: "PUSH6" }, - PUSH7: { name: "PUSH7" }, - PUSH8: { name: "PUSH8" }, - PUSH9: { name: "PUSH9" }, 
- PUSH10: { name: "PUSH0" }, - PUSH11: { name: "PUSH11" }, - PUSH12: { name: "PUSH12" }, - PUSH13: { name: "PUSH13" }, - PUSH14: { name: "PUSH14" }, - PUSH15: { name: "PUSH15" }, - PUSH16: { name: "PUSH16" }, - PUSH17: { name: "PUSH17" }, - PUSH18: { name: "PUSH18" }, - PUSH19: { name: "PUSH19" }, - PUSH20: { name: "PUSH20" }, - PUSH21: { name: "PUSH21" }, - PUSH22: { name: "PUSH22" }, - PUSH23: { name: "PUSH23" }, - PUSH24: { name: "PUSH24" }, - PUSH25: { name: "PUSH25" }, - PUSH26: { name: "PUSH26" }, - PUSH27: { name: "PUSH27" }, - PUSH28: { name: "PUSH28" }, - PUSH29: { name: "PUSH29" }, - PUSH30: { name: "PUSH30" }, - PUSH31: { name: "PUSH31" }, - PUSH32: { name: "PUSH32" }, -} diff --git a/vm/instructions.go b/vm/instructions.go index 5a1220b..8b8b2e2 100644 --- a/vm/instructions.go +++ b/vm/instructions.go @@ -1,3 +1,163 @@ package main -type InstructionHandler func (vm *Evm) +import ( + "github.com/holiman/uint256" +) + +type Handler func(vm *Evm) + +type Instruction struct { + name string + n int // number of params + handler Handler +} + +type Inst Instruction // shorthand + +// i miss c macros :( +var Instructions = [256]Instruction{ + STOP: { + name: "STOP", + n: 0, + handler: func(vm *Evm) { vm.stopped = true }, + } , + ADD: { + name: "ADD", + n: 2, + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.Add(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + MUL: { + n: 2, + name: "MUL", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.Mul(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + SUB: { + n: 2, + name: "SUB", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.Sub(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + DIV: { + n: 2, + name: "DIV", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.Div(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + SDIV: { + n: 2, + name: "SDIV", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.SDiv(vm.stack.Pop(), vm.stack.Pop())) + }, + }, 
+ MOD: { + n: 2, + name: "MOD", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.Mod(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + SMOD: { + n: 2, + name: "SMOD", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.SMod(vm.stack.Pop(), vm.stack.Pop())) + }, + }, + ADDMOD: { + n: 3, + name: "ADDMOD", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.AddMod(vm.stack.Pop(), vm.stack.Pop(), vm.stack.Pop())) + }, + }, + MULMOD: { + n: 3, + name: "MULMOD", + handler: func(vm *Evm) { + o := uint256.NewInt(0) + vm.stack.Push(o.MulMod(vm.stack.Pop(), vm.stack.Pop(), vm.stack.Pop())) + }, + }, + POP: { + n: 1, + name: "POP", + handler: func(vm *Evm) { + vm.stack.Pop() + }, + }, + MLOAD: { + n: 1, + name: "MLOAD", + }, + MSTORE: { + n: 2, + name: "MSTORE", + handler: func(vm *Evm) { + vm.memory.sw(vm.stack.Pop().Uint64(), vm.stack.Pop()) + }, + }, + MSTORE8: { + n: 2, + name: "MSTORE8", + handler: func(vm *Evm) { + ost := vm.stack.Pop().Uint64() + x := vm.stack.Pop().Bytes() + n := x[len(x)-1] + vm.memory.sb(ost, n) + }, + }, + PUSH1: { name: "PUSH1" }, + PUSH2: { name: "PUSH2" }, + PUSH3: { name: "PUSH3" }, + PUSH4: { name: "PUSH4" }, + PUSH5: { name: "PUSH5" }, + PUSH6: { name: "PUSH6" }, + PUSH7: { name: "PUSH7" }, + PUSH8: { name: "PUSH8" }, + PUSH9: { name: "PUSH9" }, + PUSH10: { name: "PUSH0" }, + PUSH11: { name: "PUSH11" }, + PUSH12: { name: "PUSH12" }, + PUSH13: { name: "PUSH13" }, + PUSH14: { name: "PUSH14" }, + PUSH15: { name: "PUSH15" }, + PUSH16: { name: "PUSH16" }, + PUSH17: { name: "PUSH17" }, + PUSH18: { name: "PUSH18" }, + PUSH19: { name: "PUSH19" }, + PUSH20: { name: "PUSH20" }, + PUSH21: { name: "PUSH21" }, + PUSH22: { name: "PUSH22" }, + PUSH23: { name: "PUSH23" }, + PUSH24: { name: "PUSH24" }, + PUSH25: { name: "PUSH25" }, + PUSH26: { name: "PUSH26" }, + PUSH27: { name: "PUSH27" }, + PUSH28: { name: "PUSH28" }, + PUSH29: { name: "PUSH29" }, + PUSH30: { name: "PUSH30" }, + PUSH31: { name: "PUSH31" }, 
+ PUSH32: { name: "PUSH32" }, + RETURN: { + n: 2, + name: "RETURN", + handler: func(vm *Evm) { + vm.returndata = vm.memory.ld(vm.stack.Pop().Uint64(), vm.stack.Pop().Uint64()) + }, + }, +} diff --git a/vm/main.go b/vm/main.go index 61aeebb..688918a 100644 --- a/vm/main.go +++ b/vm/main.go @@ -21,6 +21,12 @@ func main() { vm := NewEvm(b) - vm.Start() + returndata := vm.Run() + + fmt.Printf("finished execution.\n") + + if (returndata != nil) { + fmt.Printf("callee returned: %v\n", returndata) + } } diff --git a/vm/memory.go b/vm/memory.go index 0a9dd2c..a0c610a 100644 --- a/vm/memory.go +++ b/vm/memory.go @@ -26,14 +26,18 @@ func (m *Memory) sb(ost uint64, val byte) { (*m)[ost] = val } -// loads 256b word fron offset in memory -func (m *Memory) ld(ost uint64) []byte { - if ost + 32 > uint64(len(*m)) { +func (m *Memory) ld(ost uint64, n uint64) []byte { + if ost + n > uint64(len(*m)) { log.Fatal("trying to load out of memory") } - r := make([]byte, 32) - copy(r, (*m)[ost:ost+32]) + r := make([]byte, n) + copy(r, (*m)[ost:ost+n]) + return r +} + +func (m *Memory) lw(ost uint64) []byte { + r := m.ld(ost, 32) return r } diff --git a/vm/opcodes.go b/vm/opcodes.go index b3d4229..d1f6905 100644 --- a/vm/opcodes.go +++ b/vm/opcodes.go @@ -49,9 +49,18 @@ const ( PUSH30 = 0x7d PUSH31 = 0x7e PUSH32 = 0x7f + RETURN = 0xF3 //... 
) func (op *Opcode) String() string { return Instructions[*op].name } + +func (o *Opcode) IsPush() byte { + d := *o - PUSH1 + if d >= 0 && d < 31 { + return byte(d) + 1 + } + return 0 +} diff --git a/vm/rom.go b/vm/rom.go index 958e70e..b638da0 100644 --- a/vm/rom.go +++ b/vm/rom.go @@ -1,16 +1,15 @@ package main import ( - "os" - "fmt" +// "os" +// "fmt" ) type Rom []byte func (r *Rom) Fetch(pc *uint64, n uint64) []byte { - if (*pc + n > uint64(len(*r)) - 1) { - fmt.Fprintf(os.Stderr, "error: trying to read outside of rom\n") - os.Exit(1) + if (*pc + n > uint64(len(*r))) { + return nil } x := make([]byte, n) copy(x, (*r)[*pc:*pc + n]) diff --git a/vm/stack.go b/vm/stack.go index fc1cbd4..ec2a738 100644 --- a/vm/stack.go +++ b/vm/stack.go @@ -2,8 +2,8 @@ package main import ( "log" - - "github.com/holiman/uint256" + "fmt" + "github.com/holiman/uint256" ) const STACK_CAP = (1 << 10) @@ -11,6 +11,7 @@ const STACK_CAP = (1 << 10) type Stack []uint256.Int func (s *Stack) Push(x *uint256.Int) { + fmt.Printf("pushing %s to stack\n", x.String()) *s = append(*s, *x) if len(*s) + 1 > STACK_CAP { log.Fatal("stack overflow") diff --git a/vm/vm.go b/vm/vm.go index 209ab71..7a6178f 100644 --- a/vm/vm.go +++ b/vm/vm.go @@ -11,6 +11,7 @@ type Evm struct { memory Memory pc uint64 stopped bool + returndata []byte } func NewEvm(_code []byte) *Evm { @@ -19,25 +20,36 @@ func NewEvm(_code []byte) *Evm { stopped: true, stack : Stack{}, code: _code, + returndata: nil, } } -func (vm *Evm) Start() { +func (vm *Evm) Run() []byte { vm.stopped = false + fmt.Printf("code: ", vm.code) for !(vm.stopped) { - op := vm.code.Fetch(&(vm.pc), 1)[0] - fmt.Printf("pc: %d | opcode: %x -> string: %s\n", vm.pc, op, Instructions[op].name) - if op >= PUSH1 && op <= PUSH32 { - nb := op - PUSH1 + 1 - fmt.Printf("pushing %d byte value to the stack!\n", vm.pc, nb) - b := vm.code.Fetch(&(vm.pc), uint64(nb)) + s := vm.code.Fetch(&(vm.pc), 1) + op := Opcode(STOP) + if (s != nil) { + op = Opcode(s[0]) + } + + 
fmt.Printf("pc: %d | opcode: 0x%x -> string: %s\n", vm.pc, op, Instructions[op].name) + push := (&op).IsPush() + if (push != 0) { + b := vm.code.Fetch(&(vm.pc), uint64(push)) x := uint256.NewInt(0) x = x.SetBytes(b) vm.stack.Push(x) } else { - vm.Execute(op) + vm.Execute(byte(op)) } + + if (op == RETURN) { + return vm.returndata + } } + return nil } func (vm *Evm) Execute(op byte) { -- cgit v1.2.3