git.the-white-hart.net Git - vhdl/commitdiff
Update CPU0 assembler
author rs <>
Sat, 4 Oct 2025 05:53:10 +0000 (00:53 -0500)
committer rs <>
Sat, 4 Oct 2025 05:53:10 +0000 (00:53 -0500)
* Add support for multi-file builds
* Add line tracking for error messages
* Add case sensitivity
* Add support for negative byte-values

projects/cpu_0/asm/as.py

index 833ce74866321f5da423ea84520a6a8e2abbffc5..56b9d5fb5a349251c9d51e6b82785844c7551b15 100755 (executable)
@@ -9,12 +9,8 @@ import struct
 # ------------------------------------------------------------------------------
 
 
-def assemble(source: str) -> Tuple[bytearray, dict]:
-    b = bytearray()
-    syms = dict()
-    forwards = list()
-    tokens = source.split()
-    instrs = {
+class Assembler(object):
+    INSTRS = {
         'nop':  b'\x00',
         '#8':   b'\x01', '#32':  b'\x02',
         '@8':   b'\x03', '@32':  b'\x04',
@@ -29,66 +25,95 @@ def assemble(source: str) -> Tuple[bytearray, dict]:
         'lsr':  b'\x1b', 'asr':  b'\x1c', 'shl':  b'\x1d',
     }
 
-    comment = False
-    for token in tokens:
-        token = token.lower()
-
-        if comment:
-            if token == ')':
-                comment = False
-            continue
-
-        if token == '(':
-            comment = True
-        elif token == 'align':
-            x = len(b) % 4
-            if x != 0:
-                b += b'\x00'*(4-x)
-        elif token in instrs:
-            b += instrs[token]
-        elif '=' in token:
-            sym, value = token.split('=', maxsplit=1)
-            value = int(value, 0)
-            syms[sym] = value
-        elif token.endswith(':'):
-            assert token[0] in '_abcdefghijklmnopqrstuvwxyz'
-            syms[token[:-1]] = len(b)
-        elif token[0] in '-0123456789':
-            if token.endswith('i8'):
-                val = int(token[:-2], 0)
-                assert 0 <= val <= 255
-                b += bytes([val])
-            elif token.endswith('i32'):
-                val = int(token[:-3], 0)
-                assert -0x80000000 <= val <= 0xffffffff
-                if val < 0:
-                    b += struct.pack('<i', val)
+    def __init__(self):
+        self.syms = dict()
+        self.forwards = list()
+        self.flash_section = bytearray()
+        self.ram_watermark = 0x01000000  # TODO
+
+    @staticmethod
+    def tokenize(source: str):
+        lines = source.splitlines(keepends=False)
+        for line_no, line in enumerate(lines):
+            for token in line.split():
+                yield token, line_no + 1
+
+    def chomp(self, filename: str, source: str) -> bool:
+        error = False
+        comment = False
+        for token, line_no in self.tokenize(source):
+            if comment:
+                if token == ')':
+                    comment = False
+                continue
+
+            if token == '(':
+                comment = True
+            elif token == 'align':
+                x = len(self.flash_section) % 4
+                if x != 0:
+                    self.flash_section += b'\xff' * (4 - x)
+            elif token.lower() in self.INSTRS:
+                self.flash_section += self.INSTRS[token.lower()]
+            elif '=' in token:
+                sym, value = token.split('=', maxsplit=1)
+                value = int(value, 0)
+                self.syms[sym] = value
+            elif token.endswith(':'):
+                if token[0] not in '_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
+                    print(f'{filename}:{line_no} - Invalid label name "{token}"')
+                    error = True
+                self.syms[token[:-1]] = len(self.flash_section)
+            elif token[0] in '-0123456789':
+                if token.endswith('i8'):
+                    val = int(token[:-2], 0)
+                    if val < -0x80 or val > 0xff:
+                        print(f'{filename}:{line_no} - I8 value out of range: "{token}"')
+                    if val < 0:
+                        self.flash_section += struct.pack('b', val)
+                    else:
+                        self.flash_section += struct.pack('B', val)
+                elif token.endswith('i32'):
+                    val = int(token[:-3], 0)
+                    assert -0x80000000 <= val <= 0xffffffff
+                    if val < 0:
+                        self.flash_section += struct.pack('<i', val)
+                    else:
+                        self.flash_section += struct.pack('<I', val)
                 else:
-                    b += struct.pack('<I', val)
-            else:
-                print('Number must end with "i8" or "i32"')
-                assert False
-        elif token[0] in '_abcdefghijklmnopqrstuvwxyz':
-            if token in syms:
-                b += struct.pack('<I', syms[token])
+                    print(f'{filename}:{line_no} - Number "{token}" must end with "i8" or "i32"')
+                    error = True
+            elif token[0] in '_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
+                if token in self.syms:
+                    self.flash_section += struct.pack('<I', self.syms[token])
+                else:
+                    self.forwards.append((len(self.flash_section), token))
+                    self.flash_section += b'\x00\x00\x00\x00'
+            elif token.startswith('"') and token.endswith('"'):
+                self.flash_section += bytes(ord(c) for c in token[1:-1])
             else:
-                forwards.append((len(b), token))
-                b += b'\x00\x00\x00\x00'
-        elif token.startswith('"') and token.endswith('"'):
-            b += bytes(ord(c) for c in token[1:-1])
+                print(f'{filename}:{line_no} - Unrecognized token "{token}"')
+                error = True
+
+        return not error
 
-    # Align to two-byte words
-    if len(b) % 2 == 1:
-        b += b'\xff'
+    def emit(self) -> Optional[Tuple[bytearray, dict]]:
+        # Align to two-byte words
+        if len(self.flash_section) % 2 == 1:
+            self.flash_section += b'\xff'
 
-    # Resolve forward references
-    for offset, symbol in forwards:
-        if symbol not in syms:
-            print(f'Symbol "{symbol}" not defined')
-        assert symbol in syms
-        b[offset:offset+4] = struct.pack('<I', syms[symbol])
+        # Resolve forward references
+        unref = False
+        for offset, symbol in self.forwards:
+            if symbol not in self.syms:
+                print(f'Unresolved reference to "{symbol}"')
+                unref = True
+                continue
+            self.flash_section[offset:offset + 4] = struct.pack('<I', self.syms[symbol])
+        if unref:
+            return None
 
-    return b, syms
+        return self.flash_section, self.syms
 
 
 # ------------------------------------------------------------------------------
@@ -98,22 +123,33 @@ def main() -> int:
     parser = argparse.ArgumentParser('AS - CPU0 Assembler')
     parser.add_argument('--outfile', '-o', help='Output filename')
     parser.add_argument('--mapfile', '-m', help='Symbol map filename')
-    parser.add_argument('filename', help='Source filename')
+    parser.add_argument('filename', nargs='+', help='Source filenames')
     args = parser.parse_args()
 
-    with open(args.filename, 'r') as f:
-        with open(args.outfile, 'wb') as g:
-            s = f.read()
-            b, syms = assemble(s)
-            g.write(b)
-            if args.mapfile:
-                if args.mapfile == 'stdout':
+    assembler = Assembler()
+    success = True
+    for source_filename in args.filename:
+        with open(source_filename, 'r') as f:
+            print(f'Assembling "{source_filename}"')
+            source = f.read()
+            s = assembler.chomp(source_filename, source)
+            success = success and s
+    if not success:
+        return 1
+    emitted = assembler.emit()
+    if emitted is None:
+        return 1
+    binary, syms = emitted
+    with open(args.outfile, 'wb') as g:
+        g.write(binary)
+        if args.mapfile:
+            if args.mapfile == 'stdout':
+                for addr, sym in sorted((addr, sym) for sym, addr in syms.items()):
+                    print(f'0x{addr:08x} {sym}')
+            else:
+                with open(args.mapfile, 'w') as h:
                     for addr, sym in sorted((addr, sym) for sym, addr in syms.items()):
-                        print(f'0x{addr:08x}: {sym}')
-                else:
-                    with open(args.mapfile, 'w') as h:
-                        for addr, sym in sorted((addr, sym) for sym, addr in syms.items()):
-                            h.write(f'0x{addr:08x}: {sym}\n')
+                        h.write(f'0x{addr:08x} {sym}\n')
 
     return 0