From: user Date: Tue, 26 Dec 2023 08:10:36 +0000 (-0600) Subject: Annotate with cycle counts and SREG requirements X-Git-Url: https://git.the-white-hart.net/?a=commitdiff_plain;ds=sidebyside;p=atmega%2Fsiggen.git Annotate with cycle counts and SREG requirements An asterisk after the cycle count indicates that it requires the SREG value from a previous instruction. --- diff --git a/asm_2/interp.asm b/asm_2/interp.asm index 423df3c..88b63c9 100644 --- a/asm_2/interp.asm +++ b/asm_2/interp.asm @@ -200,818 +200,818 @@ _zreg_done: .org INTERPRETER_START loop: ; Check for countdown events - in r25, TIFR0 - sbrs r25, OCF0A - rjmp _countdown_done + in r25, TIFR0 ; 1 + sbrs r25, OCF0A ; 1/2 + rjmp _countdown_done ; 2 ; Clear the timer expire flag - out TIFR0, r25 + out TIFR0, r25 ; 1 ; Decrement each of the countdown clocks - ldi ZL, LOW(interp_clocks) - ldi ZH, HIGH(interp_clocks) - ldi r23, 16 + ldi ZL, LOW(interp_clocks) ; 1 + ldi ZH, HIGH(interp_clocks) ; 1 + ldi r23, 16 ; 1 _countdown_loop: - dec r23 - brlt _countdown_done - ldd r24, Z+0 - ldd r25, Z+1 - mov r22, r24 - or r22, r25 - breq _countdown_next - subi r24, 1 - sbci r25, 0 + dec r23 ; 1 + brlt _countdown_done ; 1/2* + ldd r24, Z+0 ; 2 + ldd r25, Z+1 ; 2 + mov r22, r24 ; 1 + or r22, r25 ; 1 + breq _countdown_next ; 1/2* + subi r24, 1 ; 1 + sbci r25, 0 ; 1* _countdown_next: - st Z+, r24 - st Z+, r25 - rjmp _countdown_loop + st Z+, r24 ; 2 + st Z+, r25 ; 2 + rjmp _countdown_loop ; 2 _countdown_done: ; Fetch instruction - movw ZL, r2 ; Put bytecode PC into Z - lsl ZL ; *2 to get a byte address from a word address - rol ZH - lpm r4, Z+ ; Load a word and increment - lpm r5, Z+ - lsr ZH ; /2 to get a word address from a byte address - ror ZL - movw r2, ZL ; Save updated bytecode PC + movw ZL, r2 ; 1 Put bytecode PC into Z + lsl ZL ; 1 *2 to get a byte address from a word address + rol ZH ; 1* + lpm r4, Z+ ; 3 Load a word and increment + lpm r5, Z+ ; 3 + lsr ZH ; 1 /2 to get a word address from a byte address + ror ZL 
; 1* + movw r2, ZL ; 1 Save updated bytecode PC ; Decode first operand, always register V[X] - mov r24, r5 ; Extract X field from instruction - andi r24, 0x0f - lsl r24 ; *4 to get an offset from an index - lsl r24 - clr r25 ; Add offset to base address of registers - ldi ZL, LOW(interp_regs) - ldi ZH, HIGH(interp_regs) - add ZL, r24 - adc ZH, r25 - movw r16, ZL ; Save the reg addr for use as destination later - ldd r6, Z+0 ; Load operand value - ldd r7, Z+1 - ldd r8, Z+2 - ldd r9, Z+3 + mov r24, r5 ; 1 Extract X field from instruction + andi r24, 0x0f ; 1 + lsl r24 ; 1 *4 to get an offset from an index + lsl r24 ; 1 + clr r25 ; 1 Add offset to base address of registers + ldi ZL, LOW(interp_regs) ; 1 + ldi ZH, HIGH(interp_regs) ; 1 + add ZL, r24 ; 1 + adc ZH, r25 ; 1* + movw r16, ZL ; 1 Save the reg addr for use as destination later + ldd r6, Z+0 ; 2 Load operand value + ldd r7, Z+1 ; 2 + ldd r8, Z+2 ; 2 + ldd r9, Z+3 ; 2 ; Decode second operand based on instruction F field - mov r24, r5 ; Extract F field from instruction - lsr r24 - lsr r24 - lsr r24 - lsr r24 - clr r25 ; Add base address of operand-dispatch jumptable - ldi ZL, LOW(operand_jumptable) - ldi ZH, HIGH(operand_jumptable) - add ZL, r24 - adc ZH, r25 - mov r15, r24 ; Save the F field for decoding instruction later - ijmp ; Jump to whatever code decodes the other operand + mov r24, r5 ; 1 Extract F field from instruction + lsr r24 ; 1 + lsr r24 ; 1 + lsr r24 ; 1 + lsr r24 ; 1 + clr r25 ; 1 Add base address of operand-dispatch jumptable + ldi ZL, LOW(operand_jumptable) ; 1 + ldi ZH, HIGH(operand_jumptable) ; 1 + add ZL, r24 ; 1 + adc ZH, r25 ; 1* + mov r15, r24 ; 1 Save the F field for decoding instruction later + ijmp ; 2 Jump to whatever code decodes the other operand _decode_done: ; Load flags value - ldi r24, 0x3c ; Offset of the VF register (0xf * 4) - clr r25 ; Add offset to base address of registers - ldi ZL, LOW(interp_regs) - ldi ZH, HIGH(interp_regs) - add ZL, r24 - adc ZH, r25 - ld r14, Z + ldi 
r24, 0x3c ; 1 Offset of the VF register (0xf * 4) + clr r25 ; 1 Add offset to base address of registers + ldi ZL, LOW(interp_regs) ; 1 + ldi ZH, HIGH(interp_regs) ; 1 + add ZL, r24 ; 1 + adc ZH, r25 ; 1* + ld r14, Z ; 2 ; Dispatch based on instruction F field - mov r24, r15 ; Recover saved F field from instruction - clr r25 ; Add base address of instruction-dispatch jumptable - ldi ZL, LOW(f_dispatch_jumptable) - ldi ZH, HIGH(f_dispatch_jumptable) - add ZL, r24 - adc ZH, r25 - ijmp ; Jump to whatever code runs this type of instruction + mov r24, r15 ; 1 Recover saved F field from instruction + clr r25 ; 1 Add base address of instruction-dispatch jumptable + ldi ZL, LOW(f_dispatch_jumptable) ; 1 + ldi ZH, HIGH(f_dispatch_jumptable) ; 1 + add ZL, r24 ; 1 + adc ZH, r25 ; 1* + ijmp ; 2 Jump to whatever code runs this type of instruction _dispatch_done_writeback_flags: ; Get rid of the S, N, and Z flags, we're making our own - ldi r25, 0xe9 - and r14, r25 + ldi r25, 0xe9 ; 1 + and r14, r25 ; 1 ; Compute N flag - ldi r25, 0x04 - sbrc r9, 7 - or r14, r25 + ldi r25, 0x04 ; 1 + sbrc r9, 7 ; 1/2 + or r14, r25 ; 1 ; Compute Z flag - ldi r24, 0x02 - clr r25 - or r25, r6 - or r25, r7 - or r25, r8 - or r25, r9 - brne _no_z - or r14, r24 + ldi r24, 0x02 ; 1 + clr r25 ; 1 + or r25, r6 ; 1 + or r25, r7 ; 1 + or r25, r8 ; 1 + or r25, r9 ; 1 + brne _no_z ; 1/2* + or r14, r24 ; 1 _no_z: ; Compute S flag - mov r25, r14 - lsl r25 ; Shift N flag up to where V is - eor r25, r14 ; Xor to get the value for the S flag - bst r25, 3 ; Read flag value - bld r14, 4 ; Write into proper spot + mov r25, r14 ; 1 + lsl r25 ; 1 Shift N flag up to where V is + eor r25, r14 ; 1 Xor to get the value for the S flag + bst r25, 3 ; 1 Read flag value + bld r14, 4 ; 1 Write into proper spot _dispatch_done_writeback_fixedflags: - ldi r24, 0x3c ; Offset of the VF register (0xf * 4) - clr r25 ; Add offset to base address of registers - ldi ZL, LOW(interp_regs) - ldi ZH, HIGH(interp_regs) - add ZL, r24 - adc ZH, 
r25 - st Z, r14 ; Store the flag byte generated by the instruction + ldi r24, 0x3c ; 1 Offset of the VF register (0xf * 4) + clr r25 ; 1 Add offset to base address of registers + ldi ZL, LOW(interp_regs) ; 1 + ldi ZH, HIGH(interp_regs) ; 1 + add ZL, r24 ; 1 + adc ZH, r25 ; 1* + st Z, r14 ; 2 Store the flag byte generated by the instruction _dispatch_done_writeback_reg: - movw ZL, r16 ; Recover the pointer to V[X] - std Z+0, r6 ; Save the instruction result to the register - std Z+1, r7 - std Z+2, r8 - std Z+3, r9 + movw ZL, r16 ; 1 Recover the pointer to V[X] + std Z+0, r6 ; 2 Save the instruction result to the register + std Z+1, r7 ; 2 + std Z+2, r8 ; 2 + std Z+3, r9 ; 2 _dispatch_done: - rjmp loop + rjmp loop ; 2 ; ------------------------------------------------------------------------------ ; Operand decoding operand_jumptable: - rjmp operand_VY - rjmp operand_imm32 - rjmp operand_Y - rjmp operand_VY_N - rjmp operand_VY_N - rjmp operand_VY_N - rjmp operand_VY_N - rjmp operand_VY_N - rjmp operand_VY_N - rjmp operand_VY_N - rjmp operand_VY_N - rjmp operand_PC_ssNN - rjmp operand_PC_sNNN - rjmp operand_PC_sNNN - rjmp operand_0NNN - rjmp operand_VY_N + rjmp operand_VY ; 2 + rjmp operand_imm32 ; 2 + rjmp operand_Y ; 2 + rjmp operand_VY_N ; 2 + rjmp operand_VY_N ; 2 + rjmp operand_VY_N ; 2 + rjmp operand_VY_N ; 2 + rjmp operand_VY_N ; 2 + rjmp operand_VY_N ; 2 + rjmp operand_VY_N ; 2 + rjmp operand_VY_N ; 2 + rjmp operand_PC_ssNN ; 2 + rjmp operand_PC_sNNN ; 2 + rjmp operand_PC_sNNN ; 2 + rjmp operand_0NNN ; 2 + rjmp operand_VY_N ; 2 ; ----- V[Y] operand_VY: - mov r24, r4 - andi r24, 0xf0 - lsr r24 - lsr r24 - clr r25 - ldi ZL, LOW(interp_regs) - ldi ZH, HIGH(interp_regs) - add ZL, r24 - adc ZH, r25 - ldd r10, Z+0 - ldd r11, Z+1 - ldd r12, Z+2 - ldd r13, Z+3 - - rjmp _decode_done + mov r24, r4 ; 1 + andi r24, 0xf0 ; 1 + lsr r24 ; 1 + lsr r24 ; 1 + clr r25 ; 1 + ldi ZL, LOW(interp_regs) ; 1 + ldi ZH, HIGH(interp_regs) ; 1 + add ZL, r24 ; 1 + adc ZH, r25 ; 1* + ldd 
r10, Z+0 ; 2 + ldd r11, Z+1 ; 2 + ldd r12, Z+2 ; 2 + ldd r13, Z+3 ; 2 + + rjmp _decode_done ; 2 ; ----- 32-bit immediate following instruction operand_imm32: - movw ZL, r2 ; Put bytecode PC into Z - lsl ZL ; *2 to get a byte address from a word address - rol ZH - lpm r10, Z+ ; Load four bytes and increment - lpm r11, Z+ - lpm r12, Z+ - lpm r13, Z+ - lsr ZH ; /2 to get a word address from a byte address - ror ZL - movw r2, ZL ; Save updated bytecode PC + movw ZL, r2 ; 1 Put bytecode PC into Z + lsl ZL ; 1 *2 to get a byte address from a word address + rol ZH ; 1* + lpm r10, Z+ ; 3 Load four bytes and increment + lpm r11, Z+ ; 3 + lpm r12, Z+ ; 3 + lpm r13, Z+ ; 3 + lsr ZH ; 1 /2 to get a word address from a byte address + ror ZL ; 1* + movw r2, ZL ; 1 Save updated bytecode PC - rjmp _decode_done + rjmp _decode_done ; 2 ; ----- 4-bit zero-extended immediate within instruction operand_Y: - mov r10, r4 - lsr r10 - lsr r10 - lsr r10 - lsr r10 - clr r11 - clr r12 - clr r13 + mov r10, r4 ; 1 + lsr r10 ; 1 + lsr r10 ; 1 + lsr r10 ; 1 + lsr r10 ; 1 + clr r11 ; 1 + clr r12 ; 1 + clr r13 ; 1 ; Most of these instructions have no use for a zero immediate ; Replace zero with a more useful 0x10 value, for range of 0x01-0x10 ; Instructions that want 0x00-0x0f can mask off the upper nibble - tst r10 - brne _operand_Y_done - ldi r25, 0x10 - mov r10, r25 + tst r10 ; 1 + brne _operand_Y_done ; 1/2* + ldi r25, 0x10 ; 1 + mov r10, r25 ; 1 _operand_Y_done: - rjmp _decode_done + rjmp _decode_done ; 2 ; ----- V[Y] + 4-bit zero-extended immediate within instruction operand_VY_N: ; Load V[Y] - mov r24, r4 - andi r24, 0xf0 - lsr r24 - lsr r24 - clr r25 - ldi ZL, LOW(interp_regs) - ldi ZH, HIGH(interp_regs) - add ZL, r24 - adc ZH, r25 - ldd r10, Z+0 - ldd r11, Z+1 - ldd r12, Z+2 - ldd r13, Z+3 + mov r24, r4 ; 1 + andi r24, 0xf0 ; 1 + lsr r24 ; 1 + lsr r24 ; 1 + clr r25 ; 1 + ldi ZL, LOW(interp_regs) ; 1 + ldi ZH, HIGH(interp_regs) ; 1 + add ZL, r24 ; 1 + adc ZH, r25 ; 1* + ldd r10, Z+0 ; 2 + 
ldd r11, Z+1 ; 2 + ldd r12, Z+2 ; 2 + ldd r13, Z+3 ; 2 ; Add N - mov r24, r4 - andi r24, 0x0f - clr r25 - add r10, r24 - adc r11, r25 - adc r12, r25 - adc r13, r25 + mov r24, r4 ; 1 + andi r24, 0x0f ; 1 + clr r25 ; 1 + add r10, r24 ; 1 + adc r11, r25 ; 1* + adc r12, r25 ; 1* + adc r13, r25 ; 1* - rjmp _decode_done + rjmp _decode_done ; 2 ; ----- Zero-extended 12-bit immediate within instruction operand_0NNN: - movw r10, r4 - ldi r25, 0x0f - and r11, r25 - clr r12 - clr r13 - rjmp _decode_done + movw r10, r4 ; 1 + ldi r25, 0x0f ; 1 + and r11, r25 ; 1 + clr r12 ; 1 + clr r13 ; 1 + rjmp _decode_done ; 2 ; ----- PC + sign-extended 8-bit immediate within instruction operand_PC_ssNN: ; Sign-extend 8-bit immediate - mov r10, r4 - clr r11 - clr r12 - clr r13 - sbrs r10, 7 - rjmp _sext_done_PCssNN - com r11 - com r12 - com r13 + mov r10, r4 ; 1 + clr r11 ; 1 + clr r12 ; 1 + clr r13 ; 1 + sbrs r10, 7 ; 1/2 + rjmp _sext_done_PCssNN ; 2 + com r11 ; 1 + com r12 ; 1 + com r13 ; 1 _sext_done_PCssNN: ; Add PC - clr r25 - add r10, r2 - adc r11, r3 - adc r12, r25 - adc r13, r25 + clr r25 ; 1 + add r10, r2 ; 1 + adc r11, r3 ; 1* + adc r12, r25 ; 1* + adc r13, r25 ; 1* - rjmp _decode_done + rjmp _decode_done ; 2 ; ----- PC + sign-extended 12-bit immediate within instruction operand_PC_sNNN: ; Sign-extend 12-bit immediate - movw r10, r4 - ldi r25, 0x0f - and r11, r25 - clr r12 - clr r13 - sbrs r11, 3 - rjmp _sext_done_PCsNNN - ldi r25, 0xf0 - or r11, r25 - com r12 - com r13 + movw r10, r4 ; 1 + ldi r25, 0x0f ; 1 + and r11, r25 ; 1 + clr r12 ; 1 + clr r13 ; 1 + sbrs r11, 3 ; 1/2 + rjmp _sext_done_PCsNNN ; 2 + ldi r25, 0xf0 ; 1 + or r11, r25 ; 1 + com r12 ; 1 + com r13 ; 1 _sext_done_PCsNNN: ; Add PC - clr r25 - add r10, r2 - adc r11, r3 - adc r12, r25 - adc r13, r25 + clr r25 ; 1 + add r10, r2 ; 1 + adc r11, r3 ; 1* + adc r12, r25 ; 1* + adc r13, r25 ; 1* - rjmp _decode_done + rjmp _decode_done ; 2 operand_none: - rjmp _decode_done + rjmp _decode_done ; 2 ; 
------------------------------------------------------------------------------ ; Instruction dispatch f_dispatch_jumptable: - rjmp dispatch_alu - rjmp dispatch_alu - rjmp dispatch_imm4 - rjmp exec_ldb - rjmp exec_ldh - rjmp exec_ldw - rjmp exec_stb - rjmp exec_sth - rjmp exec_stw - rjmp exec_lpb - rjmp exec_lph - rjmp dispatch_branch - rjmp exec_jal_with_ve - rjmp exec_jmp - rjmp exec_ext - rjmp exec_lpw + rjmp dispatch_alu ; 2 + rjmp dispatch_alu ; 2 + rjmp dispatch_imm4 ; 2 + rjmp exec_ldb ; 2 + rjmp exec_ldh ; 2 + rjmp exec_ldw ; 2 + rjmp exec_stb ; 2 + rjmp exec_sth ; 2 + rjmp exec_stw ; 2 + rjmp exec_lpb ; 2 + rjmp exec_lph ; 2 + rjmp dispatch_branch ; 2 + rjmp exec_jal_with_ve ; 2 + rjmp exec_jmp ; 2 + rjmp exec_ext ; 2 + rjmp exec_lpw ; 2 alu_dispatch_jumptable: - rjmp exec_add - rjmp exec_sub - rjmp exec_and - rjmp exec_or - rjmp exec_xor - rjmp exec_nor - rjmp exec_mov - rjmp exec_mul - rjmp exec_test - rjmp exec_cmp - rjmp exec_udiv - rjmp exec_umod - rjmp exec_sdiv - rjmp exec_smod - rjmp exec_nop - rjmp exec_jal + rjmp exec_add ; 2 + rjmp exec_sub ; 2 + rjmp exec_and ; 2 + rjmp exec_or ; 2 + rjmp exec_xor ; 2 + rjmp exec_nor ; 2 + rjmp exec_mov ; 2 + rjmp exec_mul ; 2 + rjmp exec_test ; 2 + rjmp exec_cmp ; 2 + rjmp exec_udiv ; 2 + rjmp exec_umod ; 2 + rjmp exec_sdiv ; 2 + rjmp exec_smod ; 2 + rjmp exec_nop ; 2 + rjmp exec_jal ; 2 imm4_dispatch_jumptable: - rjmp exec_add - rjmp exec_sub - rjmp exec_mov - rjmp exec_shl - rjmp exec_shrl - rjmp exec_shra - rjmp exec_rol - rjmp exec_ror - rjmp exec_spi - rjmp exec_mft - rjmp exec_mtt - rjmp exec_ddir - rjmp exec_din - rjmp exec_dout - rjmp exec_ain - rjmp exec_aout + rjmp exec_add ; 2 + rjmp exec_sub ; 2 + rjmp exec_mov ; 2 + rjmp exec_shl ; 2 + rjmp exec_shrl ; 2 + rjmp exec_shra ; 2 + rjmp exec_rol ; 2 + rjmp exec_ror ; 2 + rjmp exec_spi ; 2 + rjmp exec_mft ; 2 + rjmp exec_mtt ; 2 + rjmp exec_ddir ; 2 + rjmp exec_din ; 2 + rjmp exec_dout ; 2 + rjmp exec_ain ; 2 + rjmp exec_aout ; 2 
branch_dispatch_jumptable: - rjmp exec_jtab - rjmp exec_jtab - rjmp exec_jtab - rjmp exec_jtab - rjmp exec_jtab - rjmp exec_jtab - rjmp exec_blt - rjmp exec_bge - rjmp exec_bv - rjmp exec_bnv - rjmp exec_bmi - rjmp exec_bpl - rjmp exec_bz - rjmp exec_bnz - rjmp exec_c - rjmp exec_nc + rjmp exec_jtab ; 2 + rjmp exec_jtab ; 2 + rjmp exec_jtab ; 2 + rjmp exec_jtab ; 2 + rjmp exec_jtab ; 2 + rjmp exec_jtab ; 2 + rjmp exec_blt ; 2 + rjmp exec_bge ; 2 + rjmp exec_bv ; 2 + rjmp exec_bnv ; 2 + rjmp exec_bmi ; 2 + rjmp exec_bpl ; 2 + rjmp exec_bz ; 2 + rjmp exec_bnz ; 2 + rjmp exec_c ; 2 + rjmp exec_nc ; 2 dispatch_alu: - mov r24, r4 - andi r24, 0x0f - clr r25 - ldi ZL, LOW(alu_dispatch_jumptable) - ldi ZH, HIGH(alu_dispatch_jumptable) - add ZL, r24 - adc ZH, r25 - ijmp + mov r24, r4 ; 1 + andi r24, 0x0f ; 1 + clr r25 ; 1 + ldi ZL, LOW(alu_dispatch_jumptable) ; 1 + ldi ZH, HIGH(alu_dispatch_jumptable) ; 1 + add ZL, r24 ; 1 + adc ZH, r25 ; 1* + ijmp ; 2 dispatch_imm4: - mov r24, r4 - andi r24, 0x0f - clr r25 - ldi ZL, LOW(imm4_dispatch_jumptable) - ldi ZH, HIGH(imm4_dispatch_jumptable) - add ZL, r24 - adc ZH, r25 - ijmp + mov r24, r4 ; 1 + andi r24, 0x0f ; 1 + clr r25 ; 1 + ldi ZL, LOW(imm4_dispatch_jumptable) ; 1 + ldi ZH, HIGH(imm4_dispatch_jumptable) ; 1 + add ZL, r24 ; 1 + adc ZH, r25 ; 1* + ijmp ; 2 dispatch_branch: - mov r24, r5 - andi r24, 0x0f - clr r25 - ldi ZL, LOW(branch_dispatch_jumptable) - ldi ZH, HIGH(branch_dispatch_jumptable) - add ZL, r24 - adc ZH, r25 - ijmp + mov r24, r5 ; 1 + andi r24, 0x0f ; 1 + clr r25 ; 1 + ldi ZL, LOW(branch_dispatch_jumptable) ; 1 + ldi ZH, HIGH(branch_dispatch_jumptable) ; 1 + add ZL, r24 ; 1 + adc ZH, r25 ; 1* + ijmp ; 2 exec_nop: - rjmp _dispatch_done + rjmp _dispatch_done ; 2 exec_add: - add r6, r10 - adc r7, r11 - adc r8, r12 - adc r9, r13 + add r6, r10 ; 1 + adc r7, r11 ; 1* + adc r8, r12 ; 1* + adc r9, r13 ; 1* ; Flags - in r24, SREG ; Load real flags and keep xxxSVxxC - andi r24, 0x19 - ldi r25, 0xe6 ; Clear old flag bits 
that we're taking from SREG - and r14, r25 - or r14, r24 ; Add flags from SREG into interpreter flags + in r24, SREG ; 1* Load real flags and keep xxxSVxxC + andi r24, 0x19 ; 1 + ldi r25, 0xe6 ; 1 Clear old flag bits that we're taking from SREG + and r14, r25 ; 1 + or r14, r24 ; 1 Add flags from SREG into interpreter flags - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_sub: - sub r6, r10 - sbc r7, r11 - sbc r8, r12 - sbc r9, r13 + sub r6, r10 ; 1 + sbc r7, r11 ; 1* + sbc r8, r12 ; 1* + sbc r9, r13 ; 1* ; Flags - in r24, SREG ; Load real flags and keep xxxSVxxC - andi r24, 0x19 - ldi r25, 0xe6 ; Clear old flag bits that we're taking from SREG - and r14, r25 - or r14, r24 ; Add flags from SREG into interpreter flags + in r24, SREG ; 1* Load real flags and keep xxxSVxxC + andi r24, 0x19 ; 1 + ldi r25, 0xe6 ; 1 Clear old flag bits that we're taking from SREG + and r14, r25 ; 1 + or r14, r24 ; 1 Add flags from SREG into interpreter flags - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_and: - and r6, r10 - and r7, r11 - and r8, r12 - and r9, r13 + and r6, r10 ; 1 + and r7, r11 ; 1 + and r8, r12 ; 1 + and r9, r13 ; 1 ; Flags - in r24, SREG ; Load real flags and keep xxxSVxxx - andi r24, 0x18 - ldi r25, 0xe7 ; Clear old flag bits that we're taking from SREG - and r14, r25 - or r14, r24 ; Add flags from SREG into interpreter flags + in r24, SREG ; 1* Load real flags and keep xxxSVxxx + andi r24, 0x18 ; 1 + ldi r25, 0xe7 ; 1 Clear old flag bits that we're taking from SREG + and r14, r25 ; 1 + or r14, r24 ; 1 Add flags from SREG into interpreter flags - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_or: - or r6, r10 - or r7, r11 - or r8, r12 - or r9, r13 + or r6, r10 ; 1 + or r7, r11 ; 1 + or r8, r12 ; 1 + or r9, r13 ; 1 ; Flags - in r24, SREG ; Load real flags and keep xxxSVxxx - andi r24, 0x18 - ldi r25, 0xe7 ; Clear old flag bits that we're taking from SREG - and 
r14, r25 - or r14, r24 ; Add flags from SREG into interpreter flags + in r24, SREG ; 1* Load real flags and keep xxxSVxxx + andi r24, 0x18 ; 1 + ldi r25, 0xe7 ; 1 Clear old flag bits that we're taking from SREG + and r14, r25 ; 1 + or r14, r24 ; 1 Add flags from SREG into interpreter flags - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_xor: - eor r6, r10 - eor r7, r11 - eor r8, r12 - eor r9, r13 + eor r6, r10 ; 1 + eor r7, r11 ; 1 + eor r8, r12 ; 1 + eor r9, r13 ; 1 ; Flags - in r24, SREG ; Load real flags and keep xxxSVxxx - andi r24, 0x18 - ldi r25, 0xe7 ; Clear old flag bits that we're taking from SREG - and r14, r25 - or r14, r24 ; Add flags from SREG into interpreter flags + in r24, SREG ; 1* Load real flags and keep xxxSVxxx + andi r24, 0x18 ; 1 + ldi r25, 0xe7 ; 1 Clear old flag bits that we're taking from SREG + and r14, r25 ; 1 + or r14, r24 ; 1 Add flags from SREG into interpreter flags - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_nor: - or r6, r10 - or r7, r11 - or r8, r12 - or r9, r13 - com r6 - com r7 - com r8 - com r9 + or r6, r10 ; 1 + or r7, r11 ; 1 + or r8, r12 ; 1 + or r9, r13 ; 1 + com r6 ; 1 + com r7 ; 1 + com r8 ; 1 + com r9 ; 1 ; Flags - in r24, SREG ; Load real flags and keep xxxSVxxx - andi r24, 0x18 - ldi r25, 0xe7 ; Clear old flag bits that we're taking from SREG - and r14, r25 - or r14, r24 ; Add flags from SREG into interpreter flags + in r24, SREG ; 1* Load real flags and keep xxxSVxxx + andi r24, 0x18 ; 1 + ldi r25, 0xe7 ; 1 Clear old flag bits that we're taking from SREG + and r14, r25 ; 1 + or r14, r24 ; 1 Add flags from SREG into interpreter flags - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_mov: - movw r6, r10 - movw r8, r12 - rjmp _dispatch_done_writeback_reg + movw r6, r10 ; 1 + movw r8, r12 ; 1 + rjmp _dispatch_done_writeback_reg ; 2 exec_mul: - clr r0 ; Zero for adding carries - clr r1 ; Carry accumulation - ldi 
r21, 32 ; Loop counter - clr r22 ; Temporary for result - clr r23 - clr r24 - clr r25 + clr r0 ; 1 Zero for adding carries + clr r1 ; 1 Carry accumulation + ldi r21, 32 ; 1 Loop counter + clr r22 ; 1 Temporary for result + clr r23 ; 1 + clr r24 ; 1 + clr r25 ; 1 ; Multiply _mul_loop: - dec r21 - brmi _mul_done - - lsl r22 ; Shift result one bit up - rol r23 - rol r24 - rol r25 - adc r1, r0 - - lsl r10 ; Shift multiplier one bit up - rol r11 - rol r12 - rol r13 - - brcc _mul_loop ; If the multiplier high bit was 1, add multiplicand - add r22, r6 - adc r23, r7 - adc r24, r8 - adc r25, r9 - adc r1, r0 - rjmp _mul_loop + dec r21 ; 1 + brmi _mul_done ; 1/2* + + lsl r22 ; 1 Shift result one bit up + rol r23 ; 1* + rol r24 ; 1* + rol r25 ; 1* + adc r1, r0 ; 1* + + lsl r10 ; 1 Shift multiplier one bit up + rol r11 ; 1* + rol r12 ; 1* + rol r13 ; 1* + + brcc _mul_loop ; 1/2* If the multiplier high bit was 1, add multiplicand + add r22, r6 ; 1 + adc r23, r7 ; 1* + adc r24, r8 ; 1* + adc r25, r9 ; 1* + adc r1, r0 ; 1* + rjmp _mul_loop ; 2 _mul_done: ; Copy low half of temporary to result (frees up temp regs for flags) - movw r6, r22 + movw r6, r22 ; 1 - mov r22, r14 ; Copy flags to temp - andi r22, 0xf6 ; Clear V, and C flags + mov r22, r14 ; 1 Copy flags to temp + andi r22, 0xf6 ; 1 Clear V, and C flags ; Set carry flag if any of the upper 32 bits of result would be set - tst r1 - breq _mul_no_carry - ori r22, 0x01 + tst r1 ; 1 + breq _mul_no_carry ; 1/2* + ori r22, 0x01 ; 1 _mul_no_carry: ; Set overflow flag if sign of result disagrees with signs of inputs - mov r23, r9 - eor r23, r13 ; Top bit of r23 is one if result should be negative - eor r23, r25 ; Top bit of r23 is one if result sign is incorrect - sbrc r23, 7 - ori r22, 0x08 + mov r23, r9 ; 1 + eor r23, r13 ; 1 Top bit of r23 is one if result should be negative + eor r23, r25 ; 1 Top bit of r23 is one if result sign is incorrect + sbrc r23, 7 ; 1/2 + ori r22, 0x08 ; 1 - mov r14, r22 ; Copy temp back into flags + mov 
r14, r22 ; 1 Copy temp back into flags ; Copy high half of temporary to result - movw r8, r24 + movw r8, r24 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_udiv: - mov r1, r9 - eor r1, r13 ; Bit 7 is set if the result needs to be negated - clr r0 ; For adding carries + mov r1, r9 ; 1 + eor r1, r13 ; 1 Bit 7 is set if the result needs to be negated + clr r0 ; 1 For adding carries - ldi r25, 0xf6 ; Discard overflow and carry flags - and r14, r25 + ldi r25, 0xf6 ; 1 Discard overflow and carry flags + and r14, r25 ; 1 ; Set carry flag if dividing by zero ; Then divide anyway, because it's hilarious - mov r25, r10 - or r25, r11 - or r25, r12 - or r25, r13 - brne _udiv_no_divz - ldi r25, 0x01 - or r14, r25 + mov r25, r10 ; 1 + or r25, r11 ; 1 + or r25, r12 ; 1 + or r25, r13 ; 1 + brne _udiv_no_divz ; 1/2* + ldi r25, 0x01 ; 1 + or r14, r25 ; 1 _udiv_no_divz: ; Call/ret take more than three clock cycles, so they can't be used - ldi ZL, LOW(_div_done) - ldi ZH, HIGH(_div_done) - rjmp div_subroutine + ldi ZL, LOW(_div_done) ; 1 + ldi ZH, HIGH(_div_done) ; 1 + rjmp div_subroutine ; 2 _div_done: ; Set the overflow flag if the sign is unexpected - ldi r25, 0x08 - eor r1, r9 - sbrc r1, 7 - or r14, r25 + ldi r25, 0x08 ; 1 + eor r1, r9 ; 1 + sbrc r1, 7 ; 1/2 + or r14, r25 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_umod: - mov r1, r13 ; Bit 7 is set if the result needs to be negated - clr r0 ; For adding carries + mov r1, r13 ; 1 Bit 7 is set if the result needs to be negated + clr r0 ; 1 For adding carries - ldi r25, 0xf6 ; Discard overflow and carry flags - and r14, r25 + ldi r25, 0xf6 ; 1 Discard overflow and carry flags + and r14, r25 ; 1 ; Set carry flag if dividing by zero ; Then divide anyway, because it's hilarious - mov r25, r10 - or r25, r11 - or r25, r12 - or r25, r13 - brne _umod_no_divz - ldi r25, 0x01 - or r14, r25 + mov r25, r10 ; 1 + or r25, r11 ; 1 + or r25, r12 ; 1 + or r25, 
r13 ; 1 + brne _umod_no_divz ; 1/2* + ldi r25, 0x01 ; 1 + or r14, r25 ; 1 _umod_no_divz: ; Call/ret take more than three clock cycles, so they can't be used - ldi ZL, LOW(_mod_done) - ldi ZH, HIGH(_mod_done) - rjmp div_subroutine + ldi ZL, LOW(_mod_done) ; 1 + ldi ZH, HIGH(_mod_done) ; 1 + rjmp div_subroutine ; 2 _mod_done: - movw r6, r22 - movw r8, r24 + movw r6, r22 ; 1 + movw r8, r24 ; 1 ; Set the overflow flag if the sign is unexpected - ldi r25, 0x08 - eor r1, r9 - sbrc r1, 7 - or r14, r25 + ldi r25, 0x08 ; 1 + eor r1, r9 ; 1 + sbrc r1, 7 ; 1/2 + or r14, r25 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_sdiv: - mov r1, r9 - eor r1, r13 ; Bit 7 is set if the result needs to be negated - clr r0 - dec r0 ; For adding carries during inversion + mov r1, r9 ; 1 + eor r1, r13 ; 1 Bit 7 is set if the result needs to be negated + clr r0 ; 1 + dec r0 ; 1 For adding carries during inversion - ldi r25, 0xf6 ; Discard overflow and carry flags - and r14, r25 + ldi r25, 0xf6 ; 1 Discard overflow and carry flags + and r14, r25 ; 1 ; Set carry flag if dividing by zero ; Then divide anyway, because it's hilarious - mov r25, r10 - or r25, r11 - or r25, r12 - or r25, r13 - brne _sdiv_no_divz - ldi r25, 0x01 - or r14, r25 + mov r25, r10 ; 1 + or r25, r11 ; 1 + or r25, r12 ; 1 + or r25, r13 ; 1 + brne _sdiv_no_divz ; 1/2* + ldi r25, 0x01 ; 1 + or r14, r25 ; 1 _sdiv_no_divz: ; Absolute value of dividend - bst r9, 7 - brtc _sdiv_no_inv_a - com r9 - com r8 - com r7 - neg r6 - sbc r7, r0 - sbc r8, r0 - sbc r9, r0 + bst r9, 7 ; 1 + brtc _sdiv_no_inv_a ; 1/2* + com r9 ; 1 + com r8 ; 1 + com r7 ; 1 + neg r6 ; 1 + sbc r7, r0 ; 1* + sbc r8, r0 ; 1* + sbc r9, r0 ; 1* _sdiv_no_inv_a: ; Absolute value of divisor - bst r13, 7 - brtc _sdiv_no_inv_b - com r13 - com r12 - com r11 - neg r10 - sbc r11, r0 - sbc r12, r0 - sbc r13, r0 + bst r13, 7 ; 1 + brtc _sdiv_no_inv_b ; 1/2* + com r13 ; 1 + com r12 ; 1 + com r11 ; 1 + neg r10 ; 1 + sbc r11, r0 ; 1* + sbc 
r12, r0 ; 1* + sbc r13, r0 ; 1* _sdiv_no_inv_b: ; Call/ret take more than three clock cycles, so they can't be used - ldi ZL, LOW(_sdiv_done) - ldi ZH, HIGH(_sdiv_done) - rjmp div_subroutine + ldi ZL, LOW(_sdiv_done) ; 1 + ldi ZH, HIGH(_sdiv_done) ; 1 + rjmp div_subroutine ; 2 _sdiv_done: ; Invert result if necessary - bst r1, 7 - brtc _sdiv_no_inv - com r6 - com r7 - com r8 - com r9 - inc r6 - adc r7, r0 - adc r8, r0 - adc r9, r0 + bst r1, 7 ; 1 + brtc _sdiv_no_inv ; 1/2* + com r6 ; 1 + com r7 ; 1 + com r8 ; 1 + com r9 ; 1 + sub r6, r0 ; 1 r0 is 0xff here, so this adds one and leaves C clear only on wrap + sbc r7, r0 ; 1* Ripple that increment into the upper bytes + sbc r8, r0 ; 1* + sbc r9, r0 ; 1* _sdiv_no_inv: ; Set the overflow flag if the sign is unexpected - ldi r25, 0x08 - eor r1, r9 - sbrc r1, 7 - or r14, r25 + ldi r25, 0x08 ; 1 + eor r1, r9 ; 1 + sbrc r1, 7 ; 1/2 + or r14, r25 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_smod: - mov r1, r9 - eor r1, r13 ; Bit 7 is set if the result is negative - bst r13, 7 - bld r1, 6 ; Bit 6 is set if the modulo needs to be negated - clr r0 - dec r0 ; For adding carries during inversion + mov r1, r9 ; 1 + eor r1, r13 ; 1 Bit 7 is set if the result is negative + bst r13, 7 ; 1 + bld r1, 6 ; 1 Bit 6 is set if the modulo needs to be negated + clr r0 ; 1 + dec r0 ; 1 For adding carries during inversion - ldi r25, 0xf6 ; Discard overflow and carry flags - and r14, r25 + ldi r25, 0xf6 ; 1 Discard overflow and carry flags + and r14, r25 ; 1 ; Set carry flag if dividing by zero ; Then divide anyway, because it's hilarious - mov r25, r10 - or r25, r11 - or r25, r12 - or r25, r13 - brne _smod_no_divz - ldi r25, 0x01 - or r14, r25 + mov r25, r10 ; 1 + or r25, r11 ; 1 + or r25, r12 ; 1 + or r25, r13 ; 1 + brne _smod_no_divz ; 1/2* + ldi r25, 0x01 ; 1 + or r14, r25 ; 1 _smod_no_divz: ; Absolute value of dividend - bst r9, 7 - brtc _smod_no_inv_a - com r9 - com r8 - com r7 - neg r6 - sbc r7, r0 - sbc r8, r0 - sbc r9, r0 + bst r9, 7 ; 1 + brtc _smod_no_inv_a ; 1/2* + com r9 ; 1 + com r8 ; 1 + com r7 ; 1
+ neg r6 ; 1 + sbc r7, r0 ; 1* + sbc r8, r0 ; 1* + sbc r9, r0 ; 1* _smod_no_inv_a: ; Absolute value of divisor - bst r13, 7 - brtc _smod_no_inv_b - com r13 - com r12 - com r11 - neg r10 - sbc r11, r0 - sbc r12, r0 - sbc r13, r0 + bst r13, 7 ; 1 + brtc _smod_no_inv_b ; 1/2* + com r13 ; 1 + com r12 ; 1 + com r11 ; 1 + neg r10 ; 1 + sbc r11, r0 ; 1* + sbc r12, r0 ; 1* + sbc r13, r0 ; 1* _smod_no_inv_b: ; Call/ret take more than three clock cycles, so they can't be used - ldi ZL, LOW(_smod_done) - ldi ZH, HIGH(_smod_done) - rjmp div_subroutine + ldi ZL, LOW(_smod_done) ; 1 + ldi ZH, HIGH(_smod_done) ; 1 + rjmp div_subroutine ; 2 _smod_done: ; Adjust modulo if division result is negative - bst r1, 7 - brtc _smod_no_adj - sub r10, r22 - sbc r11, r23 - sbc r12, r24 - sbc r13, r25 + bst r1, 7 ; 1 + brtc _smod_no_adj ; 1/2* + sub r10, r22 ; 1 + sbc r11, r23 ; 1* + sbc r12, r24 ; 1* + sbc r13, r25 ; 1* ; Invert modulo if divisor was negative - bst r1, 6 - brtc _smod_no_inv - com r13 - com r12 - com r11 - neg r10 - sbc r11, r0 - sbc r12, r0 - sbc r13, r0 + bst r1, 6 ; 1 + brtc _smod_no_inv ; 1/2* + com r13 ; 1 + com r12 ; 1 + com r11 ; 1 + neg r10 ; 1 + sbc r11, r0 ; 1* + sbc r12, r0 ; 1* + sbc r13, r0 ; 1* _smod_no_inv: ; Copy adjusted modulo - movw r6, r10 - movw r8, r12 - rjmp _smod_doflags + movw r6, r10 ; 1 + movw r8, r12 ; 1 + rjmp _smod_doflags ; 2 _smod_no_adj: ; Non-negative, copy modulo as-is - movw r6, r22 - movw r8, r24 + movw r6, r22 ; 1 + movw r8, r24 ; 1 _smod_doflags: ; Set the overflow flag if the sign is unexpected - ldi r25, 0x08 - eor r1, r9 - sbrc r1, 7 - or r14, r25 + ldi r25, 0x08 ; 1 + eor r1, r9 ; 1 + sbrc r1, 7 ; 1/2 + or r14, r25 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 div_subroutine: @@ -1020,721 +1020,721 @@ div_subroutine: ; r10:r11:r12:r13 - divisor ; r21 - loop counter - clr r22 - clr r23 - clr r24 - clr r25 - ldi r21, 32 + clr r22 ; 1 + clr r23 ; 1 + clr r24 ; 1 + clr r25 ; 1 + ldi r21, 32 ; 1 
_div_loop: - lsl r6 - rol r7 - rol r8 - rol r9 - rol r22 - rol r23 - rol r24 - rol r25 - - cp r22, r10 - cpc r23, r11 - cpc r24, r12 - cpc r25, r13 - - brlo _div_next - sub r22, r10 - sbc r23, r11 - sbc r24, r12 - sbc r25, r13 - inc r6 + lsl r6 ; 1 + rol r7 ; 1* + rol r8 ; 1* + rol r9 ; 1* + rol r22 ; 1* + rol r23 ; 1* + rol r24 ; 1* + rol r25 ; 1* + + cp r22, r10 ; 1 + cpc r23, r11 ; 1* + cpc r24, r12 ; 1* + cpc r25, r13 ; 1* + + brlo _div_next ; 1/2* + sub r22, r10 ; 1 + sbc r23, r11 ; 1* + sbc r24, r12 ; 1* + sbc r25, r13 ; 1* + inc r6 ; 1 _div_next: - dec r21 - brne _div_loop + dec r21 ; 1 + brne _div_loop ; 1/2* - ijmp + ijmp ; 2 exec_cmp: - movw r22, r6 - movw r24, r8 - sub r22, r10 - sbc r23, r11 - sbc r24, r12 - sbc r25, r13 - in r21, SREG - andi r21, 0x1d - - or r25, r24 - or r25, r23 - or r25, r22 - brne _cmp_nz - ori r21, 0x02 + movw r22, r6 ; 1 + movw r24, r8 ; 1 + sub r22, r10 ; 1 + sbc r23, r11 ; 1* + sbc r24, r12 ; 1* + sbc r25, r13 ; 1* + in r21, SREG ; 1* + andi r21, 0x1d ; 1 + + or r25, r24 ; 1 + or r25, r23 ; 1 + or r25, r22 ; 1 + brne _cmp_nz ; 1/2* + ori r21, 0x02 ; 1 _cmp_nz: - ldi r25, 0xe0 - and r14, r25 - or r14, r21 - rjmp _dispatch_done_writeback_fixedflags + ldi r25, 0xe0 ; 1 + and r14, r25 ; 1 + or r14, r21 ; 1 + rjmp _dispatch_done_writeback_fixedflags ; 2 exec_test: - clr r0 - mov r25, r14 - andi r25, 0xf9 - - mov r24, r6 - and r24, r10 - or r0, r24 - - mov r24, r7 - and r24, r11 - or r0, r24 - - mov r24, r8 - and r24, r12 - or r0, r24 - - mov r24, r9 - and r24, r13 - sbrc r24, 7 - ori r25, 0x04 - or r0, r24 - - breq _test_z - ori r25, 0x02 + clr r0 ; 1 + mov r25, r14 ; 1 + andi r25, 0xf9 ; 1 + + mov r24, r6 ; 1 + and r24, r10 ; 1 + or r0, r24 ; 1 + + mov r24, r7 ; 1 + and r24, r11 ; 1 + or r0, r24 ; 1 + + mov r24, r8 ; 1 + and r24, r12 ; 1 + or r0, r24 ; 1 + + mov r24, r9 ; 1 + and r24, r13 ; 1 + sbrc r24, 7 ; 1/2 + ori r25, 0x04 ; 1 + or r0, r24 ; 1 + + brne _test_z ; 1/2* Skip setting Z unless every ANDed byte was zero + ori r25, 0x02 ; 1 _test_z: - mov r14, r25 - rjmp
_dispatch_done_writeback_fixedflags + mov r14, r25 ; 1 + rjmp _dispatch_done_writeback_fixedflags ; 2 exec_jal_with_ve: ; Change destination pointer to V[E] - ldi r24, 0x0e*4 - clr r25 - ldi r16, LOW(interp_regs) - ldi r17, HIGH(interp_regs) - add r16, r24 - adc r17, r25 + ldi r24, 0x0e*4 ; 1 + clr r25 ; 1 + ldi r16, LOW(interp_regs) ; 1 + ldi r17, HIGH(interp_regs) ; 1 + add r16, r24 ; 1 + adc r17, r25 ; 1* ; Fall-through to normal jump-and-link code exec_jal: - movw r6, r2 - clr r8 - clr r9 - movw r2, r10 - rjmp _dispatch_done_writeback_reg + movw r6, r2 ; 1 + clr r8 ; 1 + clr r9 ; 1 + movw r2, r10 ; 1 + rjmp _dispatch_done_writeback_reg ; 2 exec_shl: - clr r24 ; Zero for adding carries - clr r25 ; To accumulate carries - mov r1, r9 ; For overflow flag + clr r24 ; 1 Zero for adding carries + clr r25 ; 1 To accumulate carries + mov r1, r9 ; 1 For overflow flag _shl_loop: - dec r10 ; Decrement counter - brlt _shl_done + dec r10 ; 1 Decrement counter + brlt _shl_done ; 1/2* - lsl r6 ; Shift left by a bit - rol r7 - rol r8 - rol r9 - adc r25, r24 ; Accumulate carries + lsl r6 ; 1 Shift left by a bit + rol r7 ; 1* + rol r8 ; 1* + rol r9 ; 1* + adc r25, r24 ; 1* Accumulate carries - rjmp _shl_loop + rjmp _shl_loop ; 2 _shl_done: - mov r24, r14 ; Discard overflow and carry flags - andi r24, 0xf6 + mov r24, r14 ; 1 Discard overflow and carry flags + andi r24, 0xf6 ; 1 ; Set carry flag if any bits were shifted out - tst r25 - breq _shl_no_carry - ori r24, 0x01 + tst r25 ; 1 + breq _shl_no_carry ; 1/2* + ori r24, 0x01 ; 1 _shl_no_carry: ; Set overflow flag if sign changed - eor r1, r9 - sbrc r1, 7 - ori r24, 0x04 + eor r1, r9 ; 1 + sbrc r1, 7 ; 1/2 + ori r24, 0x08 ; 1 V is bit 3 (0x04 is N, which the flag writeback recomputes) - mov r14, r24 + mov r14, r24 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_shrl: - clr r24 ; Zero for adding carries - clr r25 ; To accumulate carries - mov r1, r9 ; For overflow flag + clr r24 ; 1 Zero for adding carries + clr r25 ; 1 To accumulate carries + mov
r1, r9 ; 1 For overflow flag _shrl_loop: - dec r10 ; Decrement counter - brlt _shrl_done + dec r10 ; 1 Decrement counter + brlt _shrl_done ; 1/2* - lsr r9 - ror r8 - ror r7 - ror r6 - adc r25, r24 ; Accumulate carries + lsr r9 ; 1 + ror r8 ; 1* + ror r7 ; 1* + ror r6 ; 1* + adc r25, r24 ; 1* Accumulate carries - rjmp _shrl_loop + rjmp _shrl_loop ; 2 _shrl_done: - mov r24, r14 ; Discard overflow and carry flags - andi r24, 0xf6 + mov r24, r14 ; 1 Discard overflow and carry flags + andi r24, 0xf6 ; 1 ; Set carry flag if any bits were shifted out - tst r25 - breq _shrl_no_carry - ori r24, 0x01 + tst r25 ; 1 + breq _shrl_no_carry ; 1/2* + ori r24, 0x01 ; 1 _shrl_no_carry: ; Set overflow flag if sign changed - eor r1, r9 - sbrc r1, 7 - ori r24, 0x04 + eor r1, r9 ; 1 + sbrc r1, 7 ; 1/2 + ori r24, 0x04 ; 1 - mov r14, r24 + mov r14, r24 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_shra: - clr r24 ; Zero for adding carries - clr r25 ; To accumulate carries + clr r24 ; 1 Zero for adding carries + clr r25 ; 1 To accumulate carries _shra_loop: - dec r10 ; Decrement counter - brlt _shra_done + dec r10 ; 1 Decrement counter + brlt _shra_done ; 1/2* - asr r9 - ror r8 - ror r7 - ror r6 - adc r25, r24 ; Accumulate carries + asr r9 ; 1 + ror r8 ; 1* + ror r7 ; 1* + ror r6 ; 1* + adc r25, r24 ; 1* Accumulate carries - rjmp _shra_loop + rjmp _shra_loop ; 2 _shra_done: - mov r24, r14 ; Discard overflow and carry flags - andi r24, 0xf6 + mov r24, r14 ; 1 Discard overflow and carry flags + andi r24, 0xf6 ; 1 ; Set carry flag if any bits were shifted out - tst r25 - breq _shra_no_carry - ori r24, 0x01 + tst r25 ; 1 + breq _shra_no_carry ; 1/2* + ori r24, 0x01 ; 1 _shra_no_carry: ; Sign will never change, leave overflow flag clear - mov r14, r24 + mov r14, r24 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_rol: - mov r1, r9 ; For overflow flag + mov r1, r9 ; 1 For overflow flag _rol_loop: - dec r10 ; 
Decrement counter - brlt _rol_done - - clc ; Pull highest bit into carry - sbrc r9, 7 - sec - rol r6 - rol r7 - rol r8 - rol r9 - - rjmp _rol_loop + dec r10 ; 1 Decrement counter + brlt _rol_done ; 1/2* + + clc ; 1 Pull highest bit into carry + sbrc r9, 7 ; 1/2 + sec ; 1 + rol r6 ; 1* + rol r7 ; 1* + rol r8 ; 1* + rol r9 ; 1* + + rjmp _rol_loop ; 2 _rol_done: - mov r24, r14 ; Discard overflow and carry flags - andi r24, 0xf6 + mov r24, r14 ; 1 Discard overflow and carry flags + andi r24, 0xf6 ; 1 ; No bits will be lost, leave carry flag clear ; Set overflow flag if sign changed - eor r1, r9 - sbrc r1, 7 - ori r24, 0x04 + eor r1, r9 ; 1 + sbrc r1, 7 ; 1/2 + ori r24, 0x04 ; 1 - mov r14, r24 + mov r14, r24 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_ror: - mov r1, r9 ; For overflow flag + mov r1, r9 ; 1 For overflow flag _ror_loop: - dec r10 ; Decrement counter - brlt _ror_done - - clc ; Pull lowest bit into carry - sbrc r6, 0 - sec - ror r9 - ror r8 - ror r7 - ror r6 - - rjmp _ror_loop + dec r10 ; 1 Decrement counter + brlt _ror_done ; 1/2* + + clc ; 1 Pull lowest bit into carry + sbrc r6, 0 ; 1/2 + sec ; 1 + ror r9 ; 1* + ror r8 ; 1* + ror r7 ; 1* + ror r6 ; 1* + + rjmp _ror_loop ; 2 _ror_done: - mov r14, r14 ; Discard overflow and carry flags - andi r24, 0xf6 + mov r14, r14 ; 1 Discard overflow and carry flags + andi r24, 0xf6 ; 1 ; No bits will be lost, leave carry flag clear ; Set overflow flag if sign changed - eor r1, r9 - sbrc r1, 7 - ori r24, 0x04 + eor r1, r9 ; 1 + sbrc r1, 7 ; 1/2 + ori r24, 0x04 ; 1 - mov r14, r24 + mov r14, r24 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_spi: - movw ZL, r6 + movw ZL, r6 ; 1 _spi_byte_loop: - dec r10 - brlt _spi_done - ld r25, Z - out SPDR, r25 + dec r10 ; 1 + brlt _spi_done ; 1/2* + ld r25, Z ; 2 + out SPDR, r25 ; 1 _spi_wait_loop: - in r25, SPSR - sbrs r25, SPIF - rjmp _spi_wait_loop - in r25, SPDR - st Z+, r25 - rjmp _spi_byte_loop 
+ in r25, SPSR ; 1 + sbrs r25, SPIF ; 1/2 + rjmp _spi_wait_loop ; 2 + in r25, SPDR ; 1 + st Z+, r25 ; 2 + rjmp _spi_byte_loop ; 2 _spi_done: - rjmp _dispatch_done + rjmp _dispatch_done ; 2 exec_mft: ; Restrict operand to 0-f - ldi r25, 0x0f - and r10, r25 + ldi r25, 0x0f ; 1 + and r10, r25 ; 1 - lsl r10 - clr r25 + lsl r10 ; 1 + clr r25 ; 1 - ldi ZL, LOW(interp_clocks) - ldi ZH, HIGH(interp_clocks) - add ZL, r10 - adc ZH, r25 + ldi ZL, LOW(interp_clocks) ; 1 + ldi ZH, HIGH(interp_clocks) ; 1 + add ZL, r10 ; 1 + adc ZH, r25 ; 1* - ld r6, Z+ - ld r7, Z+ - clr r8 - clr r9 + ld r6, Z+ ; 2 + ld r7, Z+ ; 2 + clr r8 ; 1 + clr r9 ; 1 - rjmp _dispatch_done_writeback_reg + rjmp _dispatch_done_writeback_reg ; 2 exec_mtt: ; Restrict operand to 0-f - ldi r25, 0x0f - and r10, r25 + ldi r25, 0x0f ; 1 + and r10, r25 ; 1 - lsl r10 - clr r25 + lsl r10 ; 1 + clr r25 ; 1 - ldi ZL, LOW(interp_clocks) - ldi ZH, HIGH(interp_clocks) - add ZL, r10 - adc ZH, r25 + ldi ZL, LOW(interp_clocks) ; 1 + ldi ZH, HIGH(interp_clocks) ; 1 + add ZL, r10 ; 1 + adc ZH, r25 ; 1* - st Z+, r6 - st Z+, r7 + st Z+, r6 ; 2 + st Z+, r7 ; 2 - rjmp _dispatch_done + rjmp _dispatch_done ; 2 exec_ddir: ; Restrict operand to 0-f - ldi r25, 0x0f - and r10, r25 + ldi r25, 0x0f ; 1 + and r10, r25 ; 1 ; Extract LSB from first operand - ldi r23, 0x01 - and r6, r23 - clr r7 + ldi r23, 0x01 ; 1 + and r6, r23 ; 1 + clr r7 ; 1 ; Mask of all bits except LSB - ldi r24, 0xfe - ldi r25, 0xff + ldi r24, 0xfe ; 1 + ldi r25, 0xff ; 1 ; Rotate LSB and mask into position specified by second operand _ddir_loop: - dec r10 - brlt _din_loop_done + dec r10 ; 1 + brlt _din_loop_done ; 1/2* - sec - rol r24 - rol r25 + sec ; 1 + rol r24 ; 1* + rol r25 ; 1* - clc - rol r6 - rol r7 + clc ; 1 + rol r6 ; 1* + rol r7 ; 1* - rjmp _ddir_loop + rjmp _ddir_loop ; 2 _ddir_loop_done: ; Read-modify-write - in r22, DDRB - in r23, DDRC - and r22, r24 - and r23, r25 - or r22, r6 - or r23, r7 - out DDRB, r22 - out DDRC, r23 + in r22, DDRB ; 1 + in r23, DDRC 
; 1 + and r22, r24 ; 1 + and r23, r25 ; 1 + or r22, r6 ; 1 + or r23, r7 ; 1 + out DDRB, r22 ; 1 + out DDRC, r23 ; 1 - rjmp _dispatch_done + rjmp _dispatch_done ; 2 exec_din: ; Restrict operand to 0-f - ldi r25, 0x0f - and r10, r25 + ldi r25, 0x0f ; 1 + and r10, r25 ; 1 ; Read port values - in r24, PINB - in r25, PINC + in r24, PINB ; 1 + in r25, PINC ; 1 ; Shift desired value into LSB _din_loop: - dec r10 - brlt _din_loop_done + dec r10 ; 1 + brlt _din_loop_done ; 1/2* - clc - ror r25 - ror r24 + clc ; 1 + ror r25 ; 1* + ror r24 ; 1* - rjmp _din_loop + rjmp _din_loop ; 2 _din_loop_done: ; Extract port LSB and put into result LSB - andi r24, 0x01 - ldi r25, 0xfe - and r6, r25 - or r6, r24 + andi r24, 0x01 ; 1 + ldi r25, 0xfe ; 1 + and r6, r25 ; 1 + or r6, r24 ; 1 - rjmp _dispatch_done_writeback_flags + rjmp _dispatch_done_writeback_flags ; 2 exec_dout: ; Restrict operand to 0-f - ldi r25, 0x0f - and r10, r25 + ldi r25, 0x0f ; 1 + and r10, r25 ; 1 ; Extract LSB from first operand - ldi r23, 0x01 - and r6, r23 - clr r7 + ldi r23, 0x01 ; 1 + and r6, r23 ; 1 + clr r7 ; 1 ; Mask of all bits except LSB - ldi r24, 0xfe - ldi r25, 0xff + ldi r24, 0xfe ; 1 + ldi r25, 0xff ; 1 ; Rotate LSB and mask into position specified by second operand _dout_loop: - dec r10 - brlt _dout_loop_done + dec r10 ; 1 + brlt _dout_loop_done ; 1/2* - sec - rol r24 - rol r25 + sec ; 1 + rol r24 ; 1* + rol r25 ; 1* - clc - rol r6 - rol r7 + clc ; 1 + rol r6 ; 1* + rol r7 ; 1* - rjmp _dout_loop + rjmp _dout_loop ; 2 _dout_loop_done: ; Read-modify-write - in r22, PORTB - in r23, PORTC - and r22, r24 - and r23, r25 - or r22, r6 - or r23, r7 - out PORTB, r22 - out PORTC, r23 + in r22, PORTB ; 1 + in r23, PORTC ; 1 + and r22, r24 ; 1 + and r23, r25 ; 1 + or r22, r6 ; 1 + or r23, r7 ; 1 + out PORTB, r22 ; 1 + out PORTC, r23 ; 1 - rjmp _dispatch_done + rjmp _dispatch_done ; 2 exec_ain: ; Set the ADC source - lds r25, ADMUX - andi r25, 0xf0 - mov r24, r10 - andi r24, 0x0f - or r25, r24 - sts ADMUX, r25 + 
lds r25, ADMUX ; 2 + andi r25, 0xf0 ; 1 + mov r24, r10 ; 1 + andi r24, 0x0f ; 1 + or r25, r24 ; 1 + sts ADMUX, r25 ; 2 ; Trigger a single conversion - lds r25, ADCSRA - ori r25, (1 << ADSC) - sts ADCSRA, r25 + lds r25, ADCSRA ; 2 + ori r25, (1 << ADSC) ; 1 + sts ADCSRA, r25 ; 2 ; Wait for conversion to complete _ain_wait: - lds r25, ADCSRA - sbrs r25, ADIF - rjmp _ain_wait - sts ADCSRA, r25 + lds r25, ADCSRA ; 2 + sbrs r25, ADIF ; 1/2 + rjmp _ain_wait ; 2 + sts ADCSRA, r25 ; 2 ; Read value from ADC - lds r6, ADCL - lds r7, ADCH - clr r8 - clr r9 + lds r6, ADCL ; 2 + lds r7, ADCH ; 2 + clr r8 ; 1 + clr r9 ; 1 - rjmp _dispatch_done_writeback_reg + rjmp _dispatch_done_writeback_reg ; 2 exec_aout: ; Restrict operand to 0-7 - ldi r25, 0x07 - and r10, r25 + ldi r25, 0x07 ; 1 + and r10, r25 ; 1 - clr r25 + clr r25 ; 1 - ldi ZL, LOW(_aout_jtab) - ldi ZH, HIGH(_aout_jtab) - add ZL, r10 - adc ZH, r25 - ijmp + ldi ZL, LOW(_aout_jtab) ; 1 + ldi ZH, HIGH(_aout_jtab) ; 1 + add ZL, r10 ; 1 + adc ZH, r25 ; 1* + ijmp ; 2 _aout_jtab: - rjmp _aout_ocr0a - rjmp _aout_ocr0b - rjmp _aout_ocr1a - rjmp _aout_ocr1b - rjmp _aout_ocr2a - rjmp _aout_ocr2b - rjmp _aout_done - rjmp _aout_done + rjmp _aout_ocr0a ; 2 + rjmp _aout_ocr0b ; 2 + rjmp _aout_ocr1a ; 2 + rjmp _aout_ocr1b ; 2 + rjmp _aout_ocr2a ; 2 + rjmp _aout_ocr2b ; 2 + rjmp _aout_done ; 2 + rjmp _aout_done ; 2 _aout_ocr0a: - out OCR0A, r6 - rjmp _aout_done + out OCR0A, r6 ; 1 + rjmp _aout_done ; 2 _aout_ocr0b: - out OCR0B, r6 - rjmp _aout_done + out OCR0B, r6 ; 1 + rjmp _aout_done ; 2 _aout_ocr1a: - sts OCR1AH, r7 - sts OCR1AL, r8 - rjmp _aout_done + sts OCR1AH, r7 ; 2 + sts OCR1AL, r8 ; 2 + rjmp _aout_done ; 2 _aout_ocr1b: - sts OCR1BH, r7 - sts OCR1BL, r8 - rjmp _aout_done + sts OCR1BH, r7 ; 2 + sts OCR1BL, r8 ; 2 + rjmp _aout_done ; 2 _aout_ocr2a: - sts OCR2A, r6 - rjmp _aout_done + sts OCR2A, r6 ; 2 + rjmp _aout_done ; 2 _aout_ocr2b: - sts OCR2B, r6 - rjmp _aout_done + sts OCR2B, r6 ; 2 + rjmp _aout_done ; 2 _aout_done: - rjmp 
_dispatch_done + rjmp _dispatch_done ; 2 exec_ldb: ; Load byte - movw ZL, r10 - ld r6, Z+ + movw ZL, r10 ; 1 + ld r6, Z+ ; 2 ; Sign extend - clr r0 - sbrc r6, 7 - com r0 - mov r7, r0 - mov r8, r0 - mov r9, r0 + clr r0 ; 1 + sbrc r6, 7 ; 1/2 + com r0 ; 1 + mov r7, r0 ; 1 + mov r8, r0 ; 1 + mov r9, r0 ; 1 - rjmp _dispatch_done_writeback_reg + rjmp _dispatch_done_writeback_reg ; 2 exec_ldh: ; Load halfword - movw ZL, r10 - ld r6, Z+ - ld r7, Z+ + movw ZL, r10 ; 1 + ld r6, Z+ ; 2 + ld r7, Z+ ; 2 ; Sign extend - clr r0 - sbrc r7, 7 - com r0 - mov r8, r0 - mov r9, r0 + clr r0 ; 1 + sbrc r7, 7 ; 1/2 + com r0 ; 1 + mov r8, r0 ; 1 + mov r9, r0 ; 1 - rjmp _dispatch_done_writeback_reg + rjmp _dispatch_done_writeback_reg ; 2 exec_ldw: ; Load word - movw ZL, r10 - ld r6, Z+ - ld r7, Z+ - ld r8, Z+ - ld r9, Z+ + movw ZL, r10 ; 1 + ld r6, Z+ ; 2 + ld r7, Z+ ; 2 + ld r8, Z+ ; 2 + ld r9, Z+ ; 2 - rjmp _dispatch_done_writeback_reg + rjmp _dispatch_done_writeback_reg ; 2 exec_lpb: ; Load byte - movw ZL, r10 - lpm r6, Z+ + movw ZL, r10 ; 1 + lpm r6, Z+ ; 3 ; Sign extend - clr r0 - sbrc r6, 7 - com r0 - mov r7, r0 - mov r8, r0 - mov r9, r0 + clr r0 ; 1 + sbrc r6, 7 ; 1/2 + com r0 ; 1 + mov r7, r0 ; 1 + mov r8, r0 ; 1 + mov r9, r0 ; 1 - rjmp _dispatch_done_writeback_reg + rjmp _dispatch_done_writeback_reg ; 2 exec_lph: ; Load halfword - movw ZL, r10 - lpm r6, Z+ - lpm r7, Z+ + movw ZL, r10 ; 1 + lpm r6, Z+ ; 3 + lpm r7, Z+ ; 3 ; Sign extend - clr r0 - sbrc r7, 7 - com r0 - mov r8, r0 - mov r9, r0 + clr r0 ; 1 + sbrc r7, 7 ; 1/2 + com r0 ; 1 + mov r8, r0 ; 1 + mov r9, r0 ; 1 - rjmp _dispatch_done_writeback_reg + rjmp _dispatch_done_writeback_reg ; 2 exec_lpw: ; Load word - movw ZL, r10 - lpm r6, Z+ - lpm r7, Z+ - lpm r8, Z+ - lpm r9, Z+ + movw ZL, r10 ; 1 + lpm r6, Z+ ; 3 + lpm r7, Z+ ; 3 + lpm r8, Z+ ; 3 + lpm r9, Z+ ; 3 - rjmp _dispatch_done_writeback_reg + rjmp _dispatch_done_writeback_reg ; 2 exec_stb: - movw ZL, r10 - st Z+, r6 - rjmp _dispatch_done + movw ZL, r10 ; 1 + st Z+, r6 ; 
2 + rjmp _dispatch_done ; 2 exec_sth: - movw ZL, r10 - st Z+, r6 - st Z+, r7 - rjmp _dispatch_done + movw ZL, r10 ; 1 + st Z+, r6 ; 2 + st Z+, r7 ; 2 + rjmp _dispatch_done ; 2 exec_stw: - movw ZL, r10 - st Z+, r6 - st Z+, r7 - st Z+, r8 - st Z+, r9 - rjmp _dispatch_done + movw ZL, r10 ; 1 + st Z+, r6 ; 2 + st Z+, r7 ; 2 + st Z+, r8 ; 2 + st Z+, r9 ; 2 + rjmp _dispatch_done ; 2 exec_ext: ; Can't use regular call/ret instructions, they take more than 3 cycles ; Put the return address into temporaries - ldi r24, LOW(_ext_done) - ldi r25, HIGH(_ext_done) + ldi r24, LOW(_ext_done) ; 1 + ldi r25, HIGH(_ext_done) ; 1 ; Execute at the target address - movw ZL, r10 - ijmp + movw ZL, r10 ; 1 + ijmp ; 2 _ext_done: - rjmp _dispatch_done + rjmp _dispatch_done ; 2 exec_jtab: - add r10, r6 ; Add V[X] to PC+sext(nn) - adc r11, r7 - movw r2, r10 - rjmp _dispatch_done + add r10, r6 ; 1 Add V[X] to PC+sext(nn) + adc r11, r7 ; 1* + movw r2, r10 ; 1 + rjmp _dispatch_done ; 2 exec_blt: - sbrc r14, 4 - movw r2, r10 ; Branch if S bit is set - rjmp _dispatch_done + sbrc r14, 4 ; 1/2 + movw r2, r10 ; 1 Branch if S bit is set + rjmp _dispatch_done ; 2 exec_bge: - sbrs r14, 4 - movw r2, r10 ; Branch if S bit is clear - rjmp _dispatch_done + sbrs r14, 4 ; 1/2 + movw r2, r10 ; 1 Branch if S bit is clear + rjmp _dispatch_done ; 2 exec_bv: - sbrc r14, 3 - movw r2, r10 ; Branch if V bit is set - rjmp _dispatch_done + sbrc r14, 3 ; 1/2 + movw r2, r10 ; 1 Branch if V bit is set + rjmp _dispatch_done ; 2 exec_bnv: - sbrs r14, 3 - movw r2, r10 ; Branch if V bit is clear - rjmp _dispatch_done + sbrs r14, 3 ; 1/2 + movw r2, r10 ; 1 Branch if V bit is clear + rjmp _dispatch_done ; 2 exec_bmi: - sbrc r14, 2 - movw r2, r10 ; Branch if N bit is set - rjmp _dispatch_done + sbrc r14, 2 ; 1/2 + movw r2, r10 ; 1 Branch if N bit is set + rjmp _dispatch_done ; 2 exec_bpl: - sbrs r14, 2 - movw r2, r10 ; Branch if N bit is clear - rjmp _dispatch_done + sbrs r14, 2 ; 1/2 + movw r2, r10 ; 1 Branch if N bit is clear + 
rjmp _dispatch_done ; 2 exec_bz: - sbrc r14, 1 - movw r2, r10 ; Branch if Z bit is set - rjmp _dispatch_done + sbrc r14, 1 ; 1/2 + movw r2, r10 ; 1 Branch if Z bit is set + rjmp _dispatch_done ; 2 exec_bnz: - sbrs r14, 1 - movw r2, r10 ; Branch if Z bit is clear - rjmp _dispatch_done + sbrs r14, 1 ; 1/2 + movw r2, r10 ; 1 Branch if Z bit is clear + rjmp _dispatch_done ; 2 exec_c: - sbrc r14, 0 - movw r2, r10 ; Branch if C bit is set - rjmp _dispatch_done + sbrc r14, 0 ; 1/2 + movw r2, r10 ; 1 Branch if C bit is set + rjmp _dispatch_done ; 2 exec_nc: - sbrs r14, 0 - movw r2, r10 ; Branch if C bit is clear - rjmp _dispatch_done + sbrs r14, 0 ; 1/2 + movw r2, r10 ; 1 Branch if C bit is clear + rjmp _dispatch_done ; 2 exec_jmp: - movw r2, r10 - rjmp _dispatch_done + movw r2, r10 ; 1 + rjmp _dispatch_done ; 2 ; ------------------------------------------------------------------------------