diff --git a/test/femto.tal b/test/femto.tal new file mode 100644 index 0000000..d2b3ce5 --- /dev/null +++ b/test/femto.tal @@ -0,0 +1,1730 @@ +( femto.tal ) +( ) +( requires terminal to be in raw mode ) +( see femto launcher script for more details ) + +( TODO: ) +( - get long line truncation/scrolling working ) +( - allow line numbers to be toggled off ) +( - open file command? ) +( - close file command? ) +( - search&replace ) + +|00 @System [ + &vector $2 &wst $1 &rst $1 &pad $4 + &r $2 &g $2 &b $2 &debug $1 &halt $1 ] + +|10 @Console [ + &vector $2 &stdin $1 &pad1 $1 &proc-get $1 &host-get $1 &pad2 $1 &type $1 + &stdout $1 &stderr $1 &proc-put $1 &pad3 $1 ¶m $2 &opts $1 &host-put $1 ] + +|a0 @File [ + &vector $2 &success $2 &stat $2 &delete $1 &append $1 + &name $2 &length $2 &read $2 &write $2 ] + +( MAX file size is currently #ce80, i.e. 52864 bytes ) + +%dbg { #ff .System/debug DEO } +%emit { .Console/stdout DEO } +%sp { #2018 DEO } +%nl { #0a18 DEO } +%cr { #0d18 DEO } +%crlf { cr nl } +%ansi { #1b18 DEO #5b18 DEO } + +( emit macros ) +( ) +( these save one byte and are easier to read. ) +%emit-! { LIT2 "! 18 DEO } +%emit-$ { LIT2 "$ 18 DEO } +%emit-lpar { LIT2 28 18 DEO } +%emit-rpar { LIT2 29 18 DEO } +%emit-, { LIT2 ", 18 DEO } +%emit-0 { LIT2 "0 18 DEO } +%emit-1 { LIT2 "1 18 DEO } +%emit-2 { LIT2 "2 18 DEO } +%emit-3 { LIT2 "3 18 DEO } +%emit-6 { LIT2 "6 18 DEO } +%emit-7 { LIT2 "7 18 DEO } +%emit-: { LIT2 ": 18 DEO } +%emit-; { LIT2 "; 18 DEO } +%emit-C { LIT2 "C 18 DEO } +%emit-H { LIT2 "H 18 DEO } +%emit-J { LIT2 "J 18 DEO } +%emit-K { LIT2 "K 18 DEO } +%emit-[ { LIT2 "[ 18 DEO } +%emit-] { LIT2 "] 18 DEO } +%emit-m { LIT2 "m 18 DEO } +%emit-n { LIT2 "n 18 DEO } +%emit-~ { LIT2 "~ 18 DEO } + +%lmargin { #0006 } + +( zero page ) +|0000 + +( terminal size information ) +( ) +( for now these are constant but eventually we could detect terminal resizes ) +@term [ + &cols $2 ( relative x coordinate of cursor, from 0 ) + &rows $2 ( relative y coordinate of cursor, from 1 ) +] + +( configuration settings used when editing ) +( ) +( these can be changed at any time without breaking anything else ) +@config [ + &tab-width $2 ( how many spaces to display tab chars ) + &tab-adjust $2 ( how many "extra" spaces; tab-width - 1 ) + &insert-tabs $1 ( tab key inserts tabs when true ) + &color $2 ( digits of highlight color in reverse order ) + &red $2 ( digits of color for line/EOF markers in reverse order ) +] + +( tracks information related to the buffer's view of data ) +( ) +( limit and line-count change when we modify the buffer. ) +( offset and line-offset change when we move the view port. ) +@buffer [ + &limit $2 ( last byte of actual data (not including \0) + 1 ) + &line-count $2 ( total number of lines in file ) + &offset $2 ( first byte of data visible in terminal ) + &line-offset $2 ( first line of text visible in terminal ) +] + +( relative cursor positions, e.g. 0 to cols-1 ) +( ) +( when these change we may need to move the view port as well ) +@cursor [ + &col $2 ( current column value 0-n (may exceed lenght of row) ) + &row $2 ( current relative row value, 0-(height-1) ) +] + +( tracks overall editor state between events ) +@state [ + &in-help $1 ( are we showing help? ) + &in-undo $1 ( are we currently in undo? ) + &key $1 ( last key read ) + &saw-esc $1 ( did we just see ESC? ) + &saw-xterm $1 ( did we just see an ESC [ xterm sequence? ) + &saw-vt $1 ( did we just see an ESC [ $N vt sequence? ) + &redraw $1 ( redrawing: bits determine which parts ) + ( 0x01 cursor ) + ( 0x02 statusbar ) + ( 0x04 prompt ) + ( 0x08 matches ) + ( 0x10 body and everything else ) + &message $1 ( did we just print a message? ) + &modified $1 ( has the buffer been modified? ) + &quitting $1 ( are we in the process of quitting? ) +] + +( prompt uses .tmp/pos and .tmp/data to track user input ) +@prompt [ + &active $1 ( is prompt currently active? ) + &vector $2 ( what code to run when user responds ) + &string $2 ( string to print for the prompt ) +] + +( temporary input buffer used for a variety of things ) +@tmp [ + &pos $2 ( temporary pointer to address when reading data ) + &data $80 ( small scratch pad when reading data ) +] + +( search uses .tmp/pos and .tmp/data to track query string ) +@searching [ + &active $1 ( are we displaying search results? ) + &orig-row $2 ( row we began the search at ) + &orig-col $2 ( col we began the search at ) + ®ex $2 ( regex to be stored if any ) + &start $2 ( absolute start pos of match ) + &end $2 ( absolute limit pos of match ) +] + +( startup ) +|0100 + ( init zero page ) + #0050 .term/cols STZ2 + #0018 .term/rows STZ2 + #0004 .config/tab-width STZ2 + #0003 .config/tab-adjust STZ2 + #00 .config/insert-tabs STZ +( #3333 .config/color STZ2 ) +( #3133 .config/red STZ2 ) + #3033 .config/color STZ2 + #3033 .config/red STZ2 + ;data .buffer/offset STZ2 + + init-terminal + + ( start reading the filename from argv ) + ;filename .tmp/pos STZ2 + ;read-filename .Console/vector DEO2 + BRK + +( run a given command with no input/output ) +@run-cmd ( cmd* -> ) + .Console/param DEO2 + #01 .Console/opts DEO + #01 .Console/host-put DEO JMP2r + +( restore the terminal to "normal" settings ) +@restore-terminal + ;stty-sane !run-cmd + +( put the terminal in raw mode ) +@init-terminal ( -> ) + ;stty-raw !run-cmd + +( command strings to use ) +@stty-sane "stty 20 "sane 00 +@stty-raw "stty 20 "raw 20 "-echo 00 + +( import uxn regex library ) +~regex.tal + +( ERROR HANDLING ) + +( using errorq will print the given message before causing ) +( the interpreter to halt. ) +@errorq ( msg* -> ) + emit-! sp print nl dbg BRK + +( open the given file at editor start up ) +( ) +( this is called during startup by ;read-filename ) +( ) +( TODO: enable closing/opening files with editor already running ) +@open-file ( filename* -> ) + .File/name DEO2 + #ce81 .File/length DEO2 + ;data .File/read DEO2 + + .File/success DEI2 #0000 EQU2 .state/modified STZ + .File/success DEI2 #ce81 LTH2 ?&ok + crlf + ;messages/input-error print + ;filename print crlf !quit-now + + ( calculate buffer limit address using start + size ) + &ok .File/success DEI2 ;data ADD2 .buffer/limit STZ2 + JMP2r + +( ask the terminal for its size ) +( ) +( called during editor initialization by ;read-filename ) +( ) +( TODO: consider supporting terminal resizing ) +@setup-terminal-size ( -> ) + #03e7 DUP2 term-move-cursor + term-get-cursor-position + ;tmp/data .tmp/pos STZ2 + ;receive-terminal-size .Console/vector DEO2 + JMP2r + +( receive size information from the terminal ) +( ) +( called from Console/vector after ;setup-terminal-size ) +@receive-terminal-size ( -> ) + .Console/stdin DEI .state/key STZ + .state/key LDZ .tmp/pos LDZ2 STA + .tmp/pos LDZ2 INC2 .tmp/pos STZ2 + .state/key LDZ LIT "R EQU ?parse-terminal-size + BRK + +( parse and store terminal size information ) +( ) +( called by ;receive-terminal-size after complete message received ) +@parse-terminal-size ( -> ) + #0000 ,&acc STR2 + .tmp/data LDZk #1b NEQ ?&parse-error ( i ) INC + LDZk LIT "[ NEQ ?&parse-error ( i ) INC + &loop + LDZk LIT "; EQU ?&parse-col + LIT2r :&loop !&read + &parse-col + INC ,&acc LDR2 #0002 SUB2 .term/rows STZ2 + #0000 ,&acc STR2 + &loop2 + LDZk LIT "R EQU ?&done + LIT2r :&loop2 !&read + &read + LDZk LIT "0 SUB #00 SWP + ,&acc LDR2 #000a MUL2 ADD2 ,&acc STR2 + INC JMP2r + &done + ,&acc LDR2 .term/cols STZ2 POP + ;on-key .Console/vector DEO2 + draw-all + BRK + [ &acc $2 ] + &parse-error POP .tmp/data LDZ2 + ;messages/term-size-parse-error !errorq + +@count-c ( c^ -> n* ) + #0000 ,&count STR2 + STH ;data + &loop LDAk #00 EQU ?&done + LDAk STHkr NEQ ?&next + ,&count LDR2 INC2 ,&count STR2 + &next INC2 !&loop + &done POP2 POPr ,&count LDR2 JMP2r [ &count $2 ] + +( save count of number of lines in input file ) +( ) +( this method also detects whether \t characters are used, ) +( and uses this to initialize config/insert-tabs. ) +@setup-linecount ( -> ) + #0a count-c INC2 .buffer/line-count STZ2 + #09 count-c #0000 GTH2 .config/insert-tabs STZ + JMP2r + +( reads filename from the program's argv ) +( ) +( currently femto must be given a file to edit, and reading this ) +( filename is the first thing that happens in ;startup. ) +( ) +( TODO: support other situations, such as: ) +( - launching femto without a file name ) +( - closing the given file and opening a new one ) +@read-filename ( -> ) + #12 DEI #0a EQU ?&execute ( did we read \n ? ) + #12 DEI .tmp/pos LDZ2 STA ( no, so save in buffer ) + .tmp/pos LDZ2 INC2 .tmp/pos STZ2 ( pos++ ) + BRK ( return ) + + &execute ( we saw a newline, so do something ) + #00 .tmp/pos LDZ2 STA ( null terminate str ) + ;filename open-file ( open file ) + setup-linecount ( determine # of lines ) + setup-terminal-size ( detect terminal dimensions ) + BRK + +( jump to beginning of line ) +@bol ( -> ) + #0000 .cursor/col STZ2 + redraw-statusbar-and-cursor + !return + +( jump to beginning of line ) +@eol ( -> ) + cur-len .cursor/col STZ2 + redraw-statusbar-and-cursor + !return + +@forward ( -> ) + go-forward !return + +( move forward by one character ) +@go-forward ( -> ) + cur-pos last-pos GTH2 ( ?return ) ?&noop + redraw-statusbar-and-cursor + cur-col cur-len LTH2 ?&normal + #0000 .cursor/col STZ2 + .cursor/row LDZ2 INC2 .cursor/row STZ2 + !ensure-visible-cursor + &normal + cur-col INC2 .cursor/col STZ2 + &noop JMP2r + +( move backward by one character ) +@back ( -> ) + go-back !return + +( internal implementation shared by ;back and ;backspace ) +@go-back ( -> ) + cur-pos ;data EQU2 ?&noop + cur-col #0001 LTH2 ?&next-line + cur-col #0001 SUB2 .cursor/col STZ2 + !redraw-statusbar-and-cursor + &next-line + .cursor/row LDZ2k #0001 SUB2 ROT STZ2 + cur-len .cursor/col STZ2 + ensure-visible-cursor + redraw-statusbar-and-cursor + &noop JMP2r + +( move up by one line ) +@up ( -> ) + .cursor/row LDZ2 #0000 EQU2 ?return + .cursor/row LDZ2 #0001 SUB2 .cursor/row STZ2 + ensure-visible-cursor + redraw-statusbar-and-cursor + !return + +( move down by one line ) +@down ( -> ) + .cursor/row LDZ2 + .buffer/line-count LDZ2 #0001 SUB2 EQU2 ?return + .cursor/row LDZ2 INC2 .cursor/row STZ2 + ensure-visible-cursor + redraw-statusbar-and-cursor + !return + +@is-word-char ( c^ -> bool^ ) + DUP #2f GTH OVR #3a LTH AND STH + DUP #40 GTH OVR #5b LTH AND STH ORAr + DUP #60 GTH SWP #7b LTH AND STHr ORA JMP2r + +@not-word-char ( c^ -> bool^ ) + is-word-char #00 EQU JMP2r + +@forward-by-word ( -> ) + cur-pos + &first + LDAk #00 EQU ?&done + LDAk is-word-char ?&second + INC2 go-forward !&first + &second + LDAk #00 EQU ?&done + LDAk not-word-char ?&done + INC2 go-forward !&second + &done + POP2 !return + +@back-by-word ( -> ) + cur-pos #0001 SUB2 + &first + DUP2 ;data LTH2 ?&done + LDAk is-word-char ?&second + #0001 SUB2 go-back !&first + &second + DUP2 ;data LTH2 ?&done + LDAk not-word-char ?&done + #0001 SUB2 go-back !&second + &done + POP2 !return + +@help + #01 .state/in-help STZ + term-erase-all + #0000 #0000 term-move-cursor + emit-color-bold + ;help-text print + emit-reset + redraw-all + BRK + +( center buffer view on the current line ) +@center-view + .term/rows LDZ2 INC2 #0002 DIV2 STH2k + .cursor/row LDZ2 LTH2 ?&standard + POP2r + #0000 .buffer/line-offset STZ2 + ;data .buffer/offset STZ2 + !&done + &standard + .cursor/row LDZ2 STH2r SUB2 + DUP2 .buffer/line-offset STZ2 + abs-line .buffer/offset STZ2 + &done + redraw-all !return + +( move up by one page ) +@page-up ( -> ) + .term/rows LDZ2 #0002 SUB2 STH2k + .buffer/line-offset LDZ2 LTH2 ?&move-full + POP2r + zero-row + #0000 .cursor/col STZ2 + !&done + &move-full + .cursor/row LDZ2 STH2kr SUB2 .cursor/row STZ2 + .buffer/line-offset LDZ2 STH2r SUB2 + DUP2 .buffer/line-offset STZ2 + abs-line .buffer/offset STZ2 + &done + redraw-all !return + +( move down by one page ) +@page-down + eof-is-visible ?&near-eof + .term/rows LDZ2 #0002 SUB2 STH2k + .buffer/line-offset LDZ2 ADD2 + DUP2 .buffer/line-offset STZ2 + abs-line .buffer/offset STZ2 + .cursor/row LDZ2 STH2r ADD2 .cursor/row STZ2 + redraw-all !return + &near-eof + .buffer/line-count LDZ2 #0001 SUB2 .cursor/row STZ2 + cur-len .cursor/col STZ2 + redraw-cursor !return + +( return true if the end of the file is visible ) +@eof-is-visible ( -> bool^ ) + .buffer/line-offset LDZ2 .term/rows LDZ2 ADD2 INC2 + .buffer/line-count LDZ2 + GTH2 JMP2r + +( beginning quitting femto, prompting if unsaved changes ) +@quit + #01 .state/quitting STZ + .state/modified LDZ #00 EQU ?quit-now + ;messages/quit-prompt ;messages/null ;do-quit start-prompt + redraw-prompt-and-cursor !return + +( display two strings on the message line ) +( ) +( often this involves a static messages + an argument like ;tmp/data. ) +( ) +( use messages/null for the second string if only one is needed. ) +@send-message ( s1* s2* -> ) + #01 .state/message STZ + move-to-message-line + SWP2 print !print + +( callback executed in response to the quit prompt. ) +@do-quit + .tmp/data LDZ LIT "n EQU ?quit-now + .tmp/data LDZ LIT "y EQU ?save + #00 .state/quitting STZ + ;messages/unknown-input ;tmp/data send-message + BRK + +( quit the editor and restore the terminal ) +( ) +( this definition is needed so the address can be used by JCN2. ) +@quit-now + ;finish-quit .Console/vector DEO2 + restore-terminal BRK + +@finish-quit + .Console/type DEI #81 NEQ ?{ #80 .System/halt DEO } BRK + +( label that calls BRK ) +( ) +( this definition is needed so the address can be used by JCN2. ) +@ignore + BRK + +( insert the given character at the cursor position ) +( ) +( this should not be called for newlines, see ;newline ) +@insert ( c^ -> ) + cur-pos shift-right + cur-col INC2 .cursor/col STZ2 + redraw-all !return + +( insert the given character in the prompt ) +@insert-prompt ( c^ -> ) + .tmp/pos LDZ2 STH2k STA ( data[pos] <- c ) + INC2r #00 STH2kr STA ( data[pos+1] <- 0 ) + STH2r .tmp/pos STZ2 ( pos <- pos+1 ) + redraw-prompt-and-cursor !return + +( insert a tab at the cursor position ) +( ) +( depending on the state of config/insert-tabs this will ) +( either call ;insert with \t or else insert a number of ) +( spaces based on .config/tab-width. ) +@insert-tab ( -> ) + .config/insert-tabs LDZ ?&use-tabs + #0000 .config/tab-width LDZ2 SUB2 + &loop + DUP2 #0000 EQU2 ?&done + #20 cur-pos shift-right + INC2 !&loop + &done + cur-col .config/tab-width LDZ2 ADD2 .cursor/col STZ2 + redraw-all !return + &use-tabs + #09 !insert + +( insert a newline at the cursor position ) +@newline ( c^ -> ) + #0a cur-pos shift-right + #0000 .cursor/col STZ2 + .cursor/row LDZ2 INC2 .cursor/row STZ2 + .buffer/line-count LDZ2k INC2 ROT STZ2 + ensure-visible-cursor + redraw-all !return + +( delete the character to the left of the cursor, if any ) +@backspace ( -> ) + cur-pos ;data EQU2 ?return + go-back !delete + +( delete the last character in the prompt ) +@backspace-prompt ( -> ) + .tmp/pos LDZ2 ;tmp/data EQU2 ?&skip ( ?return ) + #00 .tmp/pos LDZ2 #0001 SUB2 ( 0 pos-1 ) + STH2k STA ( data[pos-1] <- 0 ) + STH2r .tmp/pos STZ2 ( pos <- pos-1 ) + &skip redraw-prompt-and-cursor !return + +( delete the character under the cursor, if any ) +@delete ( -> ) + last-pos cur-pos LTH2 ?return + cur-pos LDAk STH ( cur [c] ) + shift-left ( [c] ) + STHr #0a NEQ ?¬-newline + .buffer/line-count LDZ2k #0001 SUB2 ROT STZ2 + ¬-newline redraw-all !return + +( used at the start of an escape sequence to set up state. ) +( ) +( many keys such as page-down will actually send an escape character ) +( followed by others. to support these we use saw-esc to interpret ) +( input characters differently. ) +( ) +( see also state/saw-xterm which supports such sequences. ) +@escape ( -> ) + #01 .state/saw-esc STZ BRK + +( move to the end of the file ) +@goto-end ( -> ) + .buffer/line-count LDZ2 #0001 SUB2 .cursor/row STZ2 + .buffer/line-count LDZ2 .term/rows LDZ2 LTH2k ?&use-zero + SUB2 #0002 ADD2 !&continue + &use-zero + POP2 POP2 #0000 + &continue + DUP2 .buffer/line-offset STZ2 + abs-line .buffer/offset STZ2 + cur-len .cursor/col STZ2 + redraw-all !return + +( move to the start of the file ) +@goto-start ( -> ) + zero-row + #0000 .cursor/col STZ2 + redraw-all !return + +( prompt for a line number and move to that line ) +@goto-line ( -> ) + ;messages/goto-line ;messages/null ;do-goto-line start-prompt + redraw-prompt-and-cursor !return + +( parse the given string as a decimal number ) +( ) +( returns the number as a short followed by whether parsing was ok ) +@parse-decimal-number ( addr* -> n* ok^ ) + LDAk ?&non-empty + #00 JMP2r + &non-empty + LIT2r 0000 + &loop + LDAk ?&continue + POP2 STH2r #01 JMP2r + &continue + LDAk LIT "0 LTH ?&fail + LDAk LIT "9 GTH ?&fail + LIT2r 000a MUL2r + LDAk LIT "0 SUB #00 SWP STH2 ADD2r + INC2 !&loop + &fail + POP2r #00 JMP2r + +( go to the given line number ) +( ) +( this is used as a callback from the goto-line prompt ) +@do-goto-line ( n* -> ) + ;tmp/data parse-decimal-number + ?&ok + ;messages/unknown-input ;tmp/data send-message + !return + &ok + #0001 SUB2 ( convert 1-indexing to 0-indexing ) + DUP2 .buffer/line-count LDZ2 LTH2 ?&within + POP2 !goto-end + &within + jump-to-line + redraw-all !return + +( move the cursor to the given coordinates ) +( ) +( this won't move the display if the given coordinates are visible. ) +@move-to-coord ( col* row* -> ) + DUP2 line-is-visible ?jump-to-coord/short + !jump-to-coord + +( move the cursor to the given coordinates ) +( ) +( this will always ensure the display is centered on the given coordinates ) +@jump-to-coord ( x* y* -> ) + .term/rows LDZ2 INC2 #0002 DIV2 LTH2k ( x y rows/2 ylines-rows? ) + ?&late ( x y y-rows/2 lines-rows ) + POP2 !&finish + &early ( x y rows/2 ) + POP2 #0000 !&finish ( x y 0000 ) + &late ( x y y-rows/2 lines-rows ) + NIP2 + &finish ( x y o ) + redraw-all + SUB2k STH2 DUP2 ( x y o o [y-o] ) + .buffer/line-offset STZ2 ( x y o [y-o] ) + abs-line .buffer/offset STZ2 ( x y [y-o] ) + POP2r + &short + redraw-statusbar-and-cursor + .cursor/row STZ2 ( x ) + .cursor/col STZ2 + JMP2r + +( jump to the given line number ) +@jump-to-line ( n* -> ) + #0000 SWP2 !jump-to-coord + +( ensure the cursor is visibe ) +( ) +( if the cursor is not already visible the screen will be ) +( centered on the cursor's coordinates. ) +@ensure-visible-cursor + .cursor/row LDZ2 .buffer/line-offset LDZ2 + SUB2 .term/rows LDZ2 LTH2 ?&noop + .cursor/row LDZ2 jump-to-line + redraw-all + &noop JMP2r + +( currently used to print stack information. ) +@debug + ;messages/input-error !errorq + +( move the terminal's cursor to the message line ) +( ) +( this low level method does not change any editor state (such as ) +( the cursor) but is used to display messages. ) +@move-to-message-line ( -> ) + #0000 .term/rows LDZ2 #0002 ADD2 !term-move-cursor + +( start a prompt on the message line ) +( ) +( the arguments are as follows: ) +( - the prompt string will printed in bold ) +( - the default string will be editable ) +( - the vector address will be used on return ) +( ) +( prompts can always be cancelled using C-g. ) +( ) +( when called vector should end in a BRK instructinon. ) +@start-prompt ( prompt* default* vector* -> ) + .prompt/active LDZ ?&is-active + #01 .prompt/active STZ ( prompt/active <- 1 ) + .prompt/vector STZ2 ( prompt/vector <- vector ) + ;tmp/data str-copy ( tmp/data <- default ) + ;tmp/data ADD2 .tmp/pos STZ2 ( tmp/pos <- len(default)+data ) + .prompt/string STZ2 ( prompt/string <- prompt ) + JMP2r + &is-active + #0000 DIV + +( ends prompt without calling vector ) +@cancel-prompt ( -> ) + #00 .prompt/active STZ + #00 .state/quitting STZ + clear-message-line + redraw-prompt-and-cursor + !return + +( finishes prompt and executes callback ) +( ) +( when called vector should end in a BRK instruction ) +@finish-prompt ( -> ) + #00 .prompt/active STZ + clear-message-line + redraw-prompt-and-cursor + .prompt/vector LDZ2 JMP2 + +( begin saving the file, prompting the user for a fiel name ) +@save + ;messages/save-prompt ;filename ;do-save start-prompt + redraw-prompt-and-cursor + !return + +( save the file with the filename found in tmp/data ) +@do-save ( -> ) + .buffer/limit LDZ2 ;data SUB2 STH2 ( [size] ) + ;tmp/data .File/name DEO2 + STH2kr .File/length DEO2 + ;data .File/write DEO2 + + .File/success DEI2 STH2r EQU2 ?&ok + ;messages/save-failed !&finish + &ok + #00 .state/modified STZ + ;tmp/data ;filename str-copy POP2 + ;messages/save-ok + &finish + ;tmp/data send-message + .state/quitting LDZ ?quit-now + #03 .state/redraw STZ ( FIXME: why do we have to do this? ) + !return + +( begin a search, prompting for a search string ) +@search ( -> ) + ;messages/search-prompt ;messages/null ;do-search start-prompt + redraw-prompt-and-cursor + !return + +( execute a search, using the given search string ) +@do-search ( -> ) + .cursor/row LDZ2 .searching/orig-row STZ2 + .cursor/col LDZ2 .searching/orig-col STZ2 + #0000 .searching/regex STZ2 + move-to-next-match ?&found + move-to-prev-match ?&found + ;messages/no-matches-found ;tmp/data send-message BRK + &found + #01 .searching/active STZ + redraw-matches + !return + +( begin a search, prompting for a regular expression ) +@regex-search ( -> ) + ;messages/regex-search-prompt ;messages/null ;do-regex-search start-prompt + redraw-prompt-and-cursor !return + +( execute a search, using the given regular expressions ) +( ) +( TODO: handle invalid regular expressions that fail to compile ) +@do-regex-search ( -> ) + cur-pos DUP2 .searching/start STZ2 .searching/end STZ2 + .cursor/row LDZ2 .searching/orig-row STZ2 + .cursor/col LDZ2 .searching/orig-col STZ2 + ;tmp/data compile .searching/regex STZ2 + move-to-next-regex-match ?&found + move-to-prev-regex-match ?&found + ;messages/no-matches-found ;tmp/data send-message BRK + &found + #01 .searching/active STZ + redraw-matches + !return + +( toggle the color used by the terminal ) +( ) +( available colors are: ) +( - black ) +( - red ) +( - green ) +( - yellow ) +( - blue ) +( - magenta ) +( - cyan ) +( - white ) +@toggle-color ( -> ) + .config/color LDZ2 #3733 EQU2 ?{ + .config/color LDZ2 #0100 ADD2 DUP2 .config/color STZ2 + DUP2 #3733 LTH2 ?{ .config/red STZ2 !&done } + POP2 #3133 .config/red STZ2 !&done + } + #3033 DUP2 .config/color STZ2 .config/red STZ2 + &done redraw-all !return + +( toggle whether to use literal tab characters ) +( ) +( when opening a file, this defaults to 01 if existing tab ) +( characters are found, and 00 otherwise. ) +@toggle-tabs ( -> ) + .config/insert-tabs LDZk #00 EQU SWP STZ + redraw-statusbar-and-cursor !return + +( interpret user input as an escaped sequence ) +( ) +( called by on-key with state/saw-esc is true ) +( ) +( TODO: maybe M-% for search&replace ) +@on-key-escaped ( -> ) + #00 .state/saw-esc STZ + .state/key LDZ LIT "< EQU ( M-< ) ?goto-start + .state/key LDZ LIT "> EQU ( M-> ) ?goto-end + .state/key LDZ LIT "b EQU ( M-b ) ?back-by-word + .state/key LDZ LIT "c EQU ( M-c ) ?toggle-color + .state/key LDZ LIT "f EQU ( M-f ) ?forward-by-word + .state/key LDZ LIT "g EQU ( M-g ) ?goto-line + .state/key LDZ LIT "h EQU ( M-h ) ?help + .state/key LDZ LIT "s EQU ( M-s ) ?regex-search + .state/key LDZ LIT "t EQU ( M-t ) ?toggle-tabs + .state/key LDZ LIT "u EQU ( M-u ) ?undo + .state/key LDZ LIT "v EQU ( M-v ) ?page-up + .state/key LDZ LIT "[ EQU ( M-[ ) ?xterm + BRK + +( set our input to expect xterm control sequences ) +( ) +( after seeing ESC followed by [ we expect various ) +( ANSI or xterm control sequences. these include ) +( things like: ) +( - up/down/left/right arrow keys ) +( - page up/page down keys ) +( - end/home keys ) +@xterm + #01 .state/saw-xterm STZ BRK + +( after seeing sequences like "ESC [ 1" we expect ) +( to see a trailing ~ to complete the sequence. ) +( ) +( this callback checks for and if set performs ) +( the relevant action. ) +@on-key-vt ( -> ) + .state/saw-vt LDZk STH #00 SWP STZ + .state/key LDZ LIT "~ EQU ?&ok + POPr BRK + &ok + STHr DUP LIT "1 NEQ ?¬-1 + ( ^[[1~ -> home ) POP !bol + ¬-1 DUP LIT "2 NEQ ?¬-2 + ( ^[[2~ -> insert ) POP BRK + ¬-2 DUP LIT "3 NEQ ?¬-3 + ( ^[[3~ -> delete ) POP !delete + ¬-3 DUP LIT "4 NEQ ?¬-4 + ( ^[[4~ -> end ) POP !eol + ¬-4 DUP LIT "5 NEQ ?¬-5 + ( ^[[5~ -> page up ) POP !page-up + ¬-5 DUP LIT "6 NEQ ?¬-6 + ( ^[[6~ -> page down ) POP !page-down + ¬-6 DUP LIT "7 NEQ ?¬-7 + ( ^[[7~ -> home ) POP !bol + ¬-7 DUP LIT "8 NEQ ?¬-8 + ( ^[[8~ -> end ) POP !eol + ¬-8 + ( ??? ) POP BRK + +( after seeing sequences like "ESC [" we expect ) +( to see more characters to determine the logical key. ) +( ) +( this callback performs the relevant action, ) +( or else sets (or unsets) state as necessary ) +( to continue (or end) the sequence. ) +@on-key-xterm ( -> ) + #00 .state/saw-xterm STZ + .state/key LDZ LIT "A EQU ( ^[[A -> up ) ?up + .state/key LDZ LIT "B EQU ( ^[[B -> down ) ?down + .state/key LDZ LIT "C EQU ( ^[[C -> right ) ?forward + .state/key LDZ LIT "D EQU ( ^[[D -> left ) ?back + .state/key LDZ LIT "F EQU ( ^[[F -> end ) ?eol + .state/key LDZ LIT "H EQU ( ^[[H -> home ) ?bol + .state/key LDZ LIT "0 LTH ?ignore + .state/key LDZ LIT "8 GTH ?ignore + .state/key LDZ .state/saw-vt STZ ( ^[[1 through ^[[8 ) + BRK + +( clear the message line ) +( ) +( this includes the code needed to move the cursor ) +( to that line, the ANSI control sequence to clear ) +( the line, and unsetting state/message. ) +( ) +( if state/message is unset this is a no-op. ) +@clear-message-line + .state/message LDZ #00 EQU ?&done + move-to-message-line + term-erase-line + #00 .state/message STZ + &done JMP2r + +( cancel the active search ) +( ) +( this method unsets searching/active and also restores ) +( the original cursor position. ) +@cancel-search + .searching/orig-row LDZ2 jump-to-line + .searching/orig-col LDZ2 .cursor/col STZ2 + !finish-search + +( cancel the active search ) +( ) +( this method unsets searching/active. unlike ;cancel-search ) +( this leaves the cursor where it is. ) +@finish-search + #00 .searching/active STZ + reset-arena redraw-all !return + +( TODO: i haven't decided how to solve the problem of ) +( overlapping matches yet. i don't really want to maintain ) +( a global list of all matches, which means that currently ) +( it can change in response to e.g. cursor position when ) +( matches overlap. ) +( ) +( UPDATE: now that we have searching/start and searching/end ) +( we can use those to resume the search after the full match. ) +( this solves the problem except in some very strange cases ) +( which are quite unlikely. ) + +( jump forward to the next match, if any. ) +( ) +( moves the cursor forward to the next match. if there are no ) +( further matches the cursor does not move. ) +@jump-to-next-match ( -> ) + .searching/regex LDZ2 ORA ?&is-regex + move-to-next-match POP !return + &is-regex + move-to-next-regex-match POP !return + +( jump backward to the previous match, if any. ) +( ) +( moves the cursor backward to the previous match. if there are ) +( no further matches the cursor does not move. ) +@jump-to-prev-match ( -> ) + .searching/regex LDZ2 ORA ?&is-regex + move-to-prev-match POP !return + &is-regex + move-to-prev-regex-match POP !return + +( move to the next substring match. ) +( ) +( called by ;jump-to-next-match. ) +@move-to-next-match ( -> ok^ ) + .buffer/limit LDZ2 + cur-pos INC2 + &loop + GTH2k #00 EQU ?&fail + DUP2 matches-at + ORA ?&found + INC2 !&loop + &found + NIP2 jump-to-pos #01 JMP2r + &fail + POP2 POP2 #00 JMP2r + +( move to the previous substring match. ) +( ) +( called by ;jump-to-prev-match. ) +@move-to-prev-match ( -> ok^ ) + ;data + cur-pos #0001 SUB2 + &loop + GTH2k ?&fail + DUP2 matches-at + ORA ?&found + #0001 SUB2 !&loop + &found + NIP2 jump-to-pos #01 JMP2r + &fail + POP2 POP2 #00 JMP2r + +( move to the next regex match. ) +( ) +( called by ;jump-to-next-match. ) +@move-to-next-regex-match ( -> ok^ ) + .searching/end LDZ2 .buffer/limit LDZ2 OVR2 + GTH2 ?&ok + POP2 #00 JMP2r + &ok + .searching/regex LDZ2 rx-search ?&found + #00 JMP2r + &found + ;search-end LDA2 .searching/end STZ2 + ;search-start LDA2 DUP2 .searching/start STZ2 + jump-to-pos #01 JMP2r + +( move to the previous substring match. ) +( ) +( called by ;jump-to-prev-match. ) +( ) +( compared to move-to-next-regex-match this is kind of inefficient. ) +( that's because we have no easy way to search backwards from a point. ) +( ) +( we could do some kind of fancy thing where we search the previous ) +( N bytes, then the 2N bytes before that, etc. ) +( ) +( however, 64K is small enough that just searching from the beginning ) +( and then taking the last match before the cursor works. ) +@move-to-prev-regex-match ( -> ok^ ) + LITr 00 + cur-pos ;data ( limit pos [res] ) + &loop ( limit pos [res] ) + GTH2k #00 EQU ?&done ( limit pos ) + DUP2 .searching/regex LDZ2 rx-search ( limit pos match? ) + #00 EQU ?&done ( limit pos ) + OVR2 ;search-end LDA2 LTH2 ?&done + POP2 POPr LITr 01 + ;search-start LDA2 .searching/start STZ2 + ;search-end LDA2 DUP2 .searching/end STZ2 + !&loop + &done + POP2 POP2 STHr DUP #00 EQU ?&fail + .searching/start LDZ2 jump-to-pos + &fail + JMP2r + +( on-key event handler to use when searching ) +( ) +( when searching the user can: ) +( - move to the next match (n or C-s) ) +( - move to the previous match (p or C-r) ) +( - end the search leaving the cursor where it is (enter) ) +( - cancel the search restoring the cursor (C-g) ) +@on-key-searching + .state/key LDZ #07 EQU ( C-g ) ?cancel-search + .state/key LDZ #08 EQU ( C-h ) ?help + .state/key LDZ #0d EQU ( \r ) ?finish-search + .state/key LDZ #12 EQU ( C-r ) ?jump-to-prev-match + .state/key LDZ #13 EQU ( C-s ) ?jump-to-next-match + .state/key LDZ #6e EQU ( n ) ?jump-to-next-match + .state/key LDZ #70 EQU ( p ) ?jump-to-prev-match + !ignore + +( on-key event handler to use when prompt is active ) +( ) +( when the prompt is active the user can: ) +( - append characters to the input string ) +( - delete from the end of the input string (backspace) ) +( - complete the input and act (enter) ) +( - cancel the prompt without action (C-g) ) +( ) +( TODO: currently it's impossible to edit the prompt ) +( except from the end. ideally we'd support most of the ) +( same navigation commands as we do in the buffer, such as ) +( C-a, C-d, etc. however, it's enough extra work to enable ) +( this that for now i haven't done it. ) +@on-key-prompt + .state/key LDZ #07 EQU ( C-g ) ?cancel-prompt + .state/key LDZ #08 EQU ( C-h ) ?help + .state/key LDZ #0d EQU ( \r ) ?finish-prompt + .state/key LDZ #7f EQU ( DEL ) ?backspace-prompt + .state/key LDZ #20 LTH ?ignore ( ignore for now ) + .state/key LDZ #7e GTH ?ignore ( ignore for now ) + .state/key LDZ ( printable ASCII ) !insert-prompt + BRK + +( on-key event handler ) +( ) +( this is the "normal" event handler to use for editing ) +( the buffer. it checks various state values to determine ) +( if we're in the midst of a control sequence, if we're ) +( searching or have an active prompt, etc. ) +( ) +( TODO: C-h for help ) +( ) +( you could also imagine building data structures of ) +( commands to unify input strings, help text, callbacks, ) +( and so on. this might ultimately be more efficient but ) +( for now what we have works. ) +@on-key + #00 .state/in-undo STZ + .Console/stdin DEI .state/key STZ + clear-message-line + .state/in-help LDZ #00 .state/in-help STZ ?return + .searching/active LDZ ?on-key-searching + .prompt/active LDZ ?on-key-prompt + .state/saw-vt LDZ ?on-key-vt + .state/saw-xterm LDZ ?on-key-xterm + .state/saw-esc LDZ ?on-key-escaped + .state/key LDZ #01 EQU ( C-a ) ?bol + .state/key LDZ #02 EQU ( C-b ) ?back + .state/key LDZ #04 EQU ( C-d ) ?delete + .state/key LDZ #05 EQU ( C-e ) ?eol + .state/key LDZ #06 EQU ( C-f ) ?forward + .state/key LDZ #08 EQU ( C-h ) ?help + .state/key LDZ #09 EQU ( \t ) ?insert-tab + .state/key LDZ #0c EQU ( C-l ) ?center-view + .state/key LDZ #0d EQU ( \r ) ?newline + .state/key LDZ #0e EQU ( C-n ) ?down + .state/key LDZ #0f EQU ( C-o ) ?save + .state/key LDZ #10 EQU ( C-p ) ?up + .state/key LDZ #13 EQU ( C-s ) ?search + .state/key LDZ #16 EQU ( C-v ) ?page-down + .state/key LDZ #18 EQU ( C-x ) ?quit + .state/key LDZ #1a EQU ( C-z ) ?debug + .state/key LDZ #1b EQU ( ESC ) ?escape + .state/key LDZ #7f EQU ( DEL ) ?backspace + .state/key LDZ #20 LTH ?ignore ( ignore for now ) + .state/key LDZ #7e GTH ?ignore ( ignore for now ) + .state/key LDZ ( printable ASCII ) !insert + +( return the smaller of two short values ) +@min2 ( x* y* -> min* ) + LTH2k JMP SWP2 POP2 JMP2r + +( method to add bits to the redraw register ) +( ) +( state/redraw uses 8 bits to represent which parts ) +( of the screen (if any) should be redrawn. this method ) +( uses logical-or (ORA) to add the bits of n to those ) +( already set. ) +@redraw-add ( n^ -> ) + .state/redraw LDZk ROT ORA SWP STZ JMP2r + +( various redrawing methods ) +( ) +( these don't perform a redraw right away, but instead ) +( signal that the next drawing should include that part. ) +@redraw-cursor ( -> ) #01 !redraw-add +@redraw-statusbar-and-cursor ( -> ) #03 !redraw-add +@redraw-prompt-and-cursor ( -> ) #05 !redraw-add +@redraw-matches ( -> ) #08 !redraw-add +@redraw-all ( -> ) #1f !redraw-add + +( draw the current cursor location ) +@draw-cursor ( -> ) + .prompt/active LDZ #00 EQU JMP JMP2r + ( TODO: handle long lines ) + cur-w-col lmargin ADD2 + .cursor/row LDZ2 .buffer/line-offset LDZ2 SUB2 + !term-move-cursor + +( current column in terms of display width ) +( ) +( this is different than ;cur-col due to tabs ) +@cur-w-col ( -> col* ) + LIT2r 0000 ( [0] ) + cur-line DUP2 cur-col ADD2 SWP2 ( lim s [0] ) + &loop GTH2k ?&next + POP2 POP2 STH2r JMP2r + &next LDAk #09 EQU ?&tabs INC2 INC2r !&loop + &tabs INC2 .config/tab-width LDZ2 STH2 ADD2r !&loop + +( move the terminal cursor to the statusbar line ) +@move-to-statusbar ( -> ) + #0000 .term/rows LDZ2 !term-move-cursor + +( draw the full statusbar ) +@draw-statusbar ( -> ) + move-to-statusbar + emit-color-reverse + + LIT2r 2018 .term/cols LDZ2 #0001 ( cols i [2018] ) + &loop LTH2k ?&done DEOkr INC2 !&loop + &done POP2 POP2 POP2r ( ) + + move-to-statusbar + + ( display ** if the buffer has unsaved changes, -- otherwise ) + #2d .state/modified LDZ #00 NEQ #03 MUL SUB DUP emit emit sp + + ;filename print + sp emit-[ + .buffer/limit LDZ2 ;data SUB2 emit-dec2 + ;messages/bytes print + sp + .buffer/line-count LDZ2 emit-dec2 + ;messages/lines print + sp emit-lpar + cur-col INC2 emit-dec2 + emit-, + .cursor/row LDZ2 INC2 emit-dec2 + emit-rpar sp emit-[ + LIT "s .config/insert-tabs LDZ ADD emit + emit-] sp + ;messages/help-msg print + !emit-reset + +@draw-prompt ( -> ) + clear-message-line + .prompt/active LDZ ?&is-active + JMP2r + &is-active + #01 .state/message STZ + move-to-message-line + emit-color-bold + .prompt/string LDZ2 print + emit-reset + ;tmp/data print + JMP2r + +@draw-linenum ( n* -> ) + emit-reset + emit-color + emit-dec2-pad sp + !emit-reset + +@matches-at ( s* -> limit* ) + LIT2r :tmp/data + &loop LDAkr STHr #00 EQU ?&done + LDAk LDAkr STHr NEQ ?&fail + INC2 INC2r !&loop + &fail POP2 #0000 + &done POP2r JMP2r + +( TODO: this doesn't handle tabs correctly ) +@draw-region ( offset* limit* col* row* -> ) + OVR2 ( offset limit col row col ) + .term/cols LDZ2 SWP2 SUB2 STH2 ( offset limit col row [cols-col] ) + term-move-cursor ( offset limit [cols-col] ) + OVR2 STH2r ADD2 ( offset limit offset+cols-col ) + min2 STH2 ( offset [cutoff] ) + &loop ( i [cutoff] ) + DUP2 STH2kr LTH2 #00 EQU ?&done + LDAk #00 EQU ?&done + LDAk #18 DEO INC2 !&loop + &done + POP2 POP2r JMP2r + +@screen-limit ( -> sc-limit* ) + .term/rows LDZ2 .buffer/line-offset LDZ2 ADD2 ( row0+rows ) + DUP2 .buffer/line-count LDZ2 LTH2 ?¬-end + POP2 .buffer/limit LDZ2 JMP2r + ¬-end + !abs-line + +@draw-regex-matches ( -> ) + emit-color-reverse-bold ( ) + screen-limit .buffer/offset LDZ2 ( limit pos ) + &loop ( limit pos ) + GTH2k #00 EQU ( limit pos limit>pos=0? ) + ?&done ( limit pos ) + DUP2 .searching/regex LDZ2 ( limit pos pos rx ) + rx-search #00 EQU ( limit pos found=0? ) + ?&done ( limit pos ) + POP2 ;search-start LDA2 ( limit start ) + GTH2k #00 EQU ( limit start limit>start=0? ) + ?&done ( limit start ) + ;search-end LDA2 OVR2 ( limit start end start ) + pos-to-row-col ( limit start end row col ) + lmargin ADD2 ( limit start end row col+lm ) + SWP2 .buffer/line-offset LDZ2 SUB2 ( limit start end col+lm row-lo ) + draw-region ( limit ) + ;search-end LDA2 ( limit end ) !&loop + &done ( limit pos ) + POP2 POP2 JMP2r + +@draw-matches ( -> ) + ( return if not searching ) + .searching/active LDZ #00 EQU ?&return ( ) + .searching/regex LDZ2 ORA ?draw-regex-matches + emit-color-reverse-bold + lmargin ,&x STR2 #0000 ,&y STR2 ( x <- 0, y <- 0 ) + .buffer/offset LDZ2 DUP2 + screen-limit SUB2 STH2 ( offset [-count] ) + + &loop ( offset [-count] ) + STH2kr #0000 EQU2 ?&done + DUP2 matches-at ( offset mlim [-count] ) + DUP2 ORA ?&found + POP2 ( offset [-count] ) + LDAk #0a EQU ?&newline + draw-matches/count-tabs + #0001 !&next ( offset n [-count] ) + &found ( offset mlim [-count] ) + STH2k ( offset mlim [mlim -count] ) + OVR2 SWP2 ,&x LDR2 ,&y LDR2 ( offset offset mlim x y [mlim -count] ) + draw-region ( offset [mlim -count] ) + STH2r ( offset mlim [-count] ) + OVR2 SUB2 ( offset mlim-offset [-count] ) + &next ( offset n [-count] ) + DUP2 ,&x LDR2 ADD2 ,&x STR2 ( offset n [-count ) + STH2k ( offset n [n -count] ) + ADD2 ADD2r ( offset+n [n-count] ) + !&loop + &newline ( offset [-count] ) + lmargin ,&x STR2 + ,&y LDR2 INC2 ,&y STR2 + INC2 INC2r + !&loop + &done + POP2 POP2r + emit-reset + &return + JMP2r + [ &x $2 &y $2 ] + &count-tabs ( offset -> offset ) + LDAk #09 NEQ ?&count-tabs/done + ,&x LDR2 .config/tab-adjust LDZ2 ADD2 ,&x STR2 + &count-tabs/done JMP2r + +@emit-tab ( -> ) + #0000 .config/tab-width LDZ2 SUB2 + LIT2r 2018 + &loop ORAk ?&next POP2 POP2r JMP2r + &next DEOkr INC2 !&loop + + +( ANSI terminal notes ) +( ) +( attrs [0-7] ) +( reset, bright, dim, underscore, ) +( blink, ???, reverse, hidden ) +( ) +( fg [30-37], bg [40-47] ) +( black, red, green, yellow, ) +( blue, magenta, cyan, white ) + +( ANSI control sequence to move the cursor to the given coord ) +( ESC [ $row ; $col H ) +@term-move-cursor ( col* row* -> ) + ansi INC2 ( row+1 ) emit-dec2 + emit-; INC2 ( col+1 ) emit-dec2 + emit-H JMP2r + +( ANSI control sequence to move N positions right ) +( ESC [ $n C ) +@term-move-right ( n* -> ) + ansi emit-dec2 emit-C JMP2r + +( ANSI control sequence to get the cursor position ) +( ESC [ 6 n ) +@term-get-cursor-position ( -> ) + LIT2 00 "n LIT "6 !ansi-emit + +( ANSI control sequence to erase entire screen ) +( ESC [ 2 J ) +@term-erase-all ( -> ) + LIT2 00 "J LIT "2 !ansi-emit + +( ANSI control sequence to erase the current line ) +( ESC [ 2 K ) +@term-erase-line ( -> ) + LIT2 00 "K LIT "2 !ansi-emit + +@ansi-emit ( 00 cn ... c1 c0 -> ) + LITr 18 ( Console/stdout ) + #5b1b STHkr DEO STHkr DEO ( ESC [ ) + &loop DUP ?&next POP POPr JMP2r + &next STHkr DEO !&loop + +( ESC [ 3 1 m ) +@emit-red ( -> ) + LIT2 00 "m .config/red LDZ2 !ansi-emit + +( ESC [ 0 m ) +@emit-reset ( -> ) + #00 LIT2 "m "0 !ansi-emit + +( ESC [ 1 m $ ESC [ 0 m ) +@emit-red-dollar ( -> ) + emit-red emit-$ !emit-reset + +( ESC [ 3 $x ; 7 m ) +( $x is 0-7 ) +@emit-color-reverse ( -> ) + LIT2 00 "m LIT2 "7 "; .config/color LDZ2 !ansi-emit + +@emit-color ( -> ) + LIT2 00 "m .config/color LDZ2 !ansi-emit + +( ESC [ 3 $x ; 1 m ) +( $x is 0-7 ) +@emit-color-bold ( -> ) + LIT2 00 "m LIT2 "1 "; .config/color LDZ2 !ansi-emit + +( ESC [ 3 $x ; 1 ; 7 m ) +( $x is 0-7 ) +@emit-color-reverse-bold ( -> ) + LIT2 00 "m LIT2 "7 "; LIT2 "1 "; .config/color LDZ2 !ansi-emit + +@draw-all ( -> ) + term-erase-all + #0000 #0000 term-move-cursor + .buffer/line-offset LDZ2 STH2 LIT2r 0001 ( [k line-offset] ) + .buffer/offset LDZ2 + &bol + ADD2kr STH2r draw-linenum + lmargin INC2 ,&x STR2 + &loop ( offset [k line-offset] ) + LDAk #00 EQU ?&eof + LDAk #0a EQU ?&eol + ,&x LDR2 .term/cols LDZ2 + LTH2k ?&ok + GTH2 ?&skip + emit-red-dollar ,&x LDR2 INC2 ,&x STR2 + &skip INC2 + !&loop + &ok POP2 POP2 + LDAk #09 EQU ?&do-tab + LDAk emit INC2 ,&x LDR2 INC2 ,&x STR2 + !&loop + &eol INC2r + STH2kr .term/rows LDZ2 GTH2 ?&done + crlf INC2 !&bol + &do-tab emit-tab INC2 + .config/tab-width LDZ2 ,&x LDR2 ADD2 ,&x STR2 + !&loop + [ &x $2 ] + &eof + emit-red + &eof-loop + STH2kr .term/rows LDZ2 GTH2 ?&done + crlf + lmargin term-move-right + emit-~ INC2r + !&eof-loop + &done POP2 POP2r POP2r + emit-reset + draw-matches + draw-statusbar + draw-prompt + !draw-cursor + +( handler completion code to do necessary drawing and BRK ) +@return ( -> ) + .state/redraw LDZ + DUP #10 AND ?&draw-all + DUP #08 AND ?&do-8 !&skip-8 &do-8 draw-matches + &skip-8 DUP #04 AND ?&do-4 !&skip-4 &do-4 draw-prompt + &skip-4 DUP #02 AND ?&do-2 !&skip-2 &do-2 draw-statusbar + &skip-2 DUP #01 AND ?&do-1 !&finish &do-1 draw-cursor !&finish + &draw-all draw-all + &finish POP #00 .state/redraw STZ BRK + +@str-copy ( src* dst* -> len* ) + STH2 DUP2 ( src src [dst] ) + &loop LDAk #00 EQU ?&done + LDAk STH2kr STA + INC2 INC2r !&loop + &done ( src src+n [dst+n] ) + SWP2 SUB2 + #00 STH2r STA + JMP2r + +@print ( s* -> ) + &loop LDAk #00 EQU ?&eof + LDAk #18 DEO INC2 !&loop + &eof POP2 JMP2r + +@cur-len ( -> n* ) + cur-line !line-len + +@line-len ( s* -> n* ) + #0000 STH2 + &loop LDAk #00 EQU ?&end + LDAk #0a EQU ?&end + INC2 INC2r !&loop + &end POP2 STH2r JMP2r + +@line-is-visible ( n* -> bool^ ) + .buffer/line-offset LDZ2 LTH2k ?&no + .term/rows LDZ2 ADD2 LTH2 JMP2r + &no POP2 POP2 #00 JMP2r + +@jump-to-pos ( s* -> ) + pos-to-logical-row-col SWP2 !move-to-coord + +@pos-to-display-row-col ( s* -> row* col* ) + .config/tab-width LDZ2 ;pos-to-row-col/tab-width STA2 !pos-to-row-col + +@pos-to-logical-row-col ( s* -> row* col* ) + #0001 ;pos-to-row-col/tab-width STA2 ( fall-through ) + +@pos-to-row-col ( s* -> row* col* ) + #0000 ,&row STR2 + #0000 ,&col STR2 + ;data + &loop ( s pos ) + GTH2k #00 EQU ?&done + LDAk #0a EQU ?&newline + LDAk #09 EQU ?&tab + #0001 + &inc + ,&col LDR2 ADD2 ,&col STR2 + INC2 !&loop + &newline + #0000 ,&col STR2 + ,&row LDR2 INC2 ,&row STR2 + INC2 !&loop + &tab + LIT2 [ &tab-width $2 ] !&inc + &done + POP2 POP2 + ,&row LDR2 ,&col LDR2 JMP2r + [ &row $2 &col $2 ] + +( counts y lines forward from the given address ) +@line-to-pos ( addr* y* -> s* ) + #0000 SWP2 SUB2 STH2 ( addr [-y] ) + &newline ( addr [-y] ) + STH2kr ORA ?&loop !&done + &loop ( addr [-y] ) + LDAk #00 EQU ?¬-found ( addr [-y] ) + LDAk #0a EQU ?&found ( addr [-y] ) + INC2 !&loop ( addr+1 [-y] ) + &found INC2 INC2r !&newline + &done POP2r JMP2r + ¬-found POP2 POP2r #0000 JMP2r + +( find string pointer for absolute y coordinate ) +@abs-line ( y* -> s* ) + ;data SWP2 !line-to-pos + +( return a pointer to the current line ) +@cur-line ( -> s* ) + .cursor/row LDZ2 .buffer/line-offset LDZ2 LTH2k ?&early + ( if cursor/row is later than line-offset ) + ( we can save some time by starting at ) + ( buffer/offset instead of the beginning ) + SUB2 .buffer/offset LDZ2 SWP2 !line-to-pos + &early + ( if cursor/row is earlier than line-offset ) + ( we need to use the absolute y coordinate ) + POP2 !abs-line + +( return a pointer to the current cursor position ) +@cur-pos ( -> s* ) + cur-line cur-col ADD2 JMP2r + +( insert one character at the cursor position ) +@shift-right ( c^ addr* -> ) + #01 .state/modified STZ + ROT STH ( addr [prev^] ) + last-pos SWP2 ( last addr [prev^] ) + .state/in-undo LDZ ?&loop + STH2k #00 STH2r u-push + &loop LTH2k ?&done ( last addr [prev^] ) + LDAk STH SWPr ( last addr [prev^ curr^] ) + DUP2 STHr ( last addr addr prev^ [curr^] ) + ROT ROT STA ( last addr [curr^] ) + INC2 !&loop ( last addr+1 [curr^] ) + &done NIP2 DUP2 ( addr addr [prev^] ) + STHr ROT ROT ( addr prev^ addr ) + STA INC2 ( addr+1 ) + .buffer/limit STZ2 ( ) + JMP2r + +( remove one character at the cursor position ) +( ) +( TODO: change last/addr order and GTH -> LTH to remove hack ) +@shift-left ( addr* -> ) + #01 .state/modified STZ + last-pos SWP2 ( last addr ) + .state/in-undo LDZ ?&loop + STH2k cur-pos LDA STH2r u-push + &loop GTH2k ?&next ( last addr ) + !&done ( last addr ) + &next DUP2 INC2 LDAk ( last addr addr+1 c1^ ) + STH SWP2 STHr ( last addr+1 addr c1^ ) + ROT ROT ( last addr+1 c1^ addr ) + STA !&loop ( last addr+1 ) + &done POP2 ( last ) + .buffer/limit STZ2 ( ) + #00 .buffer/limit LDZ2 STA ( ensure null termination ) + JMP2r + +( current column in terms of bytes in buffer ) +@cur-col ( -> col* ) + .cursor/col LDZ2 cur-len !min2 + +( jump to the first line in the buffer ) +@zero-row ( -> ) + ;data .buffer/offset STZ2 + #0000 .buffer/line-offset STZ2 + #0000 .cursor/row STZ2 + JMP2r + +( return the location of the last character in the buffer ) +@last-pos ( -> addr* ) + .buffer/limit LDZ2 #0001 SUB2 JMP2r + +( emit a short as a decimal ) +@emit-dec2 ( n* -> ) + LITr 00 ( n [0] ) + &read ( n [k] ) + #000a DIV2k STH2k MUL2 SUB2 STH2r INCr ( n%10 n/10 [k+1] ) + DUP2 ORA ?&read + POP2 ( top element was 0000 ) + &write ( n0 n1 ... nk [k+1] ) + NIP #30 ADD #18 DEO LITr 01 SUBr ( n0 ... n{k-1} [k] ) + STHkr ?&write + POPr JMP2r + +( emit a short as a decimal with leading spaces ) +@emit-dec2-pad ( n* -> ) + LITr 00 ( n [0] ) + &read ( n [k] ) + #000a DIV2k STH2k MUL2 SUB2 STH2r INCr ( n%10 n/10 [k+1] ) + STHkr #05 LTH ?&read + POP2 ( top element was 0000 ) + &write0 ( n0 n1 ... nk [k+1] ) + DUP2 ORA ?emit-dec2/write + POP2 sp LITr 01 SUBr + STHkr ?&write0 + POPr JMP2r + +( various string constants used as messages for the user ) +@messages [ &null 00 + &input-error "Input 20 "error, 20 "file 20 "too 20 "large: 20 00 + &bytes 20 "bytes, 00 + &save-ok "Successfully 20 "saved 20 00 + &save-failed "Failed 20 "to 20 "save 20 00 + &lines 20 "lines] 00 + &goto-line "Go 20 "to 20 "line: 20 00 + &save-prompt "File 20 "Name 20 "to 20 "Write: 20 00 + &search-prompt "Text 20 "to 20 "Search 20 "for: 20 00 + ®ex-search-prompt "Regex 20 "to 20 "Search 20 "for: 20 00 + &quit-prompt "Save 20 "modified 20 "file 20 "(y/n)? 20 00 + &unknown-input "Unknown 20 "input: 20 00 + &no-matches-found "No 20 "matches 20 "found: 20 00 + &term-size-parse-error "Error 20 "parsing 20 "term 20 "size 00 + &help-msg "(help: 20 "C-h) 00 + ] + +@help-text + 09 "femto 20 "input 20 "reference 09 "(C 20 "is 20 "Ctrl, 20 "M 20 "is 20 "Meta/Alt ") 0d 0a + 0d 0a + 09 "quit 09 09 "C-x 09 09 "cancel 09 09 "C-g 0d 0a + 09 "save 09 09 "C-o 09 09 "undo 09 09 "C-u 0d 0a + 0d 0a + 09 "move 20 "up 09 09 "C-p 20 "(up) 09 "page 20 "up 09 09 "M-v 20 "(pg-up) 0d 0a + 09 "move 20 "down 09 "C-n 20 "(down) 09 "page 20 "down 09 "C-v 20 "(pg-dn) 0d 0a + 09 "move 20 "left 09 "C-b 20 "(left) 09 "line 20 "start 09 "C-a 20 "(home) 0d 0a + 09 "move 20 "right 09 "C-f 20 "(right) 09 "line 20 "end 09 "C-e 20 "(end) 0d 0a + 0d 0a + 09 "goto 20 "file 20 "start 09 "M-< 09 09 "left 20 "by 20 "word 09 "M-b 0d 0a + 09 "goto 20 "file 20 "end 09 "M-> 09 09 "right 20 "by 20 "word 09 "M-f 0d 0a + 09 "goto 20 "line 09 "M-g 09 09 "center 20 "cursor 09 "C-l 0d 0a + 0d 0a + 09 "search 09 09 "C-s 09 09 "toggle 20 "colors 09 "M-c 0d 0a + 09 "regex 20 "search 09 "M-s 09 09 "toggle 20 "tabs 09 "M-t 0d 0a + 09 "next 20 "match 09 "C-s 20 "(n) 0d 0a + 09 "prev 20 "match 09 "C-r 20 "(p) 0d 0a + 09 "end 20 "search 09 "enter 0d 0a + 09 "cancel 20 "search 09 "C-g 0d 0a + 0d 0a + 09 09 09 "press 20 "any 20 "key 20 "to 20 "continue... 0d 0a + 00 + +( perform the undo action ) +@undo ( -> ) + #01 .state/in-undo STZ + ;undo-stack/pos LDA2 ;undo-stack/data EQU2 ?&noop + ;undo-stack/pos LDA2 #0003 SUB2 + DUP2 ;undo-stack/pos STA2 + LDA2k STH2 ( pos [addr] ) + INC2 INC2 LDA STH2r ( c addr ) + jump-to-pos + DUP #00 EQU ?&delete + DUP #0a EQU ?&newline + DUP #09 EQU ?&tab + !insert + &newline POP !newline + &tab POP !insert-tab + &delete POP !delete + &noop BRK + +( free up space in the undo stack by evicting the oldest entry ) +@u-free ( -> ) + ;undo-stack/data STH2k #0003 ADD2 ( st+3 [st] ) + &loop LDAk STH2kr STA + INC2 INC2r + DUP2 ;undo-stack/pos LDA2 LTH2 ?&loop + ;undo-stack/pos LDA2k #0003 SUB2 SWP2 STA2 + POP2 POP2r JMP2r + +( push a new item on the undo stack ) +@u-push ( c^ addr* -> ) + ;undo-stack/pos DUP2 LDA2 ( c^ addr* top* pos* ) + GTH2 ( c^ addr* top>pos^ ) ?&has-room ( c^ addr* ) + u-free + &has-room ;undo-stack/pos LDA2 ( c^ addr* pos* ) + STH2k ( c addr pos [pos] ) + STA2 ( c [pos] ) + STH2r INC2 INC2 STH2k ( c pos+2 [pos+2] ) + STA ( [pos+2] ) + STH2r INC2 ( pos+3 ) ;undo-stack/pos STA2 ( [] ) + JMP2r + +( path to file being edited ) +@filename $80 + +( stack of data to undo ) +( ) +( each item in the stack consists of: ) +( - 2 bytes: address to jump to ) +( - 1 byte: character to insert, or \0 to delete ) +( ) +( pos points to the next open stack frame. ) +( when pos points to data the stack is empty. ) +( when pos points to pos the stack is full. ) +@undo-stack [ + &data $180 ( 128 steps ) + &pos :undo-stack/data +] + +( actual file data to be edited ) +@data $ce80 + +( end of femto.tal ) diff --git a/test/regex.tal b/test/regex.tal new file mode 100644 index 0000000..731ab95 --- /dev/null +++ b/test/regex.tal @@ -0,0 +1,913 @@ +( regex.tal ) +( ) +( compiles regex expression strings into regex nodes, then uses ) +( regex nodes to match input strings. ) +( ) +( two methods are currently supported: ) +( ) +( 1. match ) +( ) +( when matching the regex must match the entire string. this means ) +( that it is unnecessary to use ^ and $ when matching, since their ) +( effect is implied. it also means that that dot nodes will match ) +( any characters at all including newlines. ) +( ) +( match returns 01 if the string was matched and 00 otherwise. ) +( ) +( 2. search ) +( ) +( when searching the regex attempts to find matching substrings ) +( in the given string. this means that after successfully finding ) +( a match, search may be called on the remaining substring to find ) +( more matches. ) +( ) +( when searching, ^ matches the beginning of the string OR a line. ) +( $ matches the end of a line OR the end of the entire string. ) +( the dot nodes will not match newline characters, which must be ) +( matched explicitly. ) +( ) +( finally, search-multiline will cause ^ and $ to use the matching ) +( behavior (i.e. only matching the beginning or end of a string). ) +( however dot nodes will still not match newline characters. ) +( ) +( search returns 01 if the string was matched and 00 otherwise. ) +( additionally, the @search-start and @search-end addresses will ) +( contain the starting location and match boundary of the matching ) +( substring. ) +( ) +( regex node types: ) +( ) +( NAME DESCRIPTION STRUCT ) +( empty matches empty string [ #01 next* ] ) +( dot matches any one char [ #02 next* ] ) +( lit matches one specific char (c) [ #03 c^ next* ] ) +( or matches either left or right [ #04 left* right* ] ) +( star matches expr zero-or-more times [ #05 expr* next* ] ) +( (NOTE: r.expr.next must be r) ) +( caret matches start of line/string [ #06 next* ] ) +( dollar matches end of line/string [ #07 next* ] ) +( lpar starts subgroup region [ #08 i^ next* ] ) +( rpar ends subgroup region [ #09 i^ next* ] ) +( class character class, e.g. [a-z] [ #0a next* n^ ... ] ) +( (NOTE: n is the number of pairs in ...) ) +( nclass negative class, e.g. [^a-z] [ #0b next* n^ ... ] ) +( (NOTE: n is the number of pairs in ...) ) +( ) +( `or` and `star` have the same structure and are handled by the ) +( same code (;do-or). however, the node types are kept different ) +( to make it clearer how to parse and assemble the nodes. ) +( ) +( dollar nodes contain a next pointer even though this usually ) +( will not be needed. ) +( ) +( lpar and rpar contain addresses pointing between subgroup-bot ) +( and subgroup-bot. rpar's address will always be +2 relative to ) +( the corresponding lpar address. ) +( ) +( concatenation isn't a node, it is implied by the *next addr. ) +( a next value of #0000 signals the end of the regex. ) +( ) +( in these docs str* is an address to a null-terminated string. ) +( regexes should not include nulls and cannot match them (other ) +( than the null which signals the end of a string). ) + +( TODO: we have lpar and rpar nodes but aren't using them yet ) +( 1. need to modify c-lpar and c-par ) +( 2. we need to store subgroup-posd in regions during parsing: ) +( a. need to store the current pos in the region ) +( b. need to call start to move subgroup-pos forward ) +( 3. when finishing parsing a region we need lpar/rpar nodes ) +( 4. we also need to store "last started subgroup" on the stack ) +( 5. when backtracking we must rewind to "last started" subgroup ) + +%debug { #ff #0e DEO } +%emit! { #18 DEO } +%space { #20 emit! } +%newline { #0a emit! } + +( now that uxnasm throws errors about writing into the zero page ) +( we have to do something like this to be able to compile library ) +( code. we have to guess what offset to use since it needs to ) +( avoid conficting with the program we're included in. ) +( ) +( remove this if needed when including it in other projects. ) +( |2000 ) + +( ERROR HANDLING ) + +( using error! will print the given message before causing ) +( the interpreter to halt. ) +@error!! ( msg* -> ) + LIT "! emit! space + &loop LDAk #00 EQU ,&done JCN + LDAk emit! INC2 ,&loop JMP + &done POP2 newline #ff0e DEO #010f DEO BRK + +( error messages ) +@unknown-node-type "unknown 20 "node 20 "type 00 +@mismatched-parens "mismatched 20 "parenthesis 00 +@stack-is-full "stack 20 "is 20 "full 00 +@stack-is-empty "stack 20 "is 20 "empty 00 +@arena-is-full "arena 20 "is 20 "full 00 +@star-invariant "star 20 "invariant 20 "failed 00 +@plus-invariant "plus 20 "invariant 20 "failed 00 +@qmark-invariant "question 20 "mark 20 "invariant 20 "failed 00 + +( REGEX MATCHING ) + +( use stored regex to match against a stored string. ) +( ) +( regex* should be the address of a compiled regex ) +( such as that returned from ;compile. ) +( ) +( str* should be a null-terminated string. ) +( ) +( returns true if the string, and false otherwise. ) +@rx-match ( str* regex* -> bool^ ) + #01 ;match-multiline STA + #00 ;search-mode STA + ;rx-reset JSR2 + ;loop JMP2 + +@rx-search-multiline ( str* regex* -> bool^ ) + #01 ;match-multiline STA + #01 ;search-mode STA + ,rx-search/main JMP + +@rx-search ( str* regex* -> bool^ ) + #00 ;match-multiline STA + #01 ;search-mode STA + &main STH2 ( s* [r*] ) + DUP2 ;string-start STA2 ( s* [r*] ) + &loop LDAk #00 EQU ,&eof JCN ( s* [r*] ) + ;rx-reset JSR2 ( s* [r*] ) + DUP2 ;search-start STA2 ( s* [r*] ) + DUP2 STH2kr ;loop JSR2 ( s* b^ [r*] ) + ,&found JCN ( s* [r*] ) + INC2 ,&loop JMP ( s+1* [r*] ) + &found POP2 POP2r #01 JMP2r ( 01 ) + &eof ;rx-reset JSR2 ( s* [r*] ) + DUP2 ;search-start STA2 ( s* [r*] ) + STH2r ;loop JMP2 ( b^ ) + +( reset all "runtime" memory allocated during match/search ) +@rx-reset ( -> ) + ;reset-stack JSR2 + ;subgroup-reset JMP2 + +( loop used during matching ) +( ) +( we don't use the return stack here since that ) +( complicates the back-tracking we need to do. ) +( ultimately this code will issue a JMP2r to ) +( return a boolean, which is where the stack ) +( effects signature comes from. ) +@loop ( s* r* -> bool^ ) + LDAk #01 EQU ;do-empty JCN2 + LDAk #02 EQU ;do-dot JCN2 + LDAk #03 EQU ;do-literal JCN2 + LDAk #04 EQU ;do-or JCN2 + LDAk #05 EQU ;do-or JCN2 ( same code as the or case ) + LDAk #06 EQU ;do-caret JCN2 + LDAk #07 EQU ;do-dollar JCN2 + LDAk #08 EQU ;do-lpar JCN2 + LDAk #09 EQU ;do-rpar JCN2 + LDAk #0a EQU ;do-ccls JCN2 + LDAk #0b EQU ;do-ncls JCN2 + LDAk #dd ;unknown-node-type ;error!! JSR2 + +( used when we hit a dead-end during matching. ) +( ) +( if stack is non-empty we have a point we can resume from. ) +@goto-backtrack ( -> bool^ ) + ;stack-exist JSR2 ,&has-stack JCN ( do we have stack? ) + #00 JMP2r ( no, return false ) + &has-stack + ;pop4 JSR2 + ;subgroup-backtrack JSR2 + ;goto-next JMP2 ( yes, resume from the top ) + +( follow the given address (next*) to continue matching ) +@goto-next ( str* next* -> bool^ ) + DUP2 #0000 GTH2 ,&has-next JCN + POP2 LDAk #00 EQU ,&end-of-string JCN + ;search-mode LDA ,&end-of-search JCN + POP2 ;goto-backtrack JMP2 + &end-of-search DUP2 ;search-end STA2 + &end-of-string POP2 #01 JMP2r + &has-next ;loop JMP2 + +( handle the empty node -- just follow the next pointer ) +@do-empty ( str* regex* -> bool^ ) + INC2 LDA2 ( load next ) + ;goto-next JMP2 ( jump to next ) + +( FIXME: not currently used ) +@do-lpar ( str* regex* -> bool^ ) + STH2 DUP2 ( s s [r] ) + INC2r LDA2kr STH2r ( s s i [r+1] ) + ;subgroup-start JSR2 ( s [r+1] ) + STH2r INC2 INC2 ( s r+3 ) + LDA2 ;goto-next JMP2 ( jump to next ) + +( FIXME: not currently used ) +@do-rpar ( str* regex* -> bool^ ) + STH2 DUP2 ( s s [r] ) + INC2r LDA2kr STH2r ( s s i [r+1] ) + ;subgroup-finish JSR2 ( s [r+1] ) + STH2r INC2 INC2 ( s r+3 ) + LDA2 ;goto-next JMP2 ( jump to next ) + +( handle dot -- match any one character ) +@do-dot ( str* regex* -> bool^ ) + INC2 LDA2 STH2 ( load and stash next ) + LDAk #00 NEQ ,&non-empty JCN ( is there a char? ) + &backtrack POP2r POP2 ;goto-backtrack JMP2 ( no, clear stacks and backtrack ) + &non-empty LDAk #0a NEQ ,&match JCN ( yes, match unless \n in search-mode ) + ;search-mode LDA ,&backtrack JCN ( if \n and search-mode, treat as EOF ) + &match INC2 STH2r ;goto-next JMP2 ( on match: inc s, restore and jump ) + +( hande caret -- match string start (or possibly after newline) without advancing ) +@do-caret ( str* regex* -> bool^ ) + INC2 LDA2 STH2 ( load and stash next ) + DUP2 ;string-start LDA2 EQU2 ,&at-start JCN ( at string start? ) + ;match-multiline LDA ,&no-match JCN ( are we in multi-line mode? ) + DUP2 #0001 SUB2 LDA #0a EQU ,&at-start JCN ( just after newline? ) + &no-match POP2r POP2 ;goto-backtrack JMP2 ( clear stacks and backtrack ) + &at-start STH2r ;goto-next JMP2 ( go to next without advancing ) + +( hande dollar -- match string end (or possibly before newline) without advancing ) +@do-dollar ( str* regex* -> bool^ ) + INC2 LDA2 STH2 ( load and stash next ) + LDAk #00 EQU ,&at-end JCN ( at string end? ) + ;match-multiline LDA ,&no-match JCN ( are we in multi-line mode? ) + LDAk #0a EQU ,&at-end JCN ( at newline? ) + &no-match POP2r POP2 ;goto-backtrack JMP2 ( clear stacks and backtrack ) + &at-end STH2r ;goto-next JMP2 ( go to next without advancing ) + +( handle literal -- match one specific character ) +@do-literal ( str* regex* -> bool^ ) + INC2 + LDAk STH ( store c ) + INC2 LDA2 STH2 ROTr ( store next, move c to top ) + LDAk + STHr EQU ,&matches JCN ( do we match this char? ) + POP2r POP2 ;goto-backtrack JMP2 ( no, clear stacks and backtrack ) + &matches + INC2 STH2r ;goto-next JMP2 ( yes, inc s, restore and jump ) + +( handle or -- try the left branch but backtrack to the right if needed ) +( ) +( this also handles asteration, since it ends up having the same structure ) +@do-or ( str* regex* -> bool^ ) + INC2 OVR2 OVR2 #0002 ADD2 ( s r+1 s r+3 ) + LDA2 ;push4 JSR2 ( save (s, right) in the stack for possible backtracking ) + LDA2 ;loop JMP2 ( continue on left branch ) + +@matches-cls ( str* regex* -> bool^ ) + OVR2 LDA ,¬-null JCN + ( needs to have a character to match ) + POP2 POP2 ;goto-backtrack JMP2 + ¬-null + DUP2 INC2 LDA2 STH2 ( str regex [next] ) + OVR2 INC2 STH2 ( str regex [str+1 next] ) + SWP2 LDA STH ( regex [c str+1 next] ) + #0003 ADD2 LDAk #00 SWP #0002 MUL2 ( r+3 len*2 [c str+1 next] ) + SWP2 INC2 STH2k ADD2 STH2r ( r+4+len*2 r+4 [c str+1 next] ) + &loop ( limit addr [c str+1 next] ) + EQU2k ,&missing JCN + LDAk STHkr GTH ,&next1 JCN INC2 + LDAk STHkr LTH ,&next2 JCN ,&found JMP + &next1 INC2 + &next2 INC2 ,&loop JMP + &missing POP2 POP2 POPr ,&negated LDR ,&match JCN + &no-match POP2r POP2r ;goto-backtrack JMP2 + &found POP2 POP2 POPr ,&negated LDR ,&no-match JCN + &match STH2r STH2r ;goto-next JMP2 + [ &negated $1 ] + +( ) +@do-ccls ( str* regex* -> bool^ ) + #00 ,matches-cls/negated STR ,matches-cls JMP + +( ) +@do-ncls ( str* regex* -> bool^ ) + #01 ,matches-cls/negated STR ,matches-cls JMP + +( REGEX PARSING ) + +( do we match across lines? ) +( - should be true when matching ) +( - can be true or false when searching ) +( - affects syntax of . ^ and $ ) +@match-multiline $1 + +( are we in searching mode? ) +( - should be true when searching ) +( - should be false when matching ) +@search-mode $1 + +( ) +@string-start $2 +@search-start $2 +@search-end $2 + +( track the position in the input string ) +@pos $2 + +( track how many levels deep we are in parenthesis ) +@parens $2 + +( how many subgroups have we seen so far? ) +@groupnum $1 + +( read and increment pos ) +@read ( -> c^ ) + ;pos LDA2k ( pos s ) + LDAk STHk #00 EQU ( pos s c=0 [c] ) + ,&is-eof JCN ( pos s [c] ) + INC2 ( pos s+1 [c] ) + SWP2 STA2 ,&return JMP ( [c] ) + &is-eof POP2 POP2 + &return STHr ( c ) + JMP2r + +( is pos currently pointing to a star? ) +@peek-to-star ( -> is-star^ ) + ;pos LDA2 LDA LIT "* EQU JMP2r + +( is pos currently pointing to a plus? ) +@peek-to-plus ( -> is-plus^ ) + ;pos LDA2 LDA LIT "+ EQU JMP2r + +( is pos currently pointing to a qmark? ) +@peek-to-qmark ( -> is-qmark^ ) + ;pos LDA2 LDA LIT "? EQU JMP2r + +( just increment pos ) +@skip + ;pos LDA2 INC2 ;pos STA2 JMP2r + +( TODO: ) +( 1. character groups: [] and [^] ) +( 2. symbolic escapes, e.g. \n ) + +( STRETCH GOALS: ) +( a. ^ and $ ) +( b. counts: {n} and {m,n} ) +( c. substring matching, i.e. searching ) +( d. subgroup extraction ) +( e. back-references, e.g \1 ) +( f. non-capturing groups, e.g. (?:) ) + +( compile an expression string into a regex graph ) +( ) +( the regex will be allocated in the arena; if there is not ) +( sufficient space an error will be thrown. ) +( ) +( the stack will also be used during parsing although unlike ) +( the arena it will be released once compilation ends. ) +@compile ( expr* -> regex* ) + ;pos STA2 + #0000 ;parens STA2 + ;rx-reset JSR2 + ;compile-region JMP2 + +( the basic strategy here is to build a stack of non-or ) +( expressions to be joined together at the end of the ) +( region. each stack entry has two regex addresses: ) +( - the start of the regex ) +( - the current tail of the regex ) +( when we concatenate a new node to a regex we update ) +( the second of these but not the first. ) +( ) +( the bottom of the stack for a given region is denoted ) +( by #ffff #ffff. above that we start with #0000 #0000 ) +( to signal an empty node. ) +@compile-region ( -> r2* ) + #ffff #ffff ;push4 JSR2 ( stack delimiter ) + #0000 #0000 ;push4 JSR2 ( stack frame start ) +@compile-region-loop + ;read JSR2 + DUP #00 EQU ;c-done JCN2 + DUP LIT "| EQU ;c-or JCN2 + DUP LIT ". EQU ;c-dot JCN2 + DUP LIT "^ EQU ;c-caret JCN2 + DUP LIT "$ EQU ;c-dollar JCN2 + DUP LIT "( EQU ;c-lpar JCN2 + DUP LIT ") EQU ;c-rpar JCN2 + DUP LIT "[ EQU ;c-lbrk JCN2 + DUP LIT "] EQU ;c-rbrk JCN2 + DUP LIT "\ EQU ;c-esc JCN2 + DUP LIT "* EQU ;c-star JCN2 + DUP LIT "+ EQU ;c-plus JCN2 + DUP LIT "? EQU ;c-qmark JCN2 + ;c-char JMP2 + +( either finalize the given r0/r1 or else wrap it in ) +( a star node if a star is coming up next. ) +( ) +( we use this look-ahead approach rather than compiling ) +( star nodes directly since the implementation is simpler. ) +@c-peek-and-finalize ( r0* r1* -> r2* ) + ;peek-to-star JSR2 ( r0 r1 next-is-star? ) ,&next-is-star JCN + ;peek-to-plus JSR2 ( r0 r1 next-is-plus? ) ,&next-is-plus JCN + ;peek-to-qmark JSR2 ( r0 r1 next-is-qmark? ) ,&next-is-qmark JCN + ,&finally JMP ( r0 r1 ) + &next-is-star ;skip JSR2 POP2 ;alloc-star JSR2 DUP2 ,&finally JMP + &next-is-plus ;skip JSR2 POP2 ;alloc-plus JSR2 DUP2 ,&finally JMP + &next-is-qmark ;skip JSR2 POP2 ;alloc-qmark JSR2 DUP2 ,&finally JMP + &finally ;push-next JSR2 ;compile-region-loop JMP2 + +( called when we reach EOF of the input string ) +( ) +( as with c-rpar we have to unroll the current level ) +( of the stack, building any or-nodes that are needed. ) +( ) +( this is where we detect unclosed parenthesis. ) +@c-done ( c^ -> r2* ) + POP + ;parens LDA2 #0000 GTH2 ,&mismatched-parens JCN + ;unroll-stack JSR2 POP2 JMP2r + &mismatched-parens ;mismatched-parens ;error!! JSR2 + +( called when we read "|" ) +( ) +( since we defer building or-nodes until the end of the region ) +( we just start a new stack frame and continue. ) +@c-or ( c^ -> r2* ) + POP + #0000 #0000 ;push4 JSR2 + ;compile-region-loop JMP2 + +( called when we read left parenthesis ) +( ) +( this causes us to: ) +( ) +( 1. increment parens ) +( 2. start a new region on the stack ) +( 3. jump to compile-region to start parsing the new region ) +@c-lpar ( c^ -> r2* ) + POP + ;parens LDA2 INC2 ;parens STA2 ( parens++ ) + ;compile-region JMP2 + +( called when we read right parenthesis ) +( ) +( this causes us to: ) +( ) +( 1. check for mismatched parens ) +( 2. decrement parens ) +( 3. unroll the current region on the stack into one regex node ) +( 4. finalize that node and append it to the previous region ) +( 5. continue parsing ) +@c-rpar ( c^ -> r2* ) + POP + ;parens LDA2 #0000 EQU2 ,&mismatched-parens JCN + ;parens LDA2 #0001 SUB2 ;parens STA2 ( parens-- ) + ;unroll-stack JSR2 + ;c-peek-and-finalize JMP2 + &mismatched-parens ;mismatched-parens ;error!! JSR2 + +( doesn't support weird things like []abc] or [-abc] or similar. ) +( doesn't currently handle "special" escapes such as \n ) +@c-lbrk ( c^ -> r2* ) + POP LITr 00 ;pos LDA2 ( pos [0] ) + LDAk LIT "^ NEQ ,&normal JCN INCr INC2 ( pos [negated?^] ) + &normal + #0a STHr ADD ( src* type^ ) + ;arena-pos LDA2 STH2k ( src* type^ dst* [dst*] ) + STA LIT2r 0004 ADD2r ( src* [dst+4] ) + &left-parse ( src* [dst*] ) + LDAk LIT "] EQU ,&done JCN + LDAk LIT "- EQU ,&error JCN + LDAk LIT "\ NEQ ,&left JCN INC2 + &left + LDAk STH2kr STA INC2r + DUP2 INC2 LDA LIT "- NEQ ,&pre-right JCN INC2 INC2 + LDAk LIT "] EQU ,&error JCN + LDAk LIT "- EQU ,&error JCN + &pre-right + LDAk LIT "\ NEQ ,&right JCN INC2 + &right + LDAk STH2kr STA INC2 INC2r ,&left-parse JMP + &done ( src* [dst*] ) + INC2 ;pos STA2 STH2r ( dst* ) + DUP2 ;arena-pos LDA2 ( dst dst a ) + #0004 ADD2 SUB2 #0002 DIV2 NIP ( dst (dst-(a+4))/2 ) + ;arena-pos LDA2 STH2k #0003 ADD2 STA ( dst [a] ) + ;arena-pos STA2 STH2r ( a ) + #0000 OVR2 INC2 STA2 ( a ) + DUP2 ;c-peek-and-finalize JMP2 + &error + #abcd #0000 DIV ( TODO error here ) + +@c-rbrk ( c^ -> r2* ) + POP + #0000 DIV ( invariant: should never be seen ) + +( called when we read "." ) +( ) +( allocates a dot-node and continues. ) +@c-dot ( c^ -> r2* ) + POP + #02 ;alloc3 JSR2 + DUP2 ;c-peek-and-finalize JMP2 + +( called when we read "^" ) +( ) +( allocates a caret-node and continues. ) +@c-caret ( c^ -> r2* ) + POP + #06 ;alloc3 JSR2 + DUP2 ;c-peek-and-finalize JMP2 + +( called when we read "$" ) +( ) +( allocates a dollar-node and continues. ) +@c-dollar ( c^ -> r2* ) + POP + #07 ;alloc3 JSR2 + DUP2 ;c-peek-and-finalize JMP2 + +( called when we read "\" ) +( ) +( handles special sequences: \a \b \t \n \v \f \r ) +( ) +( otherwise, allocates a literal of the next character. ) +@c-esc ( c^ -> r2* ) + POP ;read JSR2 + DUP LIT "a EQU ,&bel JCN + DUP LIT "b EQU ,&bs JCN + DUP LIT "t EQU ,&tab JCN + DUP LIT "n EQU ,&nl JCN + DUP LIT "v EQU ,&vtab JCN + DUP LIT "f EQU ,&ff JCN + DUP LIT "r EQU ,&cr JCN + &default ;c-char JMP2 + &bel POP #07 ,&default JMP + &bs POP #08 ,&default JMP + &tab POP #09 ,&default JMP + &nl POP #0a ,&default JMP + &vtab POP #0b ,&default JMP + &ff POP #0c ,&default JMP + &cr POP #0d ,&default JMP + +( called when we read any other character ) +( ) +( allocates a literal-node and continues. ) +@c-char ( c^ -> r2* ) + ;alloc-lit JSR2 ( lit ) + DUP2 ;c-peek-and-finalize JMP2 + +( called if we parse a "*" ) +( ) +( actually calling this means the code broke an invariant somewhere. ) +@c-star ( c^ -> regex* ) + POP + ;star-invariant ;error!! JSR2 + +( called if we parse a "+" ) +( ) +( actually calling this means the code broke an invariant somewhere. ) +@c-plus ( c^ -> regex* ) + POP + ;plus-invariant ;error!! JSR2 + +( called if we parse a "?" ) +( ) +( actually calling this means the code broke an invariant somewhere. ) +@c-qmark ( c^ -> regex* ) + POP + ;qmark-invariant ;error!! JSR2 + +( ALLOCATING REGEX NDOES ) + +@rx-node-sizes + ( 00 01 02 03 04 05 06 07 08 09 0a 0b ) + [ 00 03 03 04 ] [ 05 05 03 03 ] [ 04 04 00 00 ] + +@alloc3 ( mode^ -> r* ) + #0000 ROT ( 00 00 mode^ ) + #03 ;alloc JSR2 ( 00 00 mode^ addr* ) + STH2k STA ( addr <- mode ) + STH2kr INC2 STA2 ( addr+1 <- 0000 ) + STH2r JMP2r ( return addr ) + +@alloc-empty ( -> r* ) + #01 ;alloc3 JMP2 + +@alloc-lit ( c^ -> r* ) + #03 #0000 SWP2 ( 0000 c^ 03 ) + #04 ;alloc JSR2 ( 0000 c^ 03 addr* ) + STH2k STA ( addr <- 03 ) + STH2kr INC2 STA ( addr+1 <- c ) + STH2kr #0002 ADD2 STA2 ( addr+2 <- 0000 ) + STH2r JMP2r ( return addr ) + +@alloc-or ( right* left* -> r* ) + #05 ;alloc JSR2 STH2 ( r l [x] ) + #04 STH2kr STA ( r l [x] ) + STH2kr INC2 STA2 ( r [x] ) + STH2kr #0003 ADD2 STA2 ( [x] ) + STH2r JMP2r + +@alloc-star ( expr* -> r* ) + #05 ;alloc JSR2 STH2 ( expr [r] ) + #05 STH2kr STA ( expr [r] ) + DUP2 STH2kr INC2 STA2 ( expr [r] ) + #0000 STH2kr #0003 ADD2 STA2 ( expr [r] ) + STH2kr SWP2 ( r expr [r] ) + ;set-next JSR2 ( [r] ) + STH2r JMP2r + +@alloc-plus ( expr* -> r* ) + #05 ;alloc JSR2 STH2 ( expr [r] ) + #05 STH2kr STA ( expr [r] ) + DUP2 STH2kr INC2 STA2 ( expr [r] ) + #0000 STH2kr #0003 ADD2 STA2 ( expr [r] ) + STH2r SWP2 STH2k ( r expr [expr] ) + ;set-next JSR2 ( [expr] ) + STH2r JMP2r + +@alloc-qmark ( expr* -> r* ) + ;alloc-empty JSR2 STH2k ( expr e [e] ) + OVR2 ;set-next JSR2 ( expr [e] ) + #05 ;alloc JSR2 STH2 ( expr [r e] ) + #04 STH2kr STA ( expr [r e] ) + STH2kr INC2 STA2 ( [r e] ) + SWP2r STH2r STH2kr ( e r [r] ) + #0003 ADD2 STA2 ( [r] ) + STH2r JMP2r + +( if r is 0000, allocate an empty node ) +@alloc-if-null ( r* -> r2* ) + ORAk ,&return JCN POP2 ;alloc-empty JSR2 &return JMP2r + +( unroll one region of the parsing stack, returning ) +( a single node consisting of an alternation of ) +( all elements on the stack. ) +( ) +( this unrolls until it hits #ffff #ffff, which it ) +( also removes from the stack. ) +@unroll-stack ( -> start* end* ) + ;pop4 JSR2 STH2 ( r ) + #00 STH ( count items in stack frame ) + ;alloc-if-null JSR2 ( replace 0000 with empty ) + &loop ( r* ) + ;pop4 JSR2 POP2 ( r x ) + DUP2 #ffff EQU2 ( r x x-is-end? ) ,&done JCN + INCr ( items++ ) + ;alloc-or JSR2 ( r|x ) ,&loop JMP + &done + ( r ffff ) + POP2 + STHr ,&is-or JCN + STH2r JMP2r + &is-or + POP2r + ;alloc-empty JSR2 OVR2 OVR2 SWP2 ( r empty empty r ) + ;set-next-or JSR2 + JMP2r + +( add r to the top of the stock. ) +( ) +( in particular, this will write r into tail.next ) +( before replacing tail with r. ) +@push-next ( r0 r1 -> ) + ;pop4 JSR2 ( r0 r1 x0 x1 ) + DUP2 #0000 EQU2 ( r0 r1 x0 x1 x1=0? ) ,&is-zero JCN + STH2 ROT2 STH2r ( r1 x0 r0 x1 ) + ;set-next JSR2 SWP2 ( x0 r1 ) + ;push4 JSR2 + JMP2r + &is-zero POP2 POP2 ;push4 JMP2 + +( load the given address: ) +( ) +( 1. if it points to 0000, update it to target ) +( 2. otherwise, call set-next on it ) +@set-next-addr ( target* addr* -> ) + LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN + LDA2 ;set-next JMP2 + &is-zero STA2 JMP2r + +( set regex.next to target ) +( ) +( node types 1-7 are defined. ) +( ) +( all node types except star (5) and lit (3) store their next ) +( pointer one byte off of their own address. ) +( ) +( since both branches of an or (4) node are supposed to meet ) +( back up we only bother taking the left branch. otherwise ) +( you can end up double-appending things. ) +@set-next ( target* regex* -> ) + LDAk #01 LTH ,&unknown JCN + LDAk #0b GTH ,&unknown JCN + LDAk #09 GTH ,&cc JCN + LDAk #00 SWP ;rx-node-sizes ADD2 + LDA #00 SWP ADD2 #0002 SUB2 + ;set-next-addr JMP2 + &cc INC2 ;set-next-addr JMP2 + &unknown LDAk #ee ;unknown-node-type ;error!! JSR2 + +@set-next-or-addr ( target* addr* -> ) + LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN + LDA2 ;set-next-or JMP2 + &is-zero STA2 JMP2r + +( this is used when first building or-nodes ) +( structure will always be: ) +( [x1, [x2, [x3, ..., [xm, xn]]]] ) +( so we recurse on the right side but not the left. ) +@set-next-or ( target* regex* -> ) + LDAk #04 NEQ ,&!4 JCN + OVR2 OVR2 INC2 ;set-next-addr JSR2 + #0003 ADD2 ;set-next-or-addr JMP2 + &!4 ;set-next JMP2 + +( STACK OPERATIONS ) +( ) +( we always push/pop 4 bytes at a time. the stack has a fixed ) +( maximum size it can use, defined by ;stack-top. ) +( ) +( the stack can be cleared using ;reset-stack, which resets ) +( the stack pointers but does not zero out any memory. ) +( ) +( stack size is 4096 bytes here but is configurable. ) +( in some cases it could be very small but this will limit ) +( how many branches can be parsed and executed. ) + +( push 4 bytes onto the stack ) +@push4 ( str* regex* -> ) + ;assert-stack-avail JSR2 ( check for space ) + ;stack-pos LDA2 #0002 ADD2 STA2 ( cell[2:3] <- regex ) + ;stack-pos LDA2 STA2 ( cell[0:1] <- str ) + ;stack-pos LDA2 #0004 ADD2 ;stack-pos STA2 ( pos += 4 ) + JMP2r + +( pop 4 bytes from the stack ) +@pop4 ( -> str* regex* ) + ;assert-stack-exist JSR2 ( check for space ) + ;stack-pos LDA2 ( load stack-pos ) + #0002 SUB2 LDA2k STH2 ( pop and stash regex ) + #0002 SUB2 LDA2k STH2 ( pop and stash str ) + ;stack-pos STA2 ( save new stack-pos ) + STH2r STH2r ( restore str and regex ) + JMP2r + +( reset stack pointers ) +@reset-stack ( -> ) + ;stack-bot ;stack-pos STA2 JMP2r ( pos <- 0 ) + +( can more stack be allocated? ) +@stack-avail ( -> bool^ ) + ;stack-pos LDA2 ;stack-top LTH2 JMP2r + +( is the stack non-empty? ) +@stack-exist ( -> bool^ ) + ;stack-pos LDA2 ;stack-bot GTH2 JMP2r + +( error if stack is full ) +@assert-stack-avail ( -> ) + ;stack-avail JSR2 ,&ok JCN ;stack-is-full ;error!! JSR2 &ok JMP2r + +( error is stack is empty ) +@assert-stack-exist ( -> ) + ;stack-exist JSR2 ,&ok JCN ;stack-is-empty ;error!! JSR2 &ok JMP2r + +( stack-pos points to the next free stack position (or the top if full). ) +@stack-pos :stack-bot ( the next position to insert at ) + +( stack-bot is the address of the first stack position. ) +( stack-top is the address of the first byte beyond the stack. ) +@stack-bot $800 @stack-top ( holds 512 steps (2048 bytes) ) + +( ARENA OPERATIONS ) +( ) +( the arena represents a heap of memory that can easily be ) +( allocated in small amounts. ) +( ) +( the entire arena can be reclaimed using ;reset-arena, but ) +( unlike systems such as malloc/free, the arena cannot relcaim ) +( smaller amounts of memory. ) +( ) +( the arena is used to allocate regex graph nodes, which are ) +( dynamically-allocated as the regex string is parsed. once ) +( a regex is no longer needed the arena may be reclaimed. ) +( ) +( arena size is 1024 bytes here but is configurable. ) +( smaller sizes would likely be fine but will limit the ) +( overall complexity of regexes to be parsed and executed. ) + +( reclaim all the memory used by the arena ) +@reset-arena ( -> ) + ;arena-bot ;arena-pos STA2 JMP2r + +( currently caller is responsible for zeroing out memory if needed ) +@alloc ( size^ -> addr* ) + #00 SWP ( size* ) + ;arena-pos LDA2 STH2k ADD2 ( pos+size* [pos] ) + DUP2 ;arena-top GTH2 ( pos+size pos+size>top? [pos] ) + ,&error JCN ( pos+size [pos] ) + ;arena-pos STA2 ( pos += size [pos] ) + STH2r JMP2r ( pos ) + &error POP2 POP2r ;arena-is-full ;error!! JSR2 + +@arena-pos :arena-bot ( the next position to allocate ) +@arena-bot $400 @arena-top ( holds up to 1024 bytes ) + +( SUBGROUP OPERATIONS ) +( ) +( subgroups are parts of the input string that are matched by ) +( parenthesized subgroup expressions in a regex. ) +( ) +( for example, (a*)(b*)(c*) has 3 subgroup expressions. ) +( ) +( during matching, subgroups are represented by 5-bytes: ) +( ) +( - byte 1: subgroup index (1-255, 0 is a marker) ) +( - bytes 2-3: absolute address of the start of the subgroup ) +( - bytes 4-5: absolute address of the limit of the subgroup ) +( ) +( this means that to get a null-terminated subgroup string ) +( you will need to copy it somewhere else with enough space, ) +( or else mutate the input string to add a null. ) +( ) +( since input strings themselves are null-terminated, and since ) +( subgroups never include null terminators, we will always have ) +( a valid limit value even for input strings that end at #ffff. ) +( ) +( during regex parsing we will use subgroup-pos to track the ) +( next available subgroup position. ) +( ) +( some regular expressions will write to a subgroup multiple times. ) +( for example when matching ((.)x)+ against "axbx": ) +( ) +( - subgroup 1 will contain "bx" ) +( - subgroup 2 will contain "b" ) +( ) +( this may necessitate backtracking. when matching ((.)x|(.)y)+ ) +( against "axby" we will make the following assignments: ) +( ) +( - position 1: ) +( + start subgroup 1 ) +( + start then finish subgroup 2: "a" ) +( - position 2: ) +( + finish subgroup 1: "ax" ) +( - position 3: ) +( + start subgroup 1 ) +( + start then finish subgroup 2: "b" ) +( - position 4: ) +( + backtrack, reverting subgroup 2 to "a" ) +( - back to position 3 again: ) +( + start then finish subgroup 3: "b" ) +( - position 4 again: ) +( + finish subgruop 1: "by" ) +( ) +( the final subgroups will be: {1: "by", 2: "a", 3: "b"} ) + +@subgroup-start ( s* i^ -> ) + ;subgroup-pos LDA2 STH2k ( s* i^ pos* [pos*] ) + ;subgroup-top LTH2 ,&next JCN #0000 DIV ( too many subgroups ) + &next ( s* i^ [pos*] ) + STH2kr STA + STH2r INC2 STA2 + JMP2r + +@subgroup-finish ( s* i^ -> ) + ;subgroup-pos LDA2 STH2k ( s* i^ pos* [pos*] ) + ;subgroup-top LTH2 ,&next JCN #0000 DIV ( too many subgroups ) + &next ( s* i^ [pos*] ) + STH2kr LDA EQU ,&ok JCN #0000 DIV ( mismatched subgroups ) + &ok ( s* [pos*] ) + STH2kr #0003 ADD2 STA2 + STH2r #0005 ADD2 ;subgroup-pos STA2 + JMP2r + +@subgroup-branch ( -> ) + ;subgroup-pos LDA2 STH2k ( pos* [pos*] ) + ;subgroup-top LTH2 ,&next JCN #0000 DIV ( too many subgroups ) + &next + #00 STH2kr STA ( [*pos] ) + STH2r #0005 ADD2 ;subgroup-pos STA2 + JMP2r + +@subgroup-backtrack ( -> ) + ;subgroup-bot ;subgroup-pos LDA2 ( bot* pos* ) + &loop ( bot* pos* ) + EQU2k ,&done JCN + LDAk #00 EQU ,&done JCN + #0005 SUB2 ,&loop JMP + &done ( bot* pos* ) + NIP2 ;subgroup-pos STA2 + JMP2r + +( does not zero out the memory in question ) +@subgroup-reset ( -> ) + ;subgroup-bot ;subgroup-pos STA2 + JMP2r + +@subgroup-pos :subgroup-bot ( the position of the first unallocated subgroup item ) +@subgroup-bot $280 @subgroup-top ( holds up to 128 subgroup assignments (640 bytes) )