From a206871a079c5fa5b1946491f1a8f56eae278271 Mon Sep 17 00:00:00 2001 From: d_m Date: Sat, 14 Sep 2024 20:41:13 -0400 Subject: [PATCH] tar listing is working --- tar.tal | 276 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 180 insertions(+), 96 deletions(-) diff --git a/tar.tal b/tar.tal index f673b4f..2dde03f 100644 --- a/tar.tal +++ b/tar.tal @@ -3,18 +3,26 @@ ( by d_m ) ( ) ( currently only supports listing the contents of tar files ) +( ) +( see https://en.wikipedia.org/wiki/Tar_(computing)#UStar_format ) ( File1 is used to read the tar file ) -( File2 is used to write files and directories ) +( TODO File2 is used to write files and directories ) |a0 @File1 [ &vec $2 &ok $2 &stat $2 &del $1 &append $1 &name $2 &len $2 &r $2 &w $2 ] |b0 @File2 [ &vec $2 &ok $2 &stat $2 &del $1 &append $1 &name $2 &len $2 &r $2 &w $2 ] |0100 ;arg-callback ;on-stdin arg/init BRK +( exit normally ) @exit ( code^ -> BRK ) #80 ORA #0f DEO BRK +( exit abnormally ) +@panic ( -> $exit ) + #010e DEO #010f DEO BRK + +( handle all provided command-line arguments ) @arg-callback ( -> ) ;arg/count LDA DUP #00 EQU ?&missing @@ -24,102 +32,104 @@ &toomany ;too-many-arguments &error print ;usage print #01 !exit -@run ( -> ) - #00 arg/read .File1/name DEO2 list #00 !exit +( run the program ) +@run ( -> BRK ) + #00 arg/read .File1/name DEO2 list-entries #00 !exit -@on-stdin ( -> BRK ) BRK - -@usage "usage: 20 "uxncli 20 "tar.rom 20 "FILENAME 0a 00 -@missing-filename "error: 20 "missing 20 "filename 0a 00 -@too-many-arguments "error: 20 "too 20 "many 20 "arguments 0a 00 -@unsupported "unsupported 20 "format 20 00 - -@panic ( -> $exit ) - #010e DEO #010f DEO BRK +( ignore stdin once we've processed the args ) +@on-stdin ( -> BRK ) + BRK +( print a null-terminated string ) @print ( s* -> ) - &loop LDAk #00 EQU ,&eof JCN - LDAk #18 DEO INC2 ,&loop JMP - &eof POP2 JMP2r + &loop LDAk ?{ POP2 JMP2r } + LDAk #18 DEO INC2 !&loop +( print up to `len` bytes from string. stops on NUL. ) +@lprint ( s* len* -> ) + OVR2 ADD2 SWP2 ( limit* s* ) + &loop LDAk ?{ !&done } ( limit* s*) + LDAk #18 DEO INC2 GTH2k ?&loop ( limit* s+1* ) + &done POP2 POP2 JMP2r ( ) + +( read 512 bytes of header for the next tar entry. ) +( assumes .File1/name is already set. ) @read-header ( -> ok^ ) - ( assume .File1/name was already written ) #0200 .File1/len DEO2 ;header .File1/r DEO2 + ( TODO validate checksum ) .File1/ok DEI2 #0200 EQU2 JMP2r -@list ( -> ) - read-header ?&ok JMP2r &ok - ( dump-header ) +( list all the entries in the tar archive ) +@list-entries ( -> ) + read-header ?{ JMP2r } ;header/filename LDA ?&non-null #800f DEO BRK &non-null ;header/type LDA ( type^ ) - DUP #00 EQU ?list-file-v ( ) - DUP LIT "0 EQU ?list-file-v ( ) - DUP LIT "5 EQU ?list-dir-v ( ) + DUP #00 EQU ?list-file-v ( type^ ) + DUP LIT "0 EQU ?list-file-v ( type^ ) + DUP LIT "5 EQU ?list-dir-v ( type^ ) + DUP LIT "7 EQU ?list-file-v ( type^ ) !list-unsupported ( ) +( non-verbose file entry listing ) @list-file ( 00^ -> ) POP LIT "f #18 DEO #2018 DEO - ;header/filename print #0a18 DEO - ;header/size load-octal11 round-up-to-512 skip !list + ;header/filename #0064 lprint #0a18 DEO + ;header/size load-octal11 round-up-to-512 skip !list-entries +( non-verbose directory entry listing ) @list-dir ( 00^ -> ) POP LIT "d #18 DEO #2018 DEO - ;header/filename print #0a18 DEO - !list + ;header/filename #0064 lprint #0a18 DEO + !list-entries +( verbose file entry listing ) @list-file-v ( 00^ -> ) POP LIT "f #18 DEO #2018 DEO -( ;header/mode #0008 dump-mem0 ) -( ;header/owner #0008 dump-mem0 ) -( ;header/group #0008 dump-mem0 ) -( ;header/size #000b dump-mem0 ) -( ;header/mtime #000b dump-mem0 ) ;header/size load-octal11 dump-longer #2018 DEO - ;header/filename print #0a18 DEO - ;header/size load-octal11 round-up-to-512 skip !list + ;header/filename #0064 lprint #0a18 DEO + ;header/size load-octal11 round-up-to-512 skip !list-entries +( verbose directory entry listing ) @list-dir-v ( 00^ -> ) POP LIT "d #18 DEO #2018 DEO -( ;header/mode #0008 dump-mem0 ) -( ;header/owner #0008 dump-mem0 ) -( ;header/group #0008 dump-mem0 ) -( ;header/size #000b dump-mem0 ) -( ;header/mtime #000b dump-mem0 ) ;header/size load-octal11 dump-longer #2018 DEO - ;header/filename print #0a18 DEO - !list + ;header/filename #0064 lprint #0a18 DEO + !list-entries +( handle unsupported directory entry listing ) @list-unsupported ( type^ -> ) - ;unsupported print emit/byte #0a18 DEO !panic - !list + ;unsupported print DUP emit/byte #2018 DEO + LIT2 "[ 18 DEO #18 DEO LIT2 "] 18 DEO + #0a18 DEO + dump-header !panic -( write data from memory into the tar file ) +( TODO write data from memory into the tar file ) @write-memory ( filename* size* data* -> ) STH2 STH2k write-file-header ( [data* size*] ) STH2r STH2r write-file-body JMP2r ( ) .File1/len +( TODO write out the file header ) @write-file-header ( filename* size* -> ) SWP2 ;header/filename copy JMP2r write-size-2 - ( TODO: checksum ) + ( TODO: compute checksum ) LIT "0 ;header/type STA #00 ;header/linkname STA JMP2r +( TODO write file body into archive ) @write-file-body ( size* data* -> ) SWP2 .File1/len DEO2 .File1/w DEO2 JMP2r -@mod ( x* y* -> x%y* ) - DIV2k MUL2 SUB2 JMP2r - +( TODO write file size, limited to 64k ) @write-size-2 ( size* -> ) ;header/size STH2 ( size* [start*] ) LIT2r 000a ADD2r ( size* [start* last*] ) @@ -133,6 +143,8 @@ &done ( zero* [start* pos*] ) POP2 POP2r POP2r JMP2r ( ) +( TODO: copy string from src to dst ) +( TODO: need a length param too ) @copy ( src* dst* -> ) STH2 &loop @@ -140,32 +152,44 @@ POP2 POP2r JMP2r &ok INC2 INC2r !&loop -@read-error "error 20 "reading 20 "data 0a 00 - -( skips n bytes, specified as a 5-byte integer ) +( skips `n` bytes forward in File1, specified as a 5-byte integer ) +( ) +( since we can only actually read 32k at a time, and since we can ) +( only easily work with 2-byte shorts, we first read the lowest ) +( two bytes and skip that much. then, for the higher byte, we can ) +( do two skips of 32k for each unit found. ) @skip ( carry^ hi* lo* -> ) skip-lo ( carry^ hi* ) skip-hi ( carry^ ) ?skip-4g JMP2r ( ) -@skip-4g ( -> ) skip-2g ( fall-through ) +( unconditionally skip 4GiB, that is 4294967296 bytes ) +@skip-4g ( -> ) skip-2g ( >> ) + +( unconditionally skip 2GiB, that is 2147483648 bytes ) @skip-2g ( -> ) #8000 !skip-hi -( skips hi*2^16 bytes ) +( skips `hi*65536` bytes ) +( - 0001 will skip 65536 bytes ) +( - 0010 will skip 1048576 bytes ) +( - ffff will skip 4294901760 bytes ) @skip-hi ( hi* -> ) #0000 SWP2 SUB2 ( -hi* ) &loop ORAk ?&ok POP2 JMP2r ( ) &ok skip-64k INC2 !&loop ( -hi+1* ) -@skip-64k ( -> ) skip-32k ( fall-through ) +( skips exactly 65536 bytes ) +@skip-64k ( -> ) skip-32k ( >> ) + +( skips exactly 32768 bytes ) @skip-32k ( -> ) #8000 !skip-buf +( skip up to 65536 bytes ) @skip-lo ( lo* -> ) - DUP2 #8000 GTH2 ?&double !skip-buf - &double #8000 SUB2 skip-buf !skip-32k + DUP2 #8001 LTH2 ?{ skip-32k #8000 SUB2 } !skip-buf -( skips lo bytes ) -@skip-buf ( lo* -> ) +( skips up to 32768 bytes of; limited by the size of buf ) +@skip-buf ( n* -> ) ORAk ?&non-zero POP2 JMP2r &non-zero DUP2 .File1/len DEO2 ;buffer .File1/r DEO2 @@ -173,18 +197,34 @@ ;read-error print !panic &ok JMP2r +( '0' -> 00 ) +( '1' -> 01 ) +( ... ) +( '7' -> 07 ) +( anything else -> 00 ) @octal-digit ( char^ -> oct^ ) LIT "0 LTHk ?&zero SUB JMP2r &zero POP2 #00 JMP2r ( returns values between #00:0000:0000 and #01:ffff:ffff ) +( ) +( octal11 of 77777777777 = #01 #ffff #ffff, max value ) +( octal11 of 37777777777 = #00 #ffff #ffff ) +( octal11 of 00000177777 = #00 #0000 #ffff ) +( octal11 of 00000000377 = #00 #0000 #00ff ) +( octal11 of 00000000000 = #00 #0000 #0000, min value ) @load-octal11 ( addr* -> carry^ hi* lo* ) INC2k load-octal10 ( addr* hi* lo* ; load addr+1 ) STH2 STH2 ( addr* [lo* hi*] ) - LDA ( LIT "0 SUB ) octal-digit STH2r STH ( octal^ a^ [lo* b^] ) + LDA octal-digit STH2r STH ( octal^ a^ [lo* b^] ) #20 SFT #02 SFT2 STHr STH2r ( carry^ hi* lo* ) JMP2r ( carry^ hi* lo* ) ( returns values between #0000:0000 and #3fff:ffff ) +( ) +( octal10 of 7777777777 = #3fff #ffff, max value ) +( octal10 of 0000177777 = #0000 #ffff ) +( octal10 of 0000000377 = #0000 #00ff ) +( octal10 of 0000000000 = #0000 #0000, min value ) @load-octal10 ( addr* -> hi* lo* ) #0005 OVR2 ADD2 ( addr* addr+5* ) load-octal5 STH2 ( addr* [cd*] ) @@ -194,6 +234,10 @@ JMP2r ( hi* lo* ) ( returns values between #0000 and #7fff ) +( ) +( octal5 of 77777 = #7fff, max value ) +( octal5 of 00377 = #00ff ) +( octal5 of 00000 = #0000, min value ) @load-octal5 ( addr* -> num* ) #1000 LIT2r 0000 ( addr* place* [sum*] ) &loop ( pos* place* [sum*] ) @@ -206,10 +250,10 @@ ( emit 1, 2, 4, or 5 bytes as a decimal number ) @emit - &1+long STH2 STH2 ,&byte JSR STH2r STH2r - &long SWP2 ,&short JSR - &short SWP ,&byte JSR - &byte DUP #04 SFT ,&char JSR + &1+long STH2 STH2 /byte STH2r STH2r + &long SWP2 /short + &short SWP /byte + &byte DUP #04 SFT /char &char #0f AND DUP #09 GTH #27 MUL ADD #30 ADD #18 DEO JMP2r @@ -220,6 +264,9 @@ POP2 DUP2 #ffff EQU2 ?{ INC2 #0000 JMP2r } POP2 INC #0000 #0000 JMP2r +( since 4-byte integers are called `long` values, ) +( i'm calling a 5-byte integer a `longer` value. ) + @dump-longer ( carry^ long** -- ) STH2 STH2 dump-byte STH2r STH2r ( >> ) @dump-long ( long** -- ) @@ -256,45 +303,82 @@ ;header/checksum #0008 dump-mem ;header/type #0001 dump-mem ;header/linkname #0064 dump-mem - LIT2 "- 18 DEOk DEOk DEOk DEOk DEO #0a18 DEO - LIT2 "s 18 DEO #2018 DEO - ;header/size load-octal11 dump-longer #0a18 DEO - LIT2 "t 18 DEO #2018 DEO - ;header/size load-octal11 round-up-to-512 dump-longer #0a18 DEO + LIT2 "+ 18 DEOk DEOk DEOk DEOk DEO #0a18 DEO + ;uheader/ustar #0006 dump-mem + ;uheader/version #0002 dump-mem + ;uheader/owner-name #0020 dump-mem + ;uheader/group-name #0020 dump-mem + ;uheader/major #0008 dump-mem + ;uheader/minor #0008 dump-mem + ;uheader/filename-prefix #009b dump-mem + ;uheader/pad #000c dump-mem LIT2 "- 18 DEOk DEOk DEOk DEOk DEO #0a18 DEO JMP2r -( header/size is 11 octal digits; 12th digit is NUL ) -( octal 77777777777 = #01 #ffff #ffff ) -( octal 37777777777 = #ffff #ffff ) -( octal 00000177777 = #ffff ) -( octal 00000000377 = #ff ) -( octal 00000000000 = #00 ) - -( header/type -- only 0 and 5 are supported ) -( '0' normal file; also could be NUL ) -( '1' hard link ) -( '2' symlink ) -( '3' character device ) -( '4' block device ) -( '5' directory ) -( '6' fifo ) -( '7' contiguous file ) - -( header is always exactly 512 bytes ) -@header - &filename $64 - &mode $8 ( octal ) - &owner $8 ( octal ) - &group $8 ( octal ) - &size $c ( octal ) - &mtime $c ( octal ) - &checksum $8 - &type $1 ( item type ) - &linkname $64 +( some handy string constants ) +@usage "usage: 20 "uxncli 20 "tar.rom 20 "FILENAME 0a 00 +@missing-filename "error: 20 "missing 20 "filename 0a 00 +@too-many-arguments "error: 20 "too 20 "many 20 "arguments 0a 00 +@unsupported "unsupported 20 "format 20 00 +@read-error "error 20 "reading 20 "data 0a 00 ( load argument parser ) ~arg.tal -( buffer for reading up to 32k bytes of data ) +( HEADER DETAILS ) +( ) +( header -- basic v7 tar header ) +( /filename - name of file, null-terminated. if ustar, check prefix ) +( /mode - file permissions, ignored by uxn ) +( /owner - owner ID, ignored by uxn ) +( /group - group ID, ignored by uxn ) +( /size - file size as 11 octal digits. max size is 8GiB. ) +( /mtime - last modification time, ignored by uxn ) +( /checksum - 6 octal digits followed by NUL, then space ) +( /type -- only 0, 5, and 7 are supported ) +( '0' [or NUL] normal file ) +( '1' hard link ) +( '2' symlink ) +( '3' character device ) +( '4' block device ) +( '5' directory ) +( '6' fifo ) +( '7' contiguous file -- treat as normal file ) +( 'g' global extended header with metadata; POSIX.1-2001 ) +( 'x' extended header with metadata for next file; POSIX.1-2001 ) +( /linkname -- for hardlinks, the file containing the linked data ) +( uheader -- ustar header, found in v7 padding bytes ) +( /ustar -- ustar indiciator, should be "ustar " ) +( /version -- ustar version ) +( /owner-name -- owner name string, ignored by uxn ) +( /group-name -- group name string, ignored by uxn ) +( /major -- major device number, ignored by uxn ) +( /minor -- minor device number, ignored by uxn ) +( /filename-prefix -- if non-null, true path is prefix+"/"+filename ) +( ) +( the v7 header is always 512 bytes even if ustar indicator is absent. ) + +@header + &filename $64 ( 0x00: filename, string, 100 byte ) + &mode $8 ( 0x64: mode, octal-1, 8 bytes ) + &owner $8 ( 0x6c: owner, octal-1, 8 bytes ) + &group $8 ( 0x74: group, octal-1, 8 bytes ) + &size $c ( 0x7c: size, octal-1, 12 bytes ) + &mtime $c ( 0x88: mtime, octal-1, 12 bytes ) + &checksum $8 ( 0x94: checksum, octal-2, 8 bytes ) + &type $1 ( 0x9c: file type, 1 byte ) + &linkname $64 ( 0x9d: linked filename, string, 100 bytes ) + ( >> ) +@uheader + &ustar $6 ( 0x101: ustar indicator, non-terminated string, 6 bytes, "ustar " ) + &version $2 ( 0x107: ustar version, non-terminated string, 2 bytes ) + &owner-name $20 ( 0x109: owner name, string, 32 bytes ) + &group-name $20 ( 0x129: group name, string, 32 bytes ) + &major $8 ( 0x149: device major number, octal? ) + &minor $8 ( 0x151: device minor number, octal? ) + &filename-prefix $9b ( 0x159: prefix before filename, string, 155 bytes ) + &pad $c ( 0x1f4: padding, 12 bytes ) + &end ( 0x200: end of header ) + +( buffer for reading up to 32k bytes of data at a time ) |8000 @buffer $8000