nxu/tar.tal

440 lines
15 KiB
Tal
Raw Normal View History

2023-07-22 13:36:49 -04:00
( tal.tal )
( )
( by d_m )
( )
2024-09-14 20:41:13 -04:00
( see https://en.wikipedia.org/wiki/Tar_(computing)#UStar_format )
2024-09-17 22:44:29 -04:00
( )
( TODO: )
2024-09-25 23:13:04 -04:00
( - check for "ustar" header )
( - handle filename-prefix )
( - handle 'L' entries )
2024-09-17 22:44:29 -04:00
( - support creating archives )
( - arg validation should depend on mode )
( - validate checksums )
( - better error messages on unsupported files, e.g. symlinks )
( - better usage message )
( - support using "-" for stdin/stdout? )
2024-09-25 23:13:04 -04:00
( - option to ignore symlinks? )
2024-09-17 22:44:29 -04:00
( File1 is used to read/write the tar file )
( File2 is used to read/write files and directories )
2023-07-22 13:36:49 -04:00
|a0 @File1 [ &vec $2 &ok $2 &stat $2 &del $1 &append $1 &name $2 &len $2 &r $2 &w $2 ]
|b0 @File2 [ &vec $2 &ok $2 &stat $2 &del $1 &append $1 &name $2 &len $2 &r $2 &w $2 ]
|0100
;arg-callback ;on-stdin arg/init BRK
2024-09-14 20:41:13 -04:00
( exit abnormally )
@panic ( -> $exit )
#010e DEO #010f DEO BRK
( handle all provided command-line arguments )
2023-07-22 13:36:49 -04:00
@arg-callback ( -> )
;arg/count LDA
2024-09-17 22:34:19 -04:00
DUP #00 EQU ?&missing-mode
DUP #01 EQU ?&missing-file
#02 GTH ?&toomany !run
&missing-mode ;missing-mode !&error
&missing-file ;missing-filename !&error
2023-07-22 13:36:49 -04:00
&toomany ;too-many-arguments
&error print ;usage print #01 !exit
2024-09-14 20:41:13 -04:00
( run the program )
@run ( -> BRK )
2024-09-17 22:34:19 -04:00
#01 arg/read .File1/name DEO2
#00 arg/read LDA
DUP LIT "t NEQ ?{ list-entries #00 !exit }
DUP LIT "x NEQ ?{ expand-entries #00 !exit }
POP ;invalid-mode print ;usage print #00 !exit
( exit normally )
@exit ( code^ -> BRK )
#80 ORA #0f DEO BRK
2023-07-22 13:36:49 -04:00
2024-09-14 20:41:13 -04:00
( ignore stdin once we've processed the args )
@on-stdin ( -> BRK )
BRK
2023-07-22 13:36:49 -04:00
2024-09-14 20:41:13 -04:00
( print a null-terminated string )
2023-07-22 13:36:49 -04:00
@print ( s* -> )
2024-09-14 20:41:13 -04:00
&loop LDAk ?{ POP2 JMP2r }
LDAk #18 DEO INC2 !&loop
( print up to `len` bytes from string. stops on NUL. )
@lprint ( s* len* -> )
OVR2 ADD2 SWP2 ( limit* s* )
2024-09-17 22:46:25 -04:00
&loop LDAk ?{ !&done } ( limit* s* )
2024-09-14 20:41:13 -04:00
LDAk #18 DEO INC2 GTH2k ?&loop ( limit* s+1* )
&done POP2 POP2 JMP2r ( )
( read 512 bytes of header for the next tar entry. )
( assumes .File1/name is already set. )
2023-07-22 13:36:49 -04:00
@read-header ( -> ok^ )
#0200 .File1/len DEO2
;header .File1/r DEO2
2024-09-14 20:41:13 -04:00
( TODO validate checksum )
2023-07-22 13:36:49 -04:00
.File1/ok DEI2 #0200 EQU2 JMP2r
2024-09-14 20:41:13 -04:00
( list all the entries in the tar archive )
@list-entries ( -> )
read-header ?{ JMP2r }
2023-07-22 13:36:49 -04:00
;header/filename LDA ?&non-null
#800f DEO BRK
&non-null
2024-12-31 14:34:04 -05:00
;header/type LDA ( type^ )
DUP #00 EQU ?list-file ( type^ )
DUP LIT "0 EQU ?list-file ( type^ )
DUP LIT "5 EQU ?list-dir ( type^ )
DUP LIT "7 EQU ?list-file ( type^ )
DUP LIT "x EQU ?list-posix-meta ( type^ )
!list-unsupported
@list-posix-meta ( type^ -> )
2023-07-22 13:36:49 -04:00
POP
2024-12-31 14:34:04 -05:00
;header/size load-octal11 STH2 ( s0^ s1* [s2*] )
ORA ORA ?&meta-too-big
2023-07-22 13:36:49 -04:00
2024-12-31 14:34:04 -05:00
&meta-too-big
POP2r ;meta-too-big print
;header/size dump-longer #0a18 DEO
!list-unsupported
2023-07-22 13:36:49 -04:00
2024-09-25 23:13:04 -04:00
( verbose file entry listing )
2024-12-31 14:34:04 -05:00
@list-unsupported ( type^ -> )
;unsupported print
#18 DEO #2018 DEO
2024-09-25 23:13:04 -04:00
;header/size load-octal11 dump-longer #2018 DEO
2024-12-31 14:34:04 -05:00
;header/filename sanitize-path
#0064 lprint #0a18 DEO
;header/size load-octal11 round-up-to-512 skip !list-entries
2024-09-25 23:13:04 -04:00
2024-09-14 20:41:13 -04:00
( verbose file entry listing )
2024-12-31 14:34:04 -05:00
@list-file ( type^ -> )
2024-09-14 01:22:04 -04:00
POP
LIT "f #18 DEO #2018 DEO
;header/size load-octal11 dump-longer #2018 DEO
2024-09-14 20:41:13 -04:00
;header/filename #0064 lprint #0a18 DEO
;header/size load-octal11 round-up-to-512 skip !list-entries
2024-09-14 01:22:04 -04:00
2024-09-14 20:41:13 -04:00
( verbose directory entry listing )
2024-12-31 14:34:04 -05:00
@list-dir ( 00^ -> )
2024-09-14 01:22:04 -04:00
POP
LIT "d #18 DEO #2018 DEO
;header/size load-octal11 dump-longer #2018 DEO
2024-09-14 20:41:13 -04:00
;header/filename #0064 lprint #0a18 DEO
!list-entries
2024-09-14 01:22:04 -04:00
2024-09-17 22:34:19 -04:00
( expand a .tar archive in the current working directory )
@expand-entries ( -> )
read-header ?{ JMP2r }
;header/filename LDA ?&non-null
#800f DEO BRK
&non-null
;header/type LDA ( type^ )
DUP #00 EQU ?expand-file ( type^ )
DUP LIT "0 EQU ?expand-file ( type^ )
DUP LIT "5 EQU ?expand-dir ( type^ )
DUP LIT "7 EQU ?expand-file ( type^ )
2024-09-27 22:49:12 -04:00
!expand-unsupported
2024-09-17 22:34:19 -04:00
( remove leading / of an absolute path )
@sanitize-path ( s* -> s1* )
LDAk LIT "/ NEQ JMP INC2 JMP2r
2024-09-17 22:44:29 -04:00
@remainder-512 ( n* -> extra* )
2024-09-25 23:13:04 -04:00
#01ff AND2 #0200 DUP2 ROT2 SUB2 NEQ2k ?{ POP2 #0000 } NIP2 JMP2r
2024-09-17 22:34:19 -04:00
@expand-file ( type^ -> )
POP
;header/filename sanitize-path
DUP2 #0064 lprint #0a18 DEO
.File2/name DEO2
;header/size load-octal11 STH2k write
2024-09-17 22:44:29 -04:00
STH2r remainder-512 skip-lo !expand-entries
2024-09-17 22:34:19 -04:00
@expand-dir ( type^ -> )
POP
;header/filename sanitize-path
DUP2 #0064 lprint #0a18 DEO
.File2/name DEO2
#0004 .File2/len DEO2
2024-09-17 22:44:29 -04:00
#0001 .File2/w DEO2
2024-09-17 22:34:19 -04:00
!expand-entries
2024-09-27 22:49:12 -04:00
@expand-unsupported ( type^ -> )
2024-12-31 14:34:04 -05:00
;unsupported print
2024-09-27 22:49:12 -04:00
#18 DEO LIT2 ": 18 DEO #2018 DEO
;header/filename sanitize-path
2024-12-31 14:34:04 -05:00
#0064 lprint #0a18 DEO
;header/size load-octal11 round-up-to-512 skip !expand-entries
2024-09-27 22:49:12 -04:00
2024-09-18 21:07:42 -04:00
( src and dst should be paths )
@compress-entries ( src* dst* -> )
POP2 POP2 JMP2r
2024-09-17 22:34:19 -04:00
( writes `n` bytes from File1 to File2 )
( uses a 32k internal buffer )
@write ( carry^ hi* lo* -> )
2024-09-18 21:07:42 -04:00
;write-buf ;write-lo/writer STA2
write-lo write-hi ?write-4g JMP2r
2024-09-17 22:34:19 -04:00
( writes up to 32768 bytes of; limited by the size of buf )
@write-buf ( n* -> )
ORAk ?{ POP2 JMP2r } ( n* )
DUP2 .File1/len DEO2 ( n* )
;buffer .File1/r DEO2 ( n* )
DUP2 .File1/ok DEI2 EQU2 ?&ok ( n* )
POP2 ;read-error print !panic ( )
&ok ( n* )
DUP2 .File2/len DEO2 ( n* )
;buffer .File2/w DEO2 ( n* )
.File2/ok DEI2 EQU2 ?&ok2 ( )
;write-error print !panic ( )
&ok2 JMP2r ( )
2024-09-14 20:41:13 -04:00
( skips `n` bytes forward in File1, specified as a 5-byte integer )
( )
( since we can only actually read 32k at a time, and since we can )
( only easily work with 2-byte shorts, we first read the lowest )
( two bytes and skip that much. then, for the higher byte, we can )
( do two skips of 32k for each unit found. )
2023-07-22 13:36:49 -04:00
@skip ( carry^ hi* lo* -> )
2024-09-18 21:07:42 -04:00
;skip-buf ;write-lo/writer STA2
write-lo write-hi ?write-4g JMP2r
2023-07-22 13:36:49 -04:00
2024-09-25 23:13:04 -04:00
@skip-lo ( lo* -> )
;skip-buf ;write-lo/writer STA2 !write-lo
2024-09-14 20:41:13 -04:00
( skips up to 32768 bytes of; limited by the size of buf )
@skip-buf ( n* -> )
2024-09-18 21:07:42 -04:00
ORAk ?{ POP2 JMP2r }
2023-07-22 13:36:49 -04:00
DUP2 .File1/len DEO2
;buffer .File1/r DEO2
.File1/ok DEI2 EQU2 ?&ok
;read-error print !panic
&ok JMP2r
2024-09-18 21:07:42 -04:00
( unconditionally write 4GiB, that is 4294967296 bytes )
@write-4g ( -> ) write-2g ( >> )
( unconditionally write 2GiB, that is 2147483648 bytes )
@write-2g ( -> ) #8000 !write-hi
( writes `hi*65536` bytes )
( - 0001 will write 65536 bytes )
( - 0010 will write 1048576 bytes )
( - ffff will write 4294901760 bytes )
@write-hi ( hi* -> )
#0000 SWP2 SUB2 ( -hi* )
&loop ORAk ?&ok POP2 JMP2r ( )
&ok write-64k INC2 !&loop ( -hi+1* )
( writes exactly 65536 bytes )
@write-64k ( -> ) write-32k ( >> )
( writes exactly 32768 bytes )
@write-32k ( -> ) #8000 ( >> )
( write up to 65536 bytes )
@write-lo ( lo* -> )
DUP2 #8001 LTH2 ?{ write-32k #8000 SUB2 } LIT2 [ &writer =write-buf ] JMP2
2024-09-14 20:41:13 -04:00
( '0' -> 00 )
( '1' -> 01 )
( ... )
( '7' -> 07 )
( anything else -> 00 )
2023-07-22 13:36:49 -04:00
@octal-digit ( char^ -> oct^ )
2024-09-18 21:07:42 -04:00
LIT "0 SUB DUP #08 LTH ?{ POP #00 } JMP2r
2023-07-22 13:36:49 -04:00
( returns values between #00:0000:0000 and #01:ffff:ffff )
2024-09-14 20:41:13 -04:00
( )
( octal11 of 77777777777 = #01 #ffff #ffff, max value )
( octal11 of 37777777777 = #00 #ffff #ffff )
( octal11 of 00000177777 = #00 #0000 #ffff )
( octal11 of 00000000377 = #00 #0000 #00ff )
( octal11 of 00000000000 = #00 #0000 #0000, min value )
2023-07-22 13:36:49 -04:00
@load-octal11 ( addr* -> carry^ hi* lo* )
2024-09-14 01:22:04 -04:00
INC2k load-octal10 ( addr* hi* lo* ; load addr+1 )
2023-07-22 13:36:49 -04:00
STH2 STH2 ( addr* [lo* hi*] )
2024-09-14 20:41:13 -04:00
LDA octal-digit STH2r STH ( octal^ a^ [lo* b^] )
2023-07-22 13:36:49 -04:00
#20 SFT #02 SFT2 STHr STH2r ( carry^ hi* lo* )
JMP2r ( carry^ hi* lo* )
( returns values between #0000:0000 and #3fff:ffff )
2024-09-14 20:41:13 -04:00
( )
( octal10 of 7777777777 = #3fff #ffff, max value )
( octal10 of 0000177777 = #0000 #ffff )
( octal10 of 0000000377 = #0000 #00ff )
( octal10 of 0000000000 = #0000 #0000, min value )
2023-07-22 13:36:49 -04:00
@load-octal10 ( addr* -> hi* lo* )
#0005 OVR2 ADD2 ( addr* addr+5* )
load-octal5 STH2 ( addr* [cd*] )
load-octal5 SWPr STHr ( ab* c^ [d^] )
#10 SFT #01 SFT2 STH SWPr ( a^ b>>1^ [lo*] )
#10 SFT #01 SFT2 STH2r ( hi* lo* )
JMP2r ( hi* lo* )
( returns values between #0000 and #7fff )
2024-09-14 20:41:13 -04:00
( )
( octal5 of 77777 = #7fff, max value )
( octal5 of 00377 = #00ff )
( octal5 of 00000 = #0000, min value )
2023-07-22 13:36:49 -04:00
@load-octal5 ( addr* -> num* )
#1000 LIT2r 0000 ( addr* place* [sum*] )
&loop ( pos* place* [sum*] )
OVR2 LDA octal-digit ( addr* place* digit^ [sum*] )
#00 SWP ( addr* place* digit* [sum*] )
OVR2 MUL2 STH2 ADD2r ( addr* place* [sum2=sum+place*digit*] )
SWP2 INC2 SWP2 ( addr+1* place* [sum2*] )
#03 SFT2 ORAk ?&loop ( addr+1* place>>3* [sum2*] )
POP2 POP2 STH2r JMP2r ( sum2* )
2023-08-17 09:06:23 -04:00
( emit 1, 2, 4, or 5 bytes as a decimal number )
2024-12-31 14:34:04 -05:00
( @emit
2024-09-14 20:41:13 -04:00
&1+long STH2 STH2 /byte STH2r STH2r
&long SWP2 /short
&short SWP /byte
&byte DUP #04 SFT /char
2023-08-17 09:06:23 -04:00
&char #0f AND DUP #09 GTH #27 MUL ADD #30 ADD #18 DEO
2024-12-31 14:34:04 -05:00
JMP2r )
2023-08-17 09:06:23 -04:00
( round a given 5-byte size up to multiples of 512 )
2024-09-14 01:22:04 -04:00
@round-up-to-512 ( carry^ hi* lo* -> chl^** )
DUP2 #01ff AND2 ORA ?{ JMP2r }
DUP2 #fe00 GTH2 ?{ #0200 OVR2 #01ff AND2 SUB2 ADD2 JMP2r }
POP2 DUP2 #ffff EQU2 ?{ INC2 #0000 JMP2r }
POP2 INC #0000 #0000 JMP2r
2024-09-14 20:41:13 -04:00
( since 4-byte integers are called `long` values, )
( i'm calling a 5-byte integer a `longer` value. )
2024-09-14 01:22:04 -04:00
@dump-longer ( carry^ long** -- )
STH2 STH2 dump-byte STH2r STH2r ( >> )
@dump-long ( long** -- )
SWP2 dump-short ( >> )
@dump-short ( short* -- )
SWP dump-byte ( >> )
@dump-byte ( byte^ -- )
DUP #04 SFT /hex #0f AND ( >> )
&hex #30 ADD DUP #39 GTH #27 MUL ADD #18 DEO
JMP2r
@dump-mem ( start* size* -> )
OVR2 ADD2 SWP2 ( lim* start* )
LDAk dump-byte INC2 ( lim* start+1* )
&loop GTH2k ?&ok POP2 POP2 #0a18 DEO JMP2r ( lim^ pos^ )
&ok #2018 DEO LDAk dump-byte INC2 !&loop ( lim^ pos+1^ )
@dump-header ( -> )
LIT2 "- 18 DEOk DEOk DEOk DEOk DEO #0a18 DEO
;header/filename #0064 dump-mem
;header/mode #0008 dump-mem
;header/owner #0008 dump-mem
;header/group #0008 dump-mem
;header/size #000c dump-mem
;header/mtime #000c dump-mem
;header/checksum #0008 dump-mem
;header/type #0001 dump-mem
;header/linkname #0064 dump-mem
2024-09-14 20:41:13 -04:00
LIT2 "+ 18 DEOk DEOk DEOk DEOk DEO #0a18 DEO
;uheader/ustar #0006 dump-mem
;uheader/version #0002 dump-mem
;uheader/owner-name #0020 dump-mem
;uheader/group-name #0020 dump-mem
;uheader/major #0008 dump-mem
;uheader/minor #0008 dump-mem
;uheader/filename-prefix #009b dump-mem
;uheader/pad #000c dump-mem
2024-09-14 01:22:04 -04:00
LIT2 "- 18 DEOk DEOk DEOk DEOk DEO #0a18 DEO
JMP2r
2023-08-17 09:06:23 -04:00
2024-09-14 20:41:13 -04:00
( some handy string constants )
2024-09-17 22:44:29 -04:00
@usage "usage: 20 "uxncli 20 "tar.rom 20 "t|x 20 "FILENAME 0a 00
2024-09-17 22:34:19 -04:00
@missing-mode "error: 20 "missing 20 "mode 0a 00
2024-09-14 20:41:13 -04:00
@missing-filename "error: 20 "missing 20 "filename 0a 00
@too-many-arguments "error: 20 "too 20 "many 20 "arguments 0a 00
2024-09-17 22:34:19 -04:00
@invalid-mode "error: 20 "invalid 20 "mode 0a 00
2024-09-14 20:41:13 -04:00
@read-error "error 20 "reading 20 "data 0a 00
2024-09-17 22:34:19 -04:00
@write-error "error 20 "writing 20 "data 0a 00
2024-12-31 14:34:04 -05:00
@unsupported "skipped 20 "unsupported 20 "type 20 00
@meta-too-big "extended 20 "metadata 20 "field 20 "too 20 "big: 20 00
2023-07-22 13:36:49 -04:00
( load argument parser )
~arg.tal
2024-09-14 20:41:13 -04:00
( HEADER DETAILS )
( )
( header -- basic v7 tar header )
( /filename - name of file, null-terminated. if ustar, check prefix )
( /mode - file permissions, ignored by uxn )
( /owner - owner ID, ignored by uxn )
( /group - group ID, ignored by uxn )
( /size - file size as 11 octal digits. max size is 8GiB. )
( /mtime - last modification time, ignored by uxn )
( /checksum - 6 octal digits followed by NUL, then space )
( /type -- only 0, 5, and 7 are supported )
( '0' [or NUL] normal file )
( '1' hard link )
( '2' symlink )
( '3' character device )
( '4' block device )
( '5' directory )
( '6' fifo )
( '7' contiguous file -- treat as normal file )
( 'g' global extended header with metadata; POSIX.1-2001 )
( 'x' extended header with metadata for next file; POSIX.1-2001 )
2024-09-25 23:13:04 -04:00
( 'A' solaris ACL )
( 'D' gnu dump dir )
( 'E' solaris extended attrs )
( 'I' inode metadata )
( 'K' this entry's data is the long link location of next file, null-terminated? )
( 'L' this entry's data is the long filename of next file, null-terminated )
( 'M' continuation file )
( 'N' old gnu for long names )
( 'S' sparse files )
( 'V' tape/volume header )
( 'X' extended attrs, sun )
2024-09-14 20:41:13 -04:00
( /linkname -- for hardlinks, the file containing the linked data )
( uheader -- ustar header, found in v7 padding bytes )
( /ustar -- ustar indiciator, should be "ustar " )
( /version -- ustar version )
( /owner-name -- owner name string, ignored by uxn )
( /group-name -- group name string, ignored by uxn )
( /major -- major device number, ignored by uxn )
( /minor -- minor device number, ignored by uxn )
( /filename-prefix -- if non-null, true path is prefix+"/"+filename )
( )
( the v7 header is always 512 bytes even if ustar indicator is absent. )
@header
&filename $64 ( 0x00: filename, string, 100 byte )
&mode $8 ( 0x64: mode, octal-1, 8 bytes )
&owner $8 ( 0x6c: owner, octal-1, 8 bytes )
&group $8 ( 0x74: group, octal-1, 8 bytes )
&size $c ( 0x7c: size, octal-1, 12 bytes )
&mtime $c ( 0x88: mtime, octal-1, 12 bytes )
&checksum $8 ( 0x94: checksum, octal-2, 8 bytes )
&type $1 ( 0x9c: file type, 1 byte )
&linkname $64 ( 0x9d: linked filename, string, 100 bytes )
( >> )
@uheader
&ustar $6 ( 0x101: ustar indicator, non-terminated string, 6 bytes, "ustar " )
&version $2 ( 0x107: ustar version, non-terminated string, 2 bytes )
&owner-name $20 ( 0x109: owner name, string, 32 bytes )
&group-name $20 ( 0x129: group name, string, 32 bytes )
&major $8 ( 0x149: device major number, octal? )
&minor $8 ( 0x151: device minor number, octal? )
&filename-prefix $9b ( 0x159: prefix before filename, string, 155 bytes )
&pad $c ( 0x1f4: padding, 12 bytes )
&end ( 0x200: end of header )
2024-12-31 14:34:04 -05:00
( up to 8 bytes for long size )
|77f8 @long-size $8
( buffer for up to 2048 characters of long names/paths )
|7800 @long-buf $800
2024-09-25 23:13:04 -04:00
2024-09-14 20:41:13 -04:00
( buffer for reading up to 32k bytes of data at a time )
2023-07-22 13:36:49 -04:00
|8000 @buffer $8000