;
; ethflop - a floppy drive emulator over Ethernet
; Copyright (C) 2019-2024 Mateusz Viste
;
; ethflop is a small TSR that hooks itself on INT 13h and emulates a floppy
; drive. All requests for this drive are forwarded through Ethernet towards
; an ethflopd server.
;
; BUILD:
;  nasm -f bin -o ethflop.com ethflop.asm
;
; This software is published under the terms of the MIT license.
;

PROTOVER equ 0x01       ; protocol id (must match the version byte emitted by
                        ; HDR_TEMPLATE at the end of this file)
DBG equ 0               ; set to non-zero for enabling debug mode for:
                        ; 1=interrupt  ;  2=int+pktdrv
CPU 8086                ; restrict the assembler to the 8086 instruction set
org 100h                ; .COM program: code starts right after the 256-byte PSP

section .text  ; all goes into code segment

TSRBEGIN:  ; this label is used to compute the TSR size

; First instruction of the .COM image: skip over the resident part and land in
; the transient command-line code (PROGSTART, near the end of the file).
jmp PROGSTART
STR_SIG db "EFLOP"    ; signature for locating the TSR in memory (5 bytes, so
                      ; the memory block below ends up WORD-aligned)

; ============================================================================
; === CONFIGURATION AND PKT BUFFER MEMORY BLOCK ==============================
; ============================================================================

; I am misusing the PSP location here by storing memory variables and packet
; header values inside PSP's command tail area. The command tail area is 127
; bytes big, which is a waste of space for a TSR, esp. since even the command
; line program won't ever take arguments longer than 25 bytes or so.
; All symbols below are plain offsets (equ) inside the PSP, i.e. inside the
; program's own segment - they do not reserve any bytes in the binary.
STACKLOC equ 0xA0   ; location (offset) of the STACK area (last word address)
VARBLOCK equ 0xA2   ; starting at A2h means my cmd line tail can be up to 33
                    ; bytes long and the vars can hold up to 94 bytes of data
LASTOPSTATUS equ VARBLOCK     ; last op res as returned by int 13h,ah=1 (1 b)
FFU equ VARBLOCK+1            ; for future use (1 byte)
PRVHANDLERBIOS equ VARBLOCK+2 ; prv int 13h handler address (4 bytes, far ptr)
PRVHANDLERDOS equ VARBLOCK+6  ; prv int 13h routine, as known by int 2F,ah=13h
PKTDRVR equ VARBLOCK+10       ; pkt drvr procedure address (4 bytes, far ptr)
PKTDRVRHANDLE equ VARBLOCK+14 ; packet driver handle (2 bytes)
PKTBUFBUSY equ VARBLOCK+16    ; "packet is in buffer" flag (0=empty) (1 byte)
BYTEVAR equ VARBLOCK+17       ; a BYTE variable for general purpose (1 byte)
ORIGSS equ VARBLOCK+18        ; used to save SS on TSR entry, restored on exit
ORIGSP equ VARBLOCK+20        ; used to save SP on TSR entry, restored on exit
                              ; VARBLOCK+22 and +23: two bytes available for
                              ; future use
; Header template: copied verbatim over the first (HDR_END-HDR_BEGIN) bytes of
; the packet buffer (PKT_DMAC...) each time a frame is sent, so the layout of
; HDR_* must mirror the layout of PKT_* byte for byte.
HDR_BEGIN equ VARBLOCK+24
HDR_DMAC equ HDR_BEGIN        ; destination (server) MAC (6 bytes)
HDR_SMAC equ HDR_BEGIN+6      ; source (local) MAC (6 bytes)
HDR_ETYPE equ HDR_BEGIN+12    ; ethertype: data=0xEFDD, ctrl=0xEFDC (2 bytes)
HDR_PROTOVER equ HDR_BEGIN+14 ; protocol version (1 byte)
HDR_REQID equ HDR_BEGIN+15    ; request's seq number (1 byte)
HDR_FLOPID equ HDR_BEGIN+16   ; current virtual floppy id (8 bytes)
HDR_AX equ HDR_BEGIN+24       ; AX (2 bytes)
HDR_BX equ HDR_BEGIN+26       ; BX (2 bytes)
HDR_CX equ HDR_BEGIN+28       ; CX (2 bytes)
HDR_DX equ HDR_BEGIN+30       ; DX (2 bytes)
HDR_SECTNUM equ HDR_BEGIN+32  ; sect num for multisector READs and WRITEs (1b)
HDR_DRIVEID equ HDR_BEGIN+33  ; emulated drive: 0=A, 1=B (1 byte)
HDR_END equ HDR_BEGIN+34
                              ; VARBLOCK+58 and +59: two bytes available for
                              ; future use
GETCURTICK equ VARBLOCK+60    ; this is a location in the PSP where I put the
                              ; timer routine at startup (max 14 bytes)

COMPUTEPKTCSUM equ VARBLOCK+74 ; this is a location in the PSP where I put the
                               ; cksum routine at startup (max 20 bytes; it
                               ; ends exactly at offset 100h, the end of PSP)

PKTDRVR_RECV equ 0x5C         ; pkt drv recv routine will be placed in the FCB
                              ; area of the PSP, there are 36 bytes of RAM
                              ; there that would be otherwise wasted

; === END OF CONFIGURATION BLOCK =============================================


; ============================================================================
; === PACKET DRIVER SENDING ROUTINE ==========================================
; ============================================================================
; Sends one request frame and waits for the matching answer.
;
; The HDR_* template is copied over the start of the PKT_* buffer, the
; checksum is computed and the frame (PKT_DMAC..PKT_END) is handed to the
; packet driver. The routine then polls PKTBUFBUSY until the receive routine
; has stored a frame whose PROTOVER, REQID and FLOPID match the request and
; whose checksum is valid, or until the timeout (37 timer ticks) expires.
;
; In:    HDR_* fields filled in by the caller; for writes PKT_DATA holds the
;        sector. DS may be anything (it is set to CS internally).
; Out:   CF clear = a valid answer is in the PKT_* buffer (PKTBUFBUSY is left
;        non-zero, so the answer is not overwritten)
;        CF set   = packet driver refused the frame, or timeout
;        AH = 0x80 ALWAYS; every other register is preserved
; Side:  HDR_REQID is incremented, BYTEVAR is used as scratch, DF is cleared.
PKTDRVR_SEND:
; save registers
push ax
push bx
push cx
push dx
push si
push di
push ds
push es
; set ds to self (ds = cs)
push cs
pop ds
; mark the buffer as 'busy', so the packet driver won't be
; tempted to put some garbage there while I craft my frame.
mov [PKTBUFBUSY], byte 1
; increment reqid
inc byte [HDR_REQID]
; I will use BYTEVAR as a "dirty flag" variable: as long as it is zero, my
; data in pkt_buf did not change, so I can reuse it for retries.
mov [BYTEVAR], byte 0
; copy headers to packet
mov cx, (HDR_END-HDR_BEGIN)/2   ; /2 because I will copy WORDS, NOT BYTES
push ds
pop es
mov di, PKT_DMAC
mov si, HDR_DMAC
cld
rep movsw      ; [ES:DI++] = [DS:SI++], CX times
; compute checksum
call COMPUTEPKTCSUM
mov [PKT_CSUM], bx ; write csum to frame buffer
; AX has timeout detection values (AL="last known tick", AH="wait ticks left")
call GETCURTICK     ; system timer is in BL now
mov al, bl          ; save it to AL...
mov ah, 37          ; how many clock ticks should I wait? 1 tick=55ms
; send the frame out
.SENDPKTOUT:
push ax                     ; keep the timeout state across the driver call
mov ah, 4                   ; SendPkt()
mov cx, (PKT_END-PKT_DMAC)  ; how many bytes
mov si, PKT_DMAC
; simulated int
pushf
cli
call far [PKTDRVR]
       ; I can potentially lose packets here. If the remote srv answers super
       ; quick, the pktdrvr routine will see that PKTBUFF is still busy, thus
       ; rejecting the packet. I observed this on a virtual environment where
       ; ethflop (under DOSEMU) is on the same machine as ethflopd and both
       ; communicate through a loopback. As a work-around for this problem,
       ; the ethflopd server delays its answer by 0.5ms, so ethflop has enough
       ; time to prepare.
       ; One may think that a good idea would be to reset PKTBUFBUSY earlier,
       ; that is - before calling pkt_send()... but this would be worse: it
       ; could lead to races! If the pkt driver wanted to feed some weird pkt
       ; just before the send routine is called, ethflop would end up sending
       ; out a copy of the just-received frame instead of its own query.
mov [PKTBUFBUSY], byte 0 ; flag the pkt buffer as 'available'
pop ax
jc short .SENDRETERR ; quit on PKTDRVR fail (neither MOV nor POP modify flags)
%if DBG = 2
mov [DBGCOLOR], byte 2   ; DBG, GREEN (pkt sent)
mov [DBGVALUE], byte 's'
call VGADBG
%endif
; wait for an answer
.WAITSOMEMORE:
; set dx to 0100h so I can use it for cmp/mov [var],0/1 (thx John Kerr-Mudd)
mov dx, 0x0100  ; DH=1  DL=0
; look for timeout (timer different than AL, AH times)
call GETCURTICK     ; system timer is in BL now
; has it changed? (AL=original tick)
cmp bl, al
je short .SKIPTIMERCHECK
; timer changed, decrease allowed ticks and check if I am still allowed to wait
dec ah
jz short .SENDRETERR    ; if timer count expired (AH=0) -> error
mov al, bl              ; update my tick reference for next check

; resend the packet after 5 ticks (ca. 250ms), and then every 8 ticks (440ms),
; ie. each time AH becomes 8-divisible. Since I started it at 37, first resend
; will occur at 32 (5 ticks later), then at 24, 16 and 8.
test ah, 7
jnz short .SKIPTIMERCHECK

; re-send query again only if pkt buf not dirty
cmp [BYTEVAR], dl         ; dl=0 (smaller than cmp [], 0)
jne short .SKIPTIMERCHECK ; dirty buffer, don't resend pkt
inc byte [PKTBUFBUSY]     ; mark pktbuf as busy, so pktdrv does not fill it
; make sure PKTBUFBUSY is exactly 1 - otherwise it was full already
cmp [PKTBUFBUSY], dh      ; dh=1 (smaller than cmp[], 1)
jne short .SKIPTIMERCHECK ; oops, got something in the meantime (yay, I guess)
jmp short .SENDPKTOUT
.SKIPTIMERCHECK:
; monitor [PKTBUFBUSY] for non-zero ('got reply')
cmp [PKTBUFBUSY], dl      ; dl=0
je short .WAITSOMEMORE
; received something: set the 'dirty' var so I know not to resend my pkt
mov [BYTEVAR], dh         ; dh=1
%if DBG = 2
mov [DBGCOLOR], byte 1    ; DBG, BLUE (pkt rcvd)
mov [DBGVALUE], byte 'r'
call VGADBG
%endif
; received something: check that protover, reqid and flopid are matching
mov si, PKT_PROTOVER
mov di, HDR_PROTOVER
mov cx, 5                 ; compare 5 words (10 bytes: PROTOVER+REQID+FLOPID)
repe cmpsw                ; compare CX words at DS:SI and ES:DI (DS=ES=CS)
je short .CHECKCKSUM      ; all equal? (success)
; flag the pkt buffer as 'available' and continue waiting
%if DBG = 2
mov [DBGCOLOR], byte 0x40 ; DEBUG ONLY (RED)
mov [DBGVALUE], byte 'H'
call VGADBG
%endif
mov [PKTBUFBUSY], dl      ; dl=0
jmp short .WAITSOMEMORE
.CHECKCKSUM:
; compute and validate csum
call COMPUTEPKTCSUM
cmp bx, [PKT_CSUM]
je short .SENDRET         ; equal -> CF is clear, which is my "success" flag
%if DBG = 2
mov [DBGCOLOR], byte 0x40 ; DEBUG ONLY (RED)
mov [DBGVALUE], byte 'C'  ; DEBUG ONLY (RED)
call VGADBG
%endif
; corrupt frame: release the buffer exactly like the header-mismatch path
; above does, otherwise PKTBUFBUSY stays non-zero and this loop would keep
; re-validating the very same bad frame until the timeout, while the buffer
; would stay flagged as busy for the whole time.
; An immediate 0 is used here (instead of DL) because nothing visible in this
; file guarantees that COMPUTEPKTCSUM preserves DX.
mov [PKTBUFBUSY], byte 0
jmp short .WAITSOMEMORE   ; if packet invalid, keep waiting
.SENDRETERR:
stc ; set CF (packet driver send failure or timeout)
.SENDRET:
; restore registers and return to caller (POP does not modify CF)
pop es
pop ds
pop di
pop si
pop dx
pop cx
pop bx
pop ax
; set ah=0x80
mov ah, 0x80
ret

%if DBG != 0
; Debug helper: prints one character cell on text row 12 of the colour text
; screen (segment B800h). Each call advances the column by one cell (2 bytes),
; wrapping around after 64 cells.
; In:    [cs:DBGVALUE] = character, [cs:DBGCOLOR] = attribute byte
; Out:   BX, DX and ES are preserved; flags are modified
LASTCOL dw 0    ; byte offset of the last cell written (always 0..126, even)
DBGVALUE db 0   ; character to print  \ these two bytes MUST stay adjacent:
DBGCOLOR db 0   ; attribute to use    / they are fetched as a single word
VGADBG:
push bx
push dx
push es
; advance the cursor and keep it within the first 64 cells of the row
mov bx, [cs:LASTCOL]
add bx, 2
and bx, 127
mov [cs:LASTCOL], bx
; point ES at the video memory
mov dx, 0xB800
mov es, dx
; write char (low byte) + attribute (high byte) in one go
mov dx, [cs:DBGVALUE]
mov [es:bx + (12 * 160)], dx
pop es
pop dx
pop bx
ret
%endif

; ============================================================================
; === RELAY TO PREV HANDLER (DOS OR BIOS) ====================================
; ============================================================================
; Both stubs are tail-jumps: nothing has been pushed or modified when they are
; reached, so the previous handler sees the request exactly as issued and
; returns straight to the original caller.
; They are placed right before the handlers so that short jumps can reach them.
RELAYTOPRVHANDLERDOS:      ; TSR code jumps here when I want to hand control
jmp far [cs:PRVHANDLERDOS] ; to the previous DOS handler
RELAYTOPRVHANDLERBIOS:     ; TSR code jumps here when I want to hand control
jmp far [cs:PRVHANDLERBIOS]; to the previous int 13h handler

; ============================================================================
; === THIS IS WHERE THE TSR STARTS WHEN CALLED BY DOS ========================
; ============================================================================
; Entry point registered through int 2Fh,ah=13h. Requests for other drives are
; relayed to the handler DOS knew before; requests for my drive fall through
; into INTHANDLERBIOS (whose own drive check is then trivially true).
INTHANDLERDOS:
cmp dl, [cs:HDR_DRIVEID]        ; is DL pointing at my drive?
jne short RELAYTOPRVHANDLERDOS  ; not for me, let original handler take care

; ============================================================================
; === THIS IS WHERE THE TSR STARTS WHEN CALLED BY AN INTERRUPT CALL ==========
; ============================================================================
; Entry point installed in the int 13h vector.
; In:    int 13h register convention (AH=function, DL=drive, ...)
; Out:   via HANDLERDONE / HANDLERDONE_GOTPKT: AH=status, CF in the caller's
;        FLAGS image set if AH != 0
INTHANDLERBIOS:
cmp dl, [cs:HDR_DRIVEID]        ; is DL pointing at my drive?
jne short RELAYTOPRVHANDLERBIOS ; not for me, let original handler take care

; === If I am here, then I will handle this request myself ===================

; reset CFLAG stored on stack (assume success)
push bp    ; there are several ways to achieve this, I initiated a discussion
mov bp, sp ; with some interesting replies on alt.lang.asm - see the thread
           ; "How to modify values placed on the stack?" (27 Sep 2019)
and byte [bp+6], 0xFE ; stack contains bp, ip, cs, flags
pop bp ; restore BP to its original value

; save stack pointers and switch to my own stack block so pktdrvr is happy
; NOTE(review): ORIGSS/ORIGSP are single save slots, so a nested request for
; this drive arriving while one is in progress would overwrite them.
mov [cs:ORIGSS], ss
mov [cs:ORIGSP], sp
push cs
pop ss  ; I should do that only after a CLI, but here I am inside an
        ; int handler, so interrupts are disabled already
mov sp, [cs:STACKLOC] ; STACKLOC holds the top-of-stack offset chosen at install

; enable interrupts - this is good for two reasons: allows nested interrupts,
; and makes it possible to use the PIT counter for timeout detection
sti

; clear direction flag, so all rep-like ops always move forward
cld

; pre-fill pkt hdr with registers, as seen at entry point
mov [cs:HDR_AX], ax
mov [cs:HDR_BX], bx
mov [cs:HDR_CX], cx
mov [cs:HDR_DX], dx

%if DBG != 0
; print int called (AH) on screen (YELLOW)
mov [cs:DBGCOLOR], byte 14
mov [cs:DBGVALUE], ah
add [cs:DBGVALUE], byte '@'
call VGADBG
%endif

; identify the int 13h query
test ah, ah            ; ah == 0? (byte shorter than cmp ah, 0)
jz short HANDLERDONE   ; special case: RESET always succeeds, ah=0, nothing to do
cmp ah, 0x01
jne short ACTION_NOT_STATUSLASTOP
; int 13h, ah=1h
mov ah, [cs:LASTOPSTATUS] ; load AH with last status op
jmp short HANDLERDONE
ACTION_NOT_STATUSLASTOP:
cmp ah, 0x02
je short ACTION_READ
cmp ah, 0x03
je short ACTION_WRITE
; unrecognized function -> let the server worry about it
call PKTDRVR_SEND     ; send frame and preset ah=0x80 ("timeout, not read")
jc short HANDLERDONE  ; abort on failure (AH=0x80 is returned to the caller)
jmp short HANDLERDONE_GOTPKT ; success: return the registers sent by the server


; process the query - set ah to 0 on success errno otherwise, then jmp HANDLERDONE

; Reads AL sectors, one network round-trip per sector. HDR_SECTNUM is the
; index of the sector being fetched; it doubles as the "sectors transferred so
; far" counter that is reported back in AL if the transfer is aborted.
ACTION_READ: ; int 13h,ah=2h: al=sectors_to_read, ch=cyl, cl=sect, dh=head
and ax, 0x00ff ; ah=0 and test al for zero (query to rd/wr 0 sects -> success)
jz short HANDLERDONE
mov [cs:HDR_SECTNUM], ah ; zero out HDR_SECTNUM (ah=0 here, and that's a byte shorter than using byte 0)
.ACTION_READ_NEXTSECT:
cmp al, [cs:HDR_SECTNUM]  ; do I have any sectors left for read?
je short HANDLERDONE_GOTPKT ; all done: answer with the server's last reply
call PKTDRVR_SEND     ; send frame and preset ah=0x80 ("timeout, not read")
; abort on failure
jnc short .ACTION_READ_GOTPKT
mov al, [cs:HDR_SECTNUM] ; AL tells "HOW MANY SECTORS HAVE BEEN TRANSFERRED"
jmp short HANDLERDONE
.ACTION_READ_GOTPKT:
; did I get a server-side error? (PKT_AH != 0)
test [cs:PKT_AX+1], byte 0xff
jnz short HANDLERDONE_GOTPKT ; yes: hand the server's registers to the caller
; all good. write result to es:bx + 512*sect
push es
push ds
push si
push di
push cx
push cs ; ds = cs
pop ds
; recompute the destination pointer to account for sector id displacement
call COMMON_READWRITE_COMPUTE_ES_CX_SI_DI ; ES+=HDR_SECTNUM * 32, CX=256, SI=PKT_DATA, DI=BX
rep movsw            ; copy CX words from DS:SI to ES:DI (destroys CX, SI, DI)
inc byte [HDR_SECTNUM]; inc without CS prefix - do it NOW, before DS changes again!
pop cx
pop di
pop si
pop ds
pop es
; proceed to next sector
jmp short .ACTION_READ_NEXTSECT

; jump here once local processing is done: either to HANDLERDONE_GOTPKT if
; PKT_DMAC contains a valid server answer, or straight to HANDLERDONE
; otherwise. In the latter case, ah is expected to contain a valid errno.
; these subroutines can appear to be at an odd place in the code - this is
; to favor usage of short jumps to it.
HANDLERDONE_GOTPKT:
; return the register values computed by the server
mov ax, [cs:PKT_AX]
mov bx, [cs:PKT_BX]
mov cx, [cs:PKT_CX]
mov dx, [cs:PKT_DX]
HANDLERDONE:
cli ; disable interrupts - I will modify stack pointers so I can't be bothered
; save AH to [LASTOPSTATUS] (this is what int 13h,ah=1 reports later)
mov [cs:LASTOPSTATUS], ah
; switch back to original stack
mov ss, [cs:ORIGSS]
mov sp, [cs:ORIGSP]
; set CF in FLAGS on stack if ah != 0 (CF was cleared on entry, so the
; success case needs no action)
test ah, ah    ; test for zero
jz short .ALLESGUT
push bp
mov bp, sp
or byte [bp+6], 0x01 ; stack contains bp, ip, cs, flags
pop bp ; restore BP to its original value
.ALLESGUT:
iret ; processing done, return from interrupt (pops the patched FLAGS image)

; Writes AL sectors, one network round-trip per sector: the sector is first
; copied from the caller's buffer into PKT_DATA, then the frame is sent.
; HDR_SECTNUM is only incremented once the server acknowledged the sector, so
; it always equals the number of sectors successfully written.
ACTION_WRITE: ; int 13h, ah=3h (write AL sectors at CHS CH:DH:CL from ES:BX)
and ax, 0x00ff ; ah=0 and test al for zero (query to rd/wr 0 sects -> success)
jz short HANDLERDONE
mov [cs:HDR_SECTNUM], ah ; zero out HDR_SECTNUM (ah=0 here, and that's a byte shorter than using byte 0)
.ACTION_WRITE_NEXTSECT:
cmp al, [cs:HDR_SECTNUM]  ; do I have any sectors left to write?
je short HANDLERDONE_GOTPKT ; all done: answer with the server's last reply
; copy data from ES:BX + 512*sect to PKT_DATA
push es
push ds
push si
push di
push cx
; recompute the source pointer to account for sector id displacement
call COMMON_READWRITE_COMPUTE_ES_CX_SI_DI ; ES+=HDR_SECTNUM * 32, CX=256, SI=PKT_DATA, DI=BX
xchg si, di          ; swap si <--> di  (SI=BX, DI=PKT_DATA)
push es              ; ds = es
pop ds
push cs              ; es = cs
pop es
rep movsw            ; copy CX words from DS:SI to ES:DI (destroys CX, SI, DI)
pop cx
pop di
pop si
pop ds
pop es
call PKTDRVR_SEND     ; send frame and preset ah=0x80 ("timeout, not written")
; abort on failure
jnc short .ACTION_WRITE_GOTPKT
mov al, [cs:HDR_SECTNUM] ; AL tells "HOW MANY SECTORS HAVE BEEN TRANSFERRED"
jmp short HANDLERDONE
.ACTION_WRITE_GOTPKT:
; did I get a server-side error? (PKT_AH != 0)
test byte [cs:PKT_AX+1], 0xff
jnz short HANDLERDONE_GOTPKT ; yes: hand the server's registers to the caller
; proceed to next sector
inc byte [cs:HDR_SECTNUM]
jmp short .ACTION_WRITE_NEXTSECT

; Shared by ACTION_READ and ACTION_WRITE: prepares the registers for a
; one-sector "rep movsw" between the caller's buffer and PKT_DATA.
; The sector displacement is applied to the SEGMENT rather than to the offset:
; one sector = 512 bytes = 32 paragraphs, hence the offset (BX) is reused as is.
; In:    ES:BX = caller's buffer, [cs:HDR_SECTNUM] = sector index
; Out:   ES = ES + HDR_SECTNUM * 32
;        CX = 256 (words per sector)
;        DI = BX
;        SI = PKT_DATA
; AX is preserved, flags are modified.
COMMON_READWRITE_COMPUTE_ES_CX_SI_DI:
push ax
mov al, [cs:HDR_SECTNUM] ; sector index...
mov cl, 32
mul cl                   ; ...times 32 paragraphs: ax = al * 32
mov cx, es               ; segment registers cannot be used with ADD, so go
add ax, cx               ; through general purpose registers:
mov es, ax               ; es += (sector index * 512) / 16
mov si, PKT_DATA         ; packet side of the copy
mov di, bx               ; caller side of the copy (offset is left untouched)
mov cx, 256              ; a sector is 256 words
pop ax
ret


; === PACKET BUFFER, USED FOR _BOTH_ SENDING *AND* RECEIVING FRAMES ==========
; * !!! DAMAGE MITIGATION !!! --> IT IS IMPORTANT THAT THIS BUFFER IS PLACED *
; * HERE, AT THE BOTTOM OF THE TSR CODE AND JUST BEFORE THE TSR STACK AREA.  *
; * IN THE EVENT OF A STACK OVERFLOW, ONLY THE FRAME BUFFER WILL BE DAMAGED, *
; * NOT THE TSR'S EXECUTABLE CODE.                                           *
; ****************************************************************************
; The first 34 bytes (PKT_DMAC up to and including PKT_FFU) have the very same
; layout as HDR_BEGIN..HDR_END: PKTDRVR_SEND overwrites them with the HDR_*
; values before each transmission. The initial contents below are therefore
; placeholders only.
PKT_DMAC db "MOJMIR" ; SAME AS HDR_xxx
PKT_SMAC db "MILENA" ; SAME AS HDR_xxx
PKT_ETYPE dw 0       ; SAME AS HDR_xxx
PKT_PROTOVER db 0    ; SAME AS HDR_xxx
PKT_REQID db 0       ; SAME AS HDR_xxx
PKT_FLOPID dw 0,0,0,0; SAME AS HDR_xxx
PKT_AX dw 0          ; AX
PKT_BX dw 0          ; BX
PKT_CX dw 0          ; CX
PKT_DX dw 0          ; DX
PKT_SECTNUM db 0     ; sector number (used for multi-sector READs and WRITEs)
PKT_FFU db 'X'       ; word-padding; sits at the same offset (33) as
                     ; HDR_DRIVEID, so it carries that value in sent frames
PKT_DATA:
;PKT_DATA times 512 db 0 ; SECTOR DATA (for WRITE and READ ops)
; NOTE(review): str_512.asm is expected to emit exactly 512 bytes here (the
; handlers copy 256 words from/to PKT_DATA) - confirm against that file.
%include 'str_512.asm'
PKT_CSUM dw 0        ; CHECKSUM (16 bit)
PKT_END:  ; label used for stack positioning, must be at the end of the TSR!


; === TSR ENDS HERE ==========================================================

; List of command-line action letters that are not handled locally but are
; forwarded to the server. PROGSTART scans this list with "repne scasb".
SRVSIDEQUERIES db "deilnrs" ; arguments that are meant for a srv-side query
; length is derived from the list itself so both can never get out of sync
; (this line must stay right after the db above)
SRVSIDEQUERIESLEN equ $ - SRVSIDEQUERIES


; ============================================================================
; === INSTALL TSR ============================================================
; ============================================================================
; Reached from PROGSTART for the 'a' and 'b' actions, with DS=ES=CS, SI
; pointing just past the action letter and HDR_DRIVEID already set.
; Never returns: ends either with "terminate and stay resident" or with a
; normal process termination carrying a non-zero exit code.
INSTALLTSR:
; preset private stack size of the future TSR to 690 bytes
mov word [STACKLOC], PKT_END + 690
; continuation of cmdline parsing (si is left at the cmdline past 'i'):
; if DS:SI is a space then maybe next word is '$0..$9'? (custom stack size)
lodsb               ; al = [ds:si], si++
cmp al, ' '
jne short .SKIPCUSTSTACK
lodsb               ; al = [ds:si], si++
cmp al, '$'
jne short .SKIPCUSTSTACK
lodsb               ; al = [ds:si], si++
sub al, '0'
jc .SKIPCUSTSTACK    ; if less than 0...
jz .SKIPCUSTSTACK    ; ...or zero
; NOTE(review): there is no upper bound check here, characters above '9' are
; accepted too and simply yield a bigger stack (see the "too high" warning
; issued later during the install).
mov bl, 128
mul bl               ; ax = al * 128
; save the private stack setting (offset just above 1st pushed word)
add ax, PKT_END
mov [STACKLOC], ax
.SKIPCUSTSTACK:
; if drive is B: then check if it is not a phantom ("ghost") drive
mov al, [HDR_DRIVEID]
test al, al            ; 0 = A: (for ethflop)
jz short .SKIP_GHOST_CHECK ; if not A: then user wants to install as B:
mov ax, 0x440e         ; IOCTL Query Logical Drive Map
mov bl, 2              ; 2 = B: (for the 0x440e call)
int 0x21
jc short .SKIP_GHOST_CHECK ; call failed: carry on with the install
test al, al            ; al=0 means "no ghost drive for this letter"
jz short .SKIP_GHOST_CHECK
; B: is a ghost drive: refuse to install (exit code 6)
mov ah, 0x09
mov dx, STR_GHOSTERROR
int 21h
mov ax, 0x4C06  ; terminate with error
int 21h

.SKIP_GHOST_CHECK:
; am I hooked already?
call FINDTSR ; returns cfg block in ES:BX, or CF on error
jc short .NOTINSTALLED
; already installed -> abort (exit code 2)
mov ah, 0x09
mov dx, STR_ALREADYLOADED
int 21h
mov ax, 0x4C02     ; terminate with error
int 21h

.NOTINSTALLED:
; not installed yet - save previous handler
; NOTE(review): this relies on FINDTSR (func.asm) leaving the current int 13h
; vector in ES:BX when it reports "not found" - confirm against func.asm.
mov [PRVHANDLERBIOS], bx
mov [PRVHANDLERBIOS+2], es

; find packet driver
call FINDPKTDRVR ; returns pktdrvr ptr in ES:BX, CF set on error (not found)
jnc short .PACKETDRVRFOUND
; no packet driver: abort (exit code 4)
mov ah, 0x09
mov dx, STR_PKTDRVRNOTFOUND
int 21h
mov ax, 0x4C04  ; terminate with error
int 21h
.PACKETDRVRFOUND:
; write packet driver addr (ES:BX) to PKTDRVR
mov [PKTDRVR], bx
mov [PKTDRVR+2], es
; init packet driver (register a handle)
call PKTDRVRREGISTER
jnc short .PKTDRVINITOK
; init failed: abort (exit code 5)
mov ah, 0x09
mov dx, STR_PKTDRVRINITFAIL
int 21h
mov ax, 0x4C05
int 21h
.PKTDRVINITOK: ; init ok, handle acquired

; load local MAC address
mov ah, 6        ; AH=6 is get_addr()
mov bx, [PKTDRVRHANDLE]
push cs  ; es = cs
pop es
mov di, HDR_SMAC     ; where to write the MAC to
mov cx, 6            ; expected length (ethernet = 6 bytes)
; simulate int
pushf
cli
call far [PKTDRVR]

; discover local server (and ask for currently inserted floppy): a 'disk
; reset' query is broadcast, the server answers with its MAC in PKT_SMAC.
mov word [HDR_DMAC], 0xffff
mov word [HDR_DMAC+2], 0xffff
mov word [HDR_DMAC+4], 0xffff
; The query registers must be written to the HDR_* template, NOT to the PKT_*
; buffer: PKTDRVR_SEND overwrites the PKT_* header with HDR_* before sending.
mov word [HDR_AX], 0    ; ax = 0 (ah = 0  'disk reset')
call PKTDRVR_SEND
jnc .SERVERFOUND
; ERROR - server unreachable (exit code 1)
call PKTDRVR_RELEASE  ; release handle
mov ah, 0x09
mov dx, STR_SERVERUNREACHABLE
int 0x21
mov ax, 0x4C01 ; quit with error
int 0x21
.SERVERFOUND:
; print text message from server (pkt_data + 0x100)
mov ah, 0x09
mov dx, PKT_DATA + 0x100
int 0x21
; print a cr/lf
mov ah, 0x09
mov dx, STR_CRLF
int 0x21
; save current flopid (server sends it in pkt_data as an answer to reset)
mov ax, [PKT_DATA]
mov [HDR_FLOPID], ax
mov ax, [PKT_DATA+2]
mov [HDR_FLOPID+2], ax
mov ax, [PKT_DATA+4]
mov [HDR_FLOPID+4], ax
mov ax, [PKT_DATA+6]
mov [HDR_FLOPID+6], ax
; save server's MAC to header template (all further frames are unicast)
mov ax, [PKT_SMAC]
mov [HDR_DMAC], ax
mov ax, [PKT_SMAC+2]
mov [HDR_DMAC+2], ax
mov ax, [PKT_SMAC+4]
mov [HDR_DMAC+4], ax
; hook myself into the int 13h DOS chain
; int 2Fh,ah=13h SWAPS pointers: it installs DS:DX / ES:BX and returns the
; previous values in the same registers. It is therefore called twice: first
; with a dummy handler to learn the current values, then with the real ones.
mov dx, DUMMYHANDLER ; set ES:BX and DS:DX to the dummyhandler to avoid
mov bx, dx           ; horrible things to happen in case someone fires an
push cs              ; int 13h before I'm done
pop ds
push cs
pop es
mov ah, 0x13         ; int 2Fh, ah=0x13 = set/get int 13h vector (DOS 3.3+)
int 0x2f ; prv handler at ds:dx now, BIOS routine at es:bx
mov [cs:PRVHANDLERDOS], dx     ; save the original handler as known by int 2f
mov [cs:PRVHANDLERDOS+2], ds
mov dx, INTHANDLERDOS          ; set DS:DX to my own (new) handler
push cs                        ; (ES:BX is passed back unchanged)
pop ds
mov ah, 0x13                   ; call int 2Fh,ah=13h again to finish the setup
int 0x2f
push cs     ; restore DS to a sane value (DS = CS)
pop ds
; hook myself into int 13h (classic approach, but does not make DOS 3.3+ use
; the new vector - this is why the above int 2f mess is necessary)
mov ax, 0x2513         ; DOS 1+, AH=25h, AL=intnum, DS:DX=handler
mov dx, INTHANDLERBIOS ; DS is already same as CS
int 21h

; release my environment block (env seg is at offset 0x2C of the TSR's PSP)
mov es, [0x2C]  ; DS=CS already
mov ah, 0x49    ; free memory (DOS 2+) - ES must contain segment to free
int 21h
xor ax, ax      ; zero out the env seg in PSP, just in case
mov [0x2C], ax

; check TSR's stack: should be above TSR area and below program end.
; Both checks only print a warning, the installation goes on regardless.
mov ax, [STACKLOC]
cmp ax, PKT_END
ja short .STACK_LO_OK
mov dx, STR_BADSTACKPTR_TOOLO
mov ah, 0x09
int 21h
.STACK_LO_OK:
; reload the stack offset: AX has been destroyed by the print call above if
; the "too low" warning was issued
mov ax, [STACKLOC]
cmp ax, ENDOFCODE
jb short .STACK_HI_OK
mov dx, STR_BADSTACKPTR_TOOHI
mov ah, 0x09
int 21h
.STACK_HI_OK:

; print message ("tsr loaded, stack size = 0x.... bytes")
mov ah, 0x09
mov dx, STR_LOADED
int 21h
; print the stack size on screen (distance between PKT_END and stack top)
mov dx, [STACKLOC]
sub dx, PKT_END
mov ax, dx
call PRINT_AX_HEX
mov dx, STR_BYTES
mov ah, 0x09
int 21h

; close all my handles - I won't need them any more and keeping them could
; lead to SFT leaks. Thanks to ECM for this suggestion (and many others)
; http://svardos.org/?p=forum&thread=1725345339
; NOTE: nothing can be printed past this point (stdout gets closed as well).
mov bx, word [0x32]    ; get amount of handles from PSP
.NEXTHANDLE:
mov ah, 0x3E           ; "close handle" (reloaded on every pass, since the
                       ; call may return an error code in AX)
dec bx                 ; handles are numbered count-1 down to 0
int 21h
test bx, bx            ; is BX zero?
jnz short .NEXTHANDLE  ; loop for all process handles

; turn into TSR, trimming transient code - ie. everything below PKT_END+STACKSIZE
; (offsets are relative to the PSP, so the PSP itself is part of the count)
mov dx, [STACKLOC]
add dx, 15      ; make sure number of paragraphs is enough after rounding
mov cl, 4       ; convert number of bytes into number of paragraphs (/16)
shr dx, cl
mov ax, 0x3100  ; AH=31h (DOS 2+,"TSR")  AL=0 (exitcode)  DX=num of paragraphs to keep resident
int 21h


; ============================================================================
; === HELP SCREEN ============================================================
; ============================================================================
; Prints the two-part usage text and terminates with exit code 1.
; Reached whenever the command line is empty, too long or not understood.
HELP:
; first half of the help screen
mov dx, STR_HLP0
mov ah, 0x09          ; DOS "print $-terminated string"
int 21h
; second half
mov dx, STR_HLP1
mov ah, 0x09
int 21h
; terminate
mov ax, 0x4C01
int 21h


; ============================================================================
; === THIS IS WHERE THE PROGRAM STARTS WHEN EXECUTED FROM COMMAND LINE =======
; ============================================================================

; Transient entry point (reached through the jump at TSRBEGIN).
; 1. initializes the PSP-resident variables and relocates three small
;    routines into the PSP,
; 2. scans the command tail for the first non-space character and dispatches
;    on it: '!' stack usage, 'u' unload, 'a'/'b' install, one of
;    SRVSIDEQUERIES -> server-side query, anything else -> help.
PROGSTART:

cld    ; clear direction flag so all lodsb-like ops move forward

; first of all - initialize HDR_* fields to defaults using HDR_TEMPLATE
mov cx, (HDR_END-HDR_BEGIN)/2   ; /2 because I will copy WORDS, NOT BYTES
push ds  ; cs == ds already (because TINY model)
pop es   ; now cs == ds == es (in theory DOS did it already, but who knows)
mov di, HDR_BEGIN
mov si, HDR_TEMPLATE
rep movsw      ; [ES:DI+=2] = word [DS:SI+=2], CX times

; now copy the cksum routine to its final place (memory optimization makes me
; do crazy things)
mov cx, (1+COMPUTEPKTCSUM_TEMPLATE_END-COMPUTEPKTCSUM_TEMPLATE)/2 ; +1 because it might not be word-sized
mov di, COMPUTEPKTCSUM ; cs == ds == es already (see above)
mov si, COMPUTEPKTCSUM_TEMPLATE
rep movsw      ; [ES:DI+=2] = word [DS:SI+=2], CX times

; same story for the timer routine
mov cx, (1+GETCURTICK_TEMPLATE_END-GETCURTICK_TEMPLATE)/2 ; +1 because it might not be word-sized
mov di, GETCURTICK ; cs == ds == es already (see above)
mov si, GETCURTICK_TEMPLATE
rep movsw      ; [ES:DI+=2] = word [DS:SI+=2], CX times

; copy the pkt driver receiving routine to the FCB area of the PSP to save on
; RAM when/if the program goes TSR. FCBs are useless anyway
mov cx, (1+PKTDRVR_RECV_TEMPLATE_END-PKTDRVR_RECV_TEMPLATE)/2 ; +1 because it might not be word-sized
mov di, PKTDRVR_RECV ; cs == ds == es already (see above)
mov si, PKTDRVR_RECV_TEMPLATE
rep movsw      ; [ES:DI+=2] = word [DS:SI+=2], CX times

; parse arguments (ignore spaces)
; CX = number of command tail bytes left to look at (loop counter for loopz)
xor cx, cx
mov cl, [80h]
cmp cl, 32      ; is arg len > 32 ?
ja short HELP   ; must be invalid, go to help
test cx, cx     ; is arg len 0 ?
jz short HELP   ; if so, skip this check and go to help right away
mov si, 81h     ; otherwise scan argument for anything that is not a space
.nextbyte:
lodsb  ; load byte at DS:[SI] into AL, increment SI
; convert AL into lower case for further matching
cmp al, 'A'
jb short .locasegood
cmp al, 'Z'
ja short .locasegood
or al, 0x20  ; 'A'..'Z' -> 'a'..'z' (set char to lower case)
.locasegood:
; match action
cmp al, '!'  ; '!' -> stack usage
je short STACK_USAGE
cmp al, 'u'  ; 'u' -> jump to unload
je UNLOADTSR
; in the two tests below the MOV sits between CMP and JE on purpose: MOV does
; not touch the flags. Side effect: HDR_DRIVEID of this (transient) copy is
; written even when the action is neither 'a' nor 'b'.
cmp al, 'a'  ; 'a' -> jump to install (set drive to A:)
mov byte [HDR_DRIVEID], 0
je INSTALLTSR
cmp al, 'b'  ; 'b' -> jump to install (set drive to B:)
mov byte [HDR_DRIVEID], 1
je INSTALLTSR
; test for server-side queries
push cx       ; the scan below needs CX, but CX is also my "bytes left" counter
mov cx, SRVSIDEQUERIESLEN
mov di, SRVSIDEQUERIES
repne scasb   ; cmp al, [ES:DI++]  (repeat CX times or until match)
pop cx        ; restore "bytes left" (POP does not modify flags)
je short SERVERSIDEQUERY ; do we have a match? yes -> srvside query
; last test - is al a space?
cmp al, ' '
loopz .nextbyte ; space and bytes left -> look at next byte. Falls through if
                ; a non-space char is present or if the tail is exhausted
; no match? go to hell(p)
jmp HELP


; ============================================================================
; === STACK USAGE DETECTION ==================================================
; ============================================================================
; Reports how deep the TSR's private stack has been used so far.
; The TSR's stack area initially contains a copy of the transient code that
; follows PKT_END. This program is the very same binary, so comparing my own
; transient code with the TSR's stack area from the bottom up finds the
; lowest byte that has ever been overwritten by a push.
; Terminates with exit code 0 (or through NOTINSTALLED if no TSR is found).
STACK_USAGE:
call FINDTSR                ; TSR seg in ES now
jc short NOTINSTALLED       ; abort if not found
; find where is the difference between my PKT_END onward and TSR's PKT_END
; (PKT_END being the bottom of the stack)
mov di, PKT_END    ; ES:DI = the TSR's stack (old transient code)
mov si, di         ; DS:SI = my transient code (same unless overwritten)
mov cx, [es:STACKLOC]
sub cx, PKT_END    ; compute maximum amount of bytes to check
repe cmpsb         ; stops right AFTER the first byte that is different
je short .SCANDONE ; no difference at all: SI = top of stack -> usage is 0
dec si             ; step back so SI points AT the first different byte
.SCANDONE:
mov ax, [es:STACKLOC]
sub ax, si         ; AX = number of stack bytes changed
xchg ax, bx        ; save result in BX
mov ah, 0x09
mov dx, STR_STKUSAGE ; max stack used so far:
int 21h
; print the result (still stored in BX)
mov ax, bx
call PRINT_AX_HEX
mov ah, 0x09
mov dx, STR_BYTES
int 21h
mov ax, 0x4C00     ; quit
int 21h


; ============================================================================
; === EXECUTE A SERVER-SIDE ONLY QUERY =======================================
; ============================================================================
; Handles the action letters listed in SRVSIDEQUERIES. Requires a resident
; TSR with an intact stack guard area, then delegates to SENDSRVQUERY.
; Exit code: 0 = ok, 1 = query failed or TSR stack damaged,
; or whatever NOTINSTALLED returns if there is no TSR.
SERVERSIDEQUERY:
call FINDTSR                ; TSR seg in ES now
jc short NOTINSTALLED       ; abort if not found
; stack overflow detection: the 4 bytes right after PKT_END are the very
; bottom of the TSR stack and must still match my own (pristine) copy
mov di, PKT_END             ; es:di = TSR version
mov si, di                  ; ds:si = my version
mov cx, 2                   ; 2 words = 4 bytes
repe cmpsw
je short .GOODSTACK
; guard bytes were overwritten: complain and give up
mov dx, STR_BADSTACK
mov ah, 0x09
int 21h
mov ax, 0x4C01
int 21h
.GOODSTACK:
; stack is sane, proceed with the actual query
call SENDSRVQUERY           ; CF set on failure
mov ax, 0x4C00              ; MOV leaves CF untouched...
adc al, 0                   ; ...so the exit code becomes 1 if CF is set
int 21h


; ============================================================================
; === Abort because TSR not found ============================================
; ============================================================================
; Common bail-out for every action that needs a resident TSR:
; prints STR_NOTINSTALLED and terminates with exit code 3.
NOTINSTALLED:
mov dx, STR_NOTINSTALLED
mov ah, 0x09          ; DOS "print $-terminated string"
int 21h
mov ax, 0x4C03        ; terminate, exit code 3
int 21h


; ============================================================================
; === UNLOAD TSR =============================================================
; ============================================================================
; Removes the resident copy: restores both int 13h hooks (interrupt vector
; and the DOS-internal pointer managed through int 2Fh,ah=13h), releases the
; packet driver handle and frees the TSR's memory block.
; Terminates with exit code 0 (or through NOTINSTALLED if no TSR is found).
UNLOADTSR:
; findtsr
call FINDTSR                ; TSR seg in ES now
jc short NOTINSTALLED       ; abort if not found
; restore previous int 13h handler
push ds                     ; save DS and ES to stack
push es
mov ax, 0x2513                ; SET INT VECTOR (DOS 1+), AL=interrupt number
                              ; (set explicitly, do not rely on whatever
                              ; FINDTSR left in AL)
mov dx, [es:PRVHANDLERBIOS]   ; DX (offset of new handler)
mov ds, [es:PRVHANDLERBIOS+2] ; DS (seg of new handler)
int 0x21
; inform DOS through int 2F
mov ah, 0x13
mov dx, [es:PRVHANDLERDOS]
mov ds, [es:PRVHANDLERDOS+2]
push dx   ; save DS:DX to stack, will be needed again in a moment
push ds
int 0x2f
; do it again, otherwise the 'BIOS' ptr at es:bx would be a total mess
; (the call swaps pointers: the first call handed garbage in ES:BX and got
; the proper BIOS pointer back, the second call puts it back in place)
pop ds
pop dx
mov ah, 0x13                ; reload the function number, same as INSTALLTSR
int 0x2f
pop es
pop ds                      ; restore my DS and ES from stack
; unregister the packet driver handle
call PKTDRVR_RELEASE
; free TSR seg
mov ah, 0x49    ; free memory (DOS 2+) - ES must contain segment to free
int 21h
jnc short .UNLOADTSRDONE
; otherwise an error occurred (the final message is printed nonetheless)
mov ah, 0x09
mov dx, STR_FAILEDFREETSR
int 21h
.UNLOADTSRDONE:
; print msg
mov ah, 0x09
mov dx, STR_UNINSTALLED
int 21h
; terminate
mov ax, 0x4C00
int 21h


; ****************************************************************************
; ****************************************************************************
; * keep functions, strings and templates at the bottom to favor short jumps *
; * to all routines above                                                    *
; ****************************************************************************
; ****************************************************************************


%include "func.asm"
%include "strings.asm"


; ============================================================================
; === STUPID INT 13H HANDLER THAT ALWAYS FAILS ===============================
; ============================================================================
; this is used when installing the TSR through int 2Fh,ah=13h, there is a
; short time when I need to set the vector to any valid jump address in case
; a 13h interrupt would fire.
DUMMYHANDLER:
; Placeholder int 13h handler: reports a failure for any request by handing
; back AH=1 together with CF set in the caller's saved flags.
; First patch the FLAGS image that IRET is about to restore.
push bp
mov bp, sp                ; SS:BP -> saved BP, IP at +2, CS at +4, FLAGS at +6
or byte [bp+6], 1         ; bit 0 of the low FLAGS byte is CF
pop bp
; then load the return value (MOV leaves the flags untouched)
mov ax, 0x0100            ; ah=1 failure  al=0 in case it was a read query
iret


; ****************************************************************************
; This is the template for frame's header. It is used at startup to initialize
; the HDR_* fields located in the PSP area to default (sane) values
HDR_TEMPLATE:
; Default frame header, laid out field by field in the order listed below.
; The protocol version comes from the PROTOVER constant defined at the top of
; the file, so that both always agree.
.HDR_DMAC db "MONIKA" ; destination (server) MAC (6 bytes)
.HDR_SMAC db "MILAN " ; source (local) MAC (6 bytes)
.HDR_ETYPE dw 0xDDEF  ; ethertype (data = 0xEFDD, control = 0xEFDC), the word
                      ; is stored low byte first, hence the swapped constant
.HDR_PROTOVER db PROTOVER ; protocol version
.HDR_REQID db 0       ; request's sequence number (answer must have the same)
.HDR_FLOPID dw 0,0,0,0; current virtual floppy id (1st word=0 means none)
.HDR_AX dw 0          ; AX
.HDR_BX dw 0          ; BX
.HDR_CX dw 0          ; CX
.HDR_DX dw 0          ; DX
.HDR_SECTNUM db 0     ; sector number (used for multi-sector READs and WRITEs)
.HDR_FFU db 'X'       ; for future use (word-padding)

; ****************************************************************************
; IMPORTANT: THE FUNCTIONS BELOW ARE NOT TO BE CALLED! THESE ARE TEMPLATES
; THAT ARE COPIED TO PSP AT STARTUP. COMPUTEPKTCSUM AND GETCURTICK MUST BE
; CALLED INSTEAD. THESE ROUTINES SHALL USE EXCLUSIVELY RELATIVE JUMPS,
; OTHERWISE MY CRUDE RELOCATION WILL BREAK THEM!
; ============================================================================
; === CHECKSUM COMPUTATION (CODE MAX 20 BYTES BIG!) ==========================
; ============================================================================
; computes CSUM of the packet data starting at PKT_PROTOVER and ending right
; before PKT_CSUM, returns CSUM in BX
; destroys BX, CX and SI (AX is saved and restored)
COMPUTEPKTCSUM_TEMPLATE:
push ax        ; AX is used as scratch by lodsw below
mov si, PKT_PROTOVER  ; checksum starts at protover
mov cx, (PKT_CSUM-PKT_PROTOVER)/2  ; this many words (not bytes!)
xor bx, bx     ; bx will contain the resulting csum
.CSUMNEXTWORD:
; NOTE(review): lodsw walks upward only when DF is clear - confirm that all
; callers guarantee it
lodsw          ; AX = [DS:SI], SI += 2
rol bx, 1      ; rotate the running csum left by one bit...
xor bx, ax     ; ...then mix the current word into it
loop .CSUMNEXTWORD ; repeat CX times (loop is a relative jump, so it's safe)
pop ax
ret
COMPUTEPKTCSUM_TEMPLATE_END:

; ============================================================================
; === GET SHORT TIMER STATUS (CODE MAX 14 BYTES BIG!) =========================
; ============================================================================
; reads the lowest byte of the system timer at 0040:6C and returns it in BL
; (the code addresses it as 0000:046C, which is the same physical location)
; destroys BH (it is zeroed), DS is saved and restored
GETCURTICK_TEMPLATE:
push ds
xor bx, bx         ; zero out bx
mov ds, bx         ; ds points to seg 0 now
mov bl, [046Ch]    ; read lowest byte of the system 18.2 hz timer
pop ds
ret
GETCURTICK_TEMPLATE_END:

; ============================================================================
; === PACKET DRIVER RECEIVING ROUTINE (CODE MAX 36 BYTES BIG!) ===============
; === IT IS STORED IN THE FCB AREA OF THE PSP [0x5C..0x7F] ===================
; ============================================================================
; this function is called two times by the packet driver. One time for saying
; that a packet is coming and how big it is, so the application can prepare a
; buffer for it and hand back a recv ptr to the packet driver:
;   ax = 0
;   cx = incoming pkt len (bytes)
;   ...expects to receive a buffer in ES:DI on return (0000:0000 on error)
; Second call tells that the frame has been copied into the recv buffer:
;   ax = 1
;   DS:SI = buffer location where packet awaits
; WARNING: this function must modify ONLY registers ES and DI! Packet drivers
; can get easily confused when any other register (or flag) is modified.
PKTDRVR_RECV_TEMPLATE:
; Far procedure (every exit is a retf). Dispatches on AX:
;   AX = 0  -> hand a receive buffer back in ES:DI, or 0000:0000 to refuse
;   AX != 0 -> the frame is in the buffer, flag the buffer as busy
; All variables are addressed through CS.
; NOTE(review): test/cmp/xor below do alter the arithmetic flags - confirm
; that this is acceptable for the packet drivers in use.
test ax, ax   ; is ax=0? yes: packet on its way, not: packet received
jz short .PKTDRVR_RECV_PREP
mov [cs:PKTBUFBUSY], byte 1   ; mark buffer as 'full' and return
%if DBG = 2
; NOTE(review): debug-only code; it makes the template bigger and uses a near
; (IP-relative) call - verify that it still fits and still reaches VGADBG
; once the template has been copied elsewhere
mov [cs:DBGCOLOR], byte 0x50    ; violet
mov [cs:DBGVALUE], byte '.'
call VGADBG
%endif
retf
; packet driver wants to deliver a frame
.PKTDRVR_RECV_PREP:
; do I have available storage?
cmp [cs:PKTBUFBUSY], ah   ; test for zero (ah is guaranteed to be zero here)
jne short .PKTDRVR_RECV_REJECT_BUSY
; is frame len okay?
cmp cx, (PKT_END - PKT_DMAC) ; cx (frame len) must be exactly what I expect
jne short .PKTDRVR_RECV_REJECT_SIZE
; all good - return pkt buffer (cs:PKT_DMAC) in es:di
push cs
pop es
mov di, PKT_DMAC
retf
; reject frame (set es:di to 0000:0000)
.PKTDRVR_RECV_REJECT_SIZE:
%if DBG = 2
; debug marker for a frame of unexpected length; execution then falls through
; into the 'busy' path below, so its marker gets emitted as well
mov [cs:DBGCOLOR], byte 0x50    ; violet
mov [cs:DBGVALUE], byte '!'
call VGADBG
%endif
.PKTDRVR_RECV_REJECT_BUSY:
%if DBG = 2
; debug marker for a refused frame
mov [cs:DBGCOLOR], byte 0x50    ; violet
mov [cs:DBGVALUE], byte '%'
call VGADBG
%endif
xor di, di ; a byte shorter than mov di, 0
mov es, di ; es:di = 0000:0000 tells the packet driver to drop the frame
retf
PKTDRVR_RECV_TEMPLATE_END:


; ****************************************************************************
; The label below is used at install time to make sure that the top of the
; stack fits within the entire program's size (because that's the maximum
; memory I have at my disposal).
ENDOFCODE:
