!**************************************************************************
!*
!* Boot-ROM-Code to load an operating system across a TCP/IP network.
!*
!* Module:  inflate.S
!* Purpose: Decompress the rom image
!* Entries: inflate
!*
!**************************************************************************
!*
!* Copyright (C) 1995-1998 Gero Kuhlmann <gero@gkminix.han.de>
!*
!*  This program is free software; you can redistribute it and/or modify
!*  it under the terms of the GNU General Public License as published by
!*  the Free Software Foundation; either version 2 of the License, or
!*  any later version.
!*
!*  This program is distributed in the hope that it will be useful,
!*  but WITHOUT ANY WARRANTY; without even the implied warranty of
!*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!*  GNU General Public License for more details.
!*
!*  You should have received a copy of the GNU General Public License
!*  along with this program; if not, write to the Free Software
!*  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
!*


!
!**************************************************************************
!
! Load assembler macros.
!
#include <macros.inc>
#include <memory.inc>


!
!**************************************************************************
!
! Definitions
!
BUFSIZE		equ	8192			! text (history) buffer size; offsets are wrapped with BUFSIZE - 1, so it has to be a power of two
PRESENSE	equ	256			! pre-sense buffer size; the text pointer starts at BUFSIZE - PRESENSE
THRESHOLD	equ	2			! buffer threshold; copy length = code - 256 + THRESHOLD

NCHAR		equ	512 - THRESHOLD + 1	! number of character codes (leaves of the huffman tree)
SONSIZE		equ	NCHAR * 2 - 1		! size of son table (total number of tree nodes)
ROOT		equ	SONSIZE - 1		! root position (index of the last node)

ENDOF		equ	256			! code to indicate end of input
MAXFREQ		equ	$8000			! root frequency at which update rebuilds the tree
!
! All translation tables are stored in the data segment. However, putting
! them into the BSS segment adds too much overhead to the rom startup code
! so we define their offsets here. These offsets have to be added to the
! pointer indicating the end of the data area, which is globally stored in
! the BP register.
!
! Each offset below is the previous offset plus the size in bytes of the
! previous table, so the tables lie back to back starting at BP.
!
INITAB		equ	0			! initialization table, 8 bytes: number of codes per code length 1..8
TEXTBUF		equ	INITAB + 8		! text buffer, BUFSIZE bytes
PLEN		equ	TEXTBUF + BUFSIZE	! p_len, 64 bytes reserved: code length of every position code
DLEN		equ	PLEN + 64		! d_len, 256 bytes: code length for every possible input byte
CODETAB		equ	DLEN + 256		! code table, 256 bytes: position code for every possible input byte
FREQ		equ	CODETAB + 256		! frequency table, SONSIZE + 1 words (last word is a sentinel)
PRNT		equ	FREQ + (SONSIZE + 1) * 2	! parent node table, SONSIZE + NCHAR words
SON		equ	PRNT + (SONSIZE + NCHAR) * 2	! son node table, SONSIZE words
TBLSIZE		equ	SON + SONSIZE * 2		! size of all tables in bytes


!
!**************************************************************************
!
	.text

	public	inflate

	extrn	_end_of_text
	extrn	getbyte
	extrn	putbyte


!
!**************************************************************************
!
! Define local variables
!
! These variables are assembled into the .text segment and are accessed
! through the default data segment. Their start values are fixed at assembly
! time; inflate does not reset them on entry.
txtptr:	.word	BUFSIZE - PRESENSE	! offset into text buffer (next write position)
bitbuf:	.word	0			! input bit buffer, valid bits are left-aligned
bitlen:	.byte	0			! input bit length (number of valid bits in bitbuf)
cflag:	.byte	0			! flag indicating corrupted input; counts reads at end of input


!
!**************************************************************************
!
! Fetch the next CL bits from the compressed input stream.
!
! The valid bits are kept left-aligned in the 16-bit word bitbuf, bitlen
! holds their number. Whenever 8 or fewer bits are left, one more byte is
! fetched with getbyte and merged in right below the valid bits. If getbyte
! returns with the zero flag set (end of input) cflag is incremented; the
! value in AL is merged into the buffer nevertheless.
!
! Input:  CL  -  number of bits to get
! Output: AX  -  requested bits, right-aligned
!         Carry flag set if error (cflag has reached 2)
! Registers changed: AX, CX
!
getnbits:

	push	dx
	push	cx			! the requested count is needed again
	mov	dl,bitlen		! DL = number of valid bits
	mov	ax,bitbuf
	cmp	dl,#8
	ja	getnb_take		! more than 8 bits left, no refill

! Refill: fetch one byte and place it directly below the bits which are
! still valid in the bit buffer

	call	getbyte
	jnz	getnb_merge
	inc	byte ptr cflag		! end of input has been reported
getnb_merge:
	xor	ah,ah
	mov	cl,#8
	sub	cl,dl
	shl	ax,cl			! align new byte below the valid bits
	or	ax,bitbuf
	add	dl,#8			! eight more bits are valid now

! Take the requested number of bits from the top of the bit buffer

getnb_take:
	pop	cx			! CL = requested number of bits
	sub	dl,cl
	mov	bitlen,dl		! that many bits get consumed
	mov	dx,ax
	shl	dx,cl
	mov	bitbuf,dx		! remaining bits move up to the top

	mov	dx,#1			! DX = (1 << CL) - 1, mask for result
	shl	dx,cl
	dec	dx

	mov	ch,cl
	neg	cl
	add	cl,#16			! CL = 16 - requested count
	shr	ax,cl			! move result into the lower position
	and	ax,dx
	pop	dx

	cmp	byte ptr cflag,#2	! carry = (cflag >= 2)
	cmc
	ret


!
!**************************************************************************
!
! Build all decoding tables from the initialization table at INITAB[bp]:
!   - p_len:    code length (1..8) of every position code
!   - d_len:    code length for every possible 8-bit input value
!   - codetab:  position code for every possible 8-bit input value
!   - freq/son/prnt: initial huffman tree with a frequency of 1 per leaf
! Uses stosb/stosw, so ES has to address the same segment as the tables.
! Input:  none
! Output: none
! Registers changed: AX, BX, CX, DX, SI, DI
!
starthuff:

! Expand the initialization table: entry number i holds the number of
! position codes which have a length of i + 1 bits.

	lea	di,PLEN[bp]
	xor	si,si
sthuf_plen:
#ifdef IS386
	movzx	cx,byte ptr INITAB[bp+si]
#else
	xor	ch,ch
	mov	cl,INITAB[bp+si]
#endif
	inc	si
	mov	ax,si			! AL = code length for this entry
	rep				! fill p_len table with defaults
	stosb
	cmp	si,#8
	jb	sthuf_plen

! A code with a length of n bits is selected by 2^(8-n) different values of
! an input byte. Fill the code and d_len tables for all of those values.

	mov	dx,di
	lea	ax,PLEN[bp]
	sub	dx,ax			! DX = number of entries in p_len
	lea	bx,CODETAB[bp]
	lea	di,DLEN[bp]
	xor	si,si			! SI = current position code
sthuf_code:
	mov	cx,#8
	sub	cl,PLEN[bp+si]
	mov	ax,#1
	shl	ax,cl
	mov	cx,ax			! CX = 2^(8 - code length)
sthuf_fill:
	mov	al,PLEN[bp+si]
	stosb				! d_len gets the length of the code
	mov	ax,si
	mov	[bx],al			! code table gets the code itself
	inc	bx
	loop	sthuf_fill
	inc	si
	cmp	si,dx
	jb	sthuf_code

! Leaf entries: freq[i] = 1, son[i] = i + SONSIZE, prnt[i + SONSIZE] = i

	mov	di,bp
	lea	si,(PRNT + SONSIZE * 2)[bp]
	mov	dx,#SONSIZE
	xor	ax,ax
	mov	cx,#NCHAR
sthuf_leaf:
	mov	[si],ax
	mov	SON[di],dx
	mov	word ptr FREQ[di],#1
	inc	ax
	inc	dx
	add	si,#2
	add	di,#2
	loop	sthuf_leaf

! Internal nodes j = NCHAR..ROOT, each one joining the nodes i and i + 1:
! freq[j] = freq[i] + freq[i+1], son[j] = i, prnt[i] = prnt[i+1] = j.
! NCHAR is never larger than ROOT, so the loop can be tested at its end.

	lea	di,(FREQ + NCHAR * 2)[bp]	! DI = &freq[j]
	mov	si,bp				! SI = BP + i * 2
	xor	dx,dx				! DX = i
	mov	cx,#NCHAR			! CX = j
sthuf_node:
	mov	ax,(FREQ+0)[si]
	add	ax,(FREQ+2)[si]
	stosw
	mov	(SON - FREQ - 2)[di],dx	! DI is already one word ahead
	mov	(PRNT+0)[si],cx
	mov	(PRNT+2)[si],cx
	add	si,#4
	add	dx,#2
	inc	cx
	cmp	cx,#ROOT
	jbe	sthuf_node

! Terminate the frequency table with a sentinel and clear the root parent

	mov	word ptr (FREQ + (SONSIZE * 2))[bp],#$FFFF
	xor	ax,ax
	mov	word ptr (PRNT + (ROOT * 2))[bp],ax
	ret


!
!**************************************************************************
!
! Update huffman decoding tables after a code has been decoded.
!
! If the frequency of the root has reached MAXFREQ the tree is rebuilt
! first: all leaves are collected at the beginning of the tables with their
! frequencies set to (freq+1)/2, then the internal nodes are created again,
! sorted by frequency, and the parent table is relinked.
! After that the frequency of the decoded leaf and of every node on its way
! to the root is incremented. Whenever a node becomes more frequent than its
! right neighbour in the frequency table it is exchanged with the last node
! which has a smaller frequency.
!
! The single-letter names in the comments (i, j, k, l, c, f) denote the
! table indices and values held in the registers at that point.
!
! Input:  DX  -  last read compressed input value
! Output: none
! Registers changed: AX, BX, CX, DX, SI, DI
!
update:

	push	dx			! keep the code, popped again at upd20
	cmp	word ptr (FREQ + (ROOT * 2))[bp],#MAXFREQ
#ifdef IS386
	jne	near ptr upd20
#else
	je	upd1
	jmp	near ptr upd20
#endif

! Correct leaf node into first half, and set these frequencies to (freq+1)/2

upd1:	mov	si,bp			! SI = BP + read index * 2
	mov	di,bp			! DI = BP + write index * 2
	mov	bx,#SON
	mov	cx,#SONSIZE
upd2:	cmp	word ptr [bx+si],#SONSIZE	! son >= SONSIZE marks a leaf
	jb	upd3
	mov	ax,FREQ[si]
	inc	ax
	shr	ax,#1
	mov	FREQ[di],ax
	mov	ax,[bx+si]
	mov	[bx+di],ax
	add	di,#2
upd3:	add	si,#2
	loop	upd2

! Build tree. Link sons first.

	xor	dx,dx			! i = DX
	mov	cx,#NCHAR		! j = CX
upd4:	cmp	cx,#SONSIZE
	jae	upd12

	mov	si,dx
	shl	si,#1
	lea	bx,FREQ[bp]		! &freq[0] = BX
	mov	ax,[bx+si]
	add	ax,2[bx+si]
	mov	si,cx
	shl	si,#1
	mov	[bx+si],ax		! f = AX = freq[j] = freq[i] + freq[i+1]

	mov	si,cx
	dec	si			! k*2 = SI, starting with k = j - 1
	shl	si,#1
upd5:	cmp	ax,[bx+si]		! search downwards while f < freq[k]
	jae	upd6
	sub	si,#2
	jmp	upd5
upd6:	add	si,#2			! k*2 = SI is the insert position

	push	ax
	mov	di,cx			! DI = j*2
	shl	di,#1			! move freq[k..j-1] up by one entry
upd7:	cmp	di,si
	jbe	upd8
	mov	ax,-2[bx+di]
	mov	[bx+di],ax
	sub	di,#2
	jmp	upd7
upd8:	pop	ax			! f = AX
	mov	[bx+si],ax		! freq[k] = f

	lea	bx,SON[bp]		! &son[0] = BX
	mov	di,cx			! DI = j*2
	shl	di,#1			! move son[k..j-1] up by one entry
upd9:	cmp	di,si
	jbe	upd10
	mov	ax,-2[bx+di]
	mov	[bx+di],ax
	sub	di,#2
	jmp	upd9
upd10:	mov	[bx+si],dx		! son[k] = i

	add	dx,#2
	inc	cx
	jmp	upd4

! Link parents

upd12:	xor	dx,dx			! i = DX
	lea	bx,PRNT[bp]		! &prnt[0] = BX
	lea	si,SON[bp]		! &son[i] = SI
upd13:	cmp	dx,#SONSIZE
	jae	upd20
	mov	di,[si]			! k*2 = DI, with k = son[i]
	shl	di,#1
	mov	[bx+di],dx		! prnt[k] = i
	cmp	di,#SONSIZE * 2
	jae	upd14			! a leaf has one parent entry only
	mov	2[bx+di],dx		! prnt[k+1] = i
upd14:	add	si,#2
	inc	dx
	jmp	upd13

! Order frequencies

upd20:	pop	si			! SI = code passed in DX
	shl	si,#1
	mov	dx,(PRNT + SONSIZE * 2)[bp+si]	! c = DX = prnt[code + SONSIZE]

upd21:	mov	si,dx
	shl	si,#1
	lea	bx,FREQ[bp+si]		! &freq[c] = BX
	inc	word ptr [bx]
	mov	ax,[bx]			! k = AX = new value of freq[c]

! Swap nodes when it becomes a wrong frequency order

	cmp	ax,2[bx]		! still k <= freq[c+1]: nothing to do
	jbe	upd30
	lea	si,4[bx]		! SI = &freq[c+2]
	mov	bx,ax			! k = BX
upd22:	lodsw				! skip all entries smaller than k
	cmp	bx,ax
	ja	upd22
	mov	cx,si			! SI is two entries behind the last
	lea	ax,(FREQ+4)[bp]		! smaller entry, which has index l
	sub	cx,ax
	mov	di,cx			! l*2 = DI
	shr	cx,#1			! l = CX

	xchg	bx,-4[si]		! freq[l] = k, BX = old freq[l]
	mov	si,dx			! c*2 = SI
	shl	si,#1
	mov	FREQ[bp+si],bx		! freq[c] = old freq[l]
	mov	si,SON[bp+si]
	mov	ax,si			! i = AX = son[c]
	shl	si,#1
	mov	(PRNT+0)[bp+si],cx	! prnt[i] = l
	cmp	ax,#SONSIZE
	jae	upd23			! a leaf has one parent entry only
	mov	(PRNT+2)[bp+si],cx	! prnt[i+1] = l
upd23:	xchg	ax,SON[bp+di]		! son[l] = i, j = AX = old son[l]
	mov	si,ax
	shl	si,#1			! j*2 = SI
	mov	(PRNT+0)[bp+si],dx	! prnt[j] = c
	cmp	ax,#SONSIZE
	jae	upd24			! a leaf has one parent entry only
	mov	(PRNT+2)[bp+si],dx	! prnt[j+1] = c
upd24:	mov	si,dx			! c*2 = SI
	shl	si,#1
	mov	SON[bp+si],ax		! son[c] = j
	mov	dx,cx			! continue with c = l

upd30:	mov	si,dx
	shl	si,#1
	mov	dx,PRNT[bp+si]		! loop until we reach the root
	or	dx,dx
	jnz	upd21
	ret


!
!**************************************************************************
!
! Inflate the compressed image. This routine assumes DS = ES = SS.
!
! The work tables (TBLSIZE bytes) are placed at the first 16-byte boundary
! at or behind _end_of_text; BP points to them while the routine is running
! and is restored before returning. Compressed bytes are fetched using
! getbyte (zero flag set at end of input), decompressed bytes are written
! using putbyte.
!
! Layout of the input as consumed here:
!   - one 16-bit word, low byte first, holding five fields of 1, 2, 3, 4 and
!     5 bits (starting with the lowest bit): number of position codes with a
!     length of 1 to 5 bits
!   - one byte, of which the lower 6 bits give the number of position codes
!     with a length of 6 bits
!   - the huffman coded data, terminated by the code ENDOF
! The number of position codes with 7 and 8 bits is computed from the others.
!
! Input:  none
! Output: AL  -  0=no error, -1=error
! Registers changed: AX, BX, CX, DX, SI, DI
!
inflate:

	cld
	push	bp
	mov	bp,#_end_of_text + $000F	! let BP point to actual BSS
	and	bp,#$FFF0			! adjust to segment boundary
	mov	di,bp
	xor	ax,ax
	mov	cx,#TBLSIZE / 2
	rep					! clear out all tables
	stosw

! Read the initialization table from the input file

	call	getbyte
	jz	infle1
	mov	bl,al			! read first word of input file into
	call	getbyte			! BX
	jz	infle1
	mov	bh,al

	lea	di,INITAB+0[bp]		! DI points into initialization table
	mov	cx,#1			! CX = width of the current bit field
	mov	dx,#$01FF		! bit mask in DH
infl1:	mov	al,bl
	and	al,dh			! mask off significant bits
	stosb
	shr	bx,cl			! get next set of bits from init word
	shl	dx,#1			! adjust bit mask
	inc	cx
	cmp	cx,#5
	jbe	infl1			! continue with next bit set
	call	getbyte			! get last initialization byte
	jz	infle1
	and	al,dh			! and save the relevant bits (DH = $3F)
	stosb

! Compute summaries from initialization table. CX is 6 at this point, so
! the six entries read from the input are processed:
!   BX = number of codes with a length of 1 to 6 bits
!   DX = number of byte values (out of 256) selecting one of these codes

	lea	si,INITAB+0[bp]
	xor	bx,bx
	xor	dx,dx
	xor	ah,ah
infl2:	lodsb
	add	bx,ax			! addup all bytes in initialization tbl
	shl	dx,#1
	add	dx,ax
	loop	infl2

#ifdef IS186
	shl	dx,#2
#else
	shl	dx,#1
	shl	dx,#1
#endif

! Solve for the number of codes with 7 bits (n7) and 8 bits (n8):
!       n7 + n8 = 62 - BX	(codes still available)
!   2 * n7 + n8 = 256 - DX	(byte values still available)

	mov	ax,#62			! compute number of remaining codes
	sub	ax,bx
	jl	infle1			! should not be negative
	mov	bx,ax

	mov	ax,#256			! compute number of remaining byte
	sub	ax,dx			! values
	sub	ax,bx			! n7 = AX; plain sub, the borrow of
	jl	infle1			! the sub above must not be used here

	or	ah,bh			! both should not be larger than 255
	jnz	infle1
	stosb				! save n7 into initialization table
	sub	bx,ax			! n8 = BX
	jl	infle1
	mov	[di],bl			! save n8 into initialization table

! Check that the initialization table is correct. Its sum has to be 256.

	lea	si,INITAB+0[bp]
	mov	cx,#8
	xor	bx,bx
infl3:	lodsb				! load every byte of the init table
	xor	ah,ah
	shl	ax,cl			! shift value according to its position
	rcr	ax,#1			! i.e. multiply with 2^(CX - 1)
	add	bx,ax			! add it up and continue with next
	loop	infl3			! value
	cmp	bx,#256			! sum has to be exactly 256
	je	infl4
infle1:	jmp	near ptr infl8

! Setup the huffman decoding tables

infl4:	call	starthuff		! setup huffman decoding table
	lea	di,TEXTBUF[bp]
	mov	cx,txtptr		! clear text buffer
	mov	al,#$20			! with blanks up to the text pointer
	rep
	stosb

! Start the main loop reading all of the compressed data

infl5:	mov	dx,(SON + ROOT*2)[bp]	! get root
infl6:	cmp	dx,#SONSIZE		! check if leaf reached
	jae	infl7
	mov	cl,#1			! get next bit from compressed image
	call	getnbits
	jc	infle1			! check if eof reached
	add	ax,dx			! get pointer to next son
	mov	si,ax
	shl	si,#1
	mov	dx,SON[bp+si]		! get next son
	jmp	infl6

infl7:	xor	ax,ax			! AL = 0 in case this is the end
	sub	dx,#SONSIZE		! compute value of son
	cmp	dx,#ENDOF		! check if end of file reached
	je	infl9

	push	dx			! update destroys DX
	call	update			! update huffman tables
	pop	ax

! Put output code into decompressed image

	cmp	ax,#256			! check if its a output code
	jae	infl11
	mov	si,txtptr
	mov	TEXTBUF[bp+si],al	! put character into text table
	inc	si
	and	si,#BUFSIZE - 1		! increment text pointer
	mov	txtptr,si
	call	putbyte			! write char into decompressed image
infl10:	jmp	infl5			! continue with next character

! Decode upper 6 bits from table and copy decompressed characters from
! text buffer into destination image

infl11:	push	ax			! save code, it gives the length
	mov	cl,#8			! get next byte from compressed image
	call	getnbits
	jc	infle2			! check if eof reached
	mov	si,ax
#ifdef IS386
	movzx	dx,byte ptr CODETAB[bp+si]
#else
	mov	dl,CODETAB[bp+si]	! get translation code
	xor	dh,dh
#endif
#ifdef IS186
	shl	dx,#7
#else
	mov	cl,#7			! convert conversion code into offset
	shl	dx,cl			! into text buffer
#endif
	mov	cl,DLEN[bp+si]		! get number of bits telling offset
	dec	cl			! of string in text buffer
	shl	ax,cl
	mov	bx,ax
	call	getnbits		! get index
	jc	infle2			! check if eof reached
	or	ax,bx
	and	ax,#$007F		! compute final index into text buffer
	or	ax,dx
	inc	ax			! AX = distance back from text pointer
	mov	si,txtptr
	mov	di,si
	sub	si,ax			! SI = source, DI = destination offset
	pop	cx
	sub	cx,#256 - THRESHOLD	! compute final length

	lea	bx,TEXTBUF[bp]
infl12:	and	si,#BUFSIZE - 1		! wrap text buffer pointers at the end
	and	di,#BUFSIZE - 1
	mov	al,[bx+si]		! get next character from text buffer
	mov	[bx+di],al		! and store it into new position
	call	putbyte			! write it into decompressed image
	inc	si
	inc	di			! continue with next character
	loop	infl12
	and	di,#BUFSIZE - 1
	mov	txtptr,di		! update new text pointer
	jmp	near ptr infl5		! continue with next compressed byte

! Thats it, return to caller

infle2:	pop	ax			! remove saved code from stack
infl8:	mov	al,#-1			! return with error
infl9:	pop	bp
	ret


!
!**************************************************************************
!
	end
