1;; -----------------------------------------------------------------------
2;;
3;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
4;;   Copyright 2009 Intel Corporation; author: H. Peter Anvin
5;;
6;;   This program is free software; you can redistribute it and/or modify
7;;   it under the terms of the GNU General Public License as published by
8;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
9;;   Boston MA 02111-1307, USA; either version 2 of the License, or
10;;   (at your option) any later version; incorporated herein by reference.
11;;
12;; -----------------------------------------------------------------------
13
14;;
15;; pm.inc
16;;
17;; Functions to enter and exit 32-bit protected mode, handle interrupts
18;; and cross-mode calls.
19;;
20;; PM refers to 32-bit flat protected mode; RM to 16-bit real mode.
21;;
22
23		bits 16
24		section .text16
25;
26; _pm_call: call PM routine in low memory from RM
27;
28;	on stack	= PM routine to call (a 32-bit address)
29;
30;	ECX, ESI, EDI passed to the called function;
31;	EAX = EBP in the called function points to the stack frame
32;	which includes all registers (which can be changed if desired.)
33;
34;	All registers and the flags saved/restored
35;
36;	This routine is invoked by the pm_call macro.
37;
38_pm_call:
39		pushfd
40		pushad
41		push ds
42		push es
43		push fs
44		push gs
45		mov bp,sp
46		mov ax,cs
47		mov ebx,.pm
48		mov ds,ax
49		jmp enter_pm
50
51		bits 32
52		section .textnr
53.pm:
54		; EAX points to the top of the RM stack, which is EFLAGS
55		test RM_FLAGSH,02h		; RM EFLAGS.IF
56		jz .no_sti
57		sti
58.no_sti:
59		call [ebp+4*2+9*4+2]		; Entrypoint on RM stack
60		mov bx,.rm
61		jmp enter_rm
62
63		bits 16
64		section .text16
65.rm:
66		pop gs
67		pop fs
68		pop es
69		pop ds
70		popad
71		popfd
72		ret 4		; Drop entrypoint
73
74;
75; enter_pm: Go to PM with interrupt service configured
76;	EBX	  = PM entry point
77;	EAX = EBP = on exit, points to the RM stack as a 32-bit value
78;	ECX, EDX, ESI, EDI preserved across this routine
79;
80;	Assumes CS == DS
81;
82; This routine doesn't enable interrupts, but the target routine
83; can enable interrupts by executing STI.
84;
85		bits 16
86		section .text16
87enter_pm:
88		cli
89		xor eax,eax
90		mov ds,ax
91		mov ax,ss
92		mov [RealModeSSSP],sp
93		mov [RealModeSSSP+2],ax
94		movzx ebp,sp
95		shl eax,4
96		add ebp,eax		; EBP -> top of real-mode stack
97		cld
98		call enable_a20
99
100.a20ok:
101		mov byte [bcopy_gdt.TSS+5],89h	; Mark TSS unbusy
102
103		lgdt [bcopy_gdt]	; We can use the same GDT just fine
104		lidt [PM_IDT_ptr]	; Set up the IDT
105		mov eax,cr0
106		or al,1
107		mov cr0,eax		; Enter protected mode
108		jmp PM_CS32:.in_pm
109
110		bits 32
111		section .textnr
112.in_pm:
113		xor eax,eax		; Available for future use...
114		mov fs,eax
115		mov gs,eax
116		lldt ax
117
118		mov al,PM_DS32		; Set up data segments
119		mov es,eax
120		mov ds,eax
121		mov ss,eax
122
123		mov al,PM_TSS		; Be nice to Intel's VT by
124		ltr ax			; giving it a valid TR
125
126		mov esp,[PMESP]		; Load protmode %esp
127		mov eax,ebp		; EAX -> top of real-mode stack
128		jmp ebx			; Go to where we need to go
129
130;
131; enter_rm: Return to RM from PM
132;
133;	BX	= RM entry point (CS = 0)
134;	ECX, EDX, ESI, EDI preserved across this routine
135;	EAX	clobbered
136;	EBP	reserved
137;
138; This routine doesn't enable interrupts, but the target routine
139; can enable interrupts by executing STI.
140;
141		bits 32
142		section .textnr
143enter_rm:
144		cli
145		cld
146		mov [PMESP],esp		; Save exit %esp
147		jmp PM_CS16:.in_pm16	; Return to 16-bit mode first
148
149		bits 16
150		section .text16
151.in_pm16:
152		mov ax,PM_DS16		; Real-mode-like segment
153		mov es,ax
154		mov ds,ax
155		mov ss,ax
156		mov fs,ax
157		mov gs,ax
158
159		lidt [RM_IDT_ptr]	; Real-mode IDT (rm needs no GDT)
160		xor dx,dx
161		mov eax,cr0
162		and al,~1
163		mov cr0,eax
164		jmp 0:.in_rm
165
166.in_rm:					; Back in real mode
167		lss sp,[cs:RealModeSSSP]	; Restore stack
168		movzx esp,sp		; Make sure the high bits are zero
169		mov ds,dx		; Set up sane segments
170		mov es,dx
171		mov fs,dx
172		mov gs,dx
173		jmp bx			; Go to whereever we need to go...
174
175		section .data16
176		alignz 4
177
178		extern __stack_end
179PMESP		dd __stack_end		; Protected-mode ESP
180
181PM_IDT_ptr:	dw 8*256-1		; Length
182		dd IDT			; Offset
183
184;
185; This is invoked on getting an interrupt in protected mode.  At
186; this point, we need to context-switch to real mode and invoke
187; the interrupt routine.
188;
189; When this gets invoked, the registers are saved on the stack and
190; AL contains the register number.
191;
192		bits 32
193		section .textnr
194pm_irq:
195		pushad
196		movzx esi,byte [esp+8*4] ; Interrupt number
197		inc dword [CallbackCtr]
198		mov ebx,.rm
199		jmp enter_rm		; Go to real mode
200
201		bits 16
202		section .text16
203.rm:
204		pushf			; Flags on stack
205		call far [cs:esi*4]	; Call IVT entry
206		mov ebx,.pm
207		jmp enter_pm		; Go back to PM
208
209		bits 32
210		section .textnr
211.pm:
212		dec dword [CallbackCtr]
213		jnz .skip
214		call [core_pm_hook]
215.skip:
216		popad
217		add esp,4		; Drop interrupt number
218		iretd
219
220;
221; Initially, the core_pm_hook does nothing; it is available for the
222; threaded derivatives to run the scheduler, or examine the result from
223; interrupt routines.
224;
225		global core_pm_null_hook
226core_pm_null_hook:
227		ret
228
229		section .data16
230		alignz 4
231		global core_pm_hook
232core_pm_hook:	dd core_pm_null_hook
233
234		bits 16
235		section .text16
236;
237; Routines to enable and disable (yuck) A20.  These routines are gathered
238; from tips from a couple of sources, including the Linux kernel and
239; http://www.x86.org/.  The need for the delay to be as large as given here
240; is indicated by Donnie Barnes of RedHat, the problematic system being an
241; IBM ThinkPad 760EL.
242;
243
244		section .data16
245		alignz 2
246A20Ptr		dw a20_dunno
247
248		section .bss16
249		alignb 4
250A20Test		resd 1			; Counter for testing A20 status
251A20Tries	resb 1			; Times until giving up on A20
252
253		section .text16
254enable_a20:
255		pushad
256		mov byte [cs:A20Tries],255 ; Times to try to make this work
257
258try_enable_a20:
259
260;
261; First, see if we are on a system with no A20 gate, or the A20 gate
262; is already enabled for us...
263;
264a20_none:
265		call a20_test
266		jnz a20_done
267		; Otherwise, see if we had something memorized...
268		jmp word [cs:A20Ptr]
269
270;
271; Next, try the BIOS (INT 15h AX=2401h)
272;
273a20_dunno:
274a20_bios:
275		mov word [cs:A20Ptr], a20_bios
276		mov ax,2401h
277		pushf				; Some BIOSes muck with IF
278		int 15h
279		popf
280
281		call a20_test
282		jnz a20_done
283
284;
285; Enable the keyboard controller A20 gate
286;
287a20_kbc:
288		mov dl, 1			; Allow early exit
289		call empty_8042
290		jnz a20_done			; A20 live, no need to use KBC
291
292		mov word [cs:A20Ptr], a20_kbc	; Starting KBC command sequence
293
294		mov al,0D1h			; Write output port
295		out 064h, al
296		call empty_8042_uncond
297
298		mov al,0DFh			; A20 on
299		out 060h, al
300		call empty_8042_uncond
301
302		; Apparently the UHCI spec assumes that A20 toggle
303		; ends with a null command (assumed to be for sychronization?)
304		; Put it here to see if it helps anything...
305		mov al,0FFh			; Null command
306		out 064h, al
307		call empty_8042_uncond
308
309		; Verify that A20 actually is enabled.  Do that by
310		; observing a word in low memory and the same word in
311		; the HMA until they are no longer coherent.  Note that
312		; we don't do the same check in the disable case, because
313		; we don't want to *require* A20 masking (SYSLINUX should
314		; work fine without it, if the BIOS does.)
315.kbc_wait:	push cx
316		xor cx,cx
317.kbc_wait_loop:
318		call a20_test
319		jnz a20_done_pop
320		loop .kbc_wait_loop
321
322		pop cx
323;
324; Running out of options here.  Final attempt: enable the "fast A20 gate"
325;
326a20_fast:
327		mov word [cs:A20Ptr], a20_fast
328		in al, 092h
329		or al,02h
330		and al,~01h			; Don't accidentally reset the machine!
331		out 092h, al
332
333.fast_wait:	push cx
334		xor cx,cx
335.fast_wait_loop:
336		call a20_test
337		jnz a20_done_pop
338		loop .fast_wait_loop
339
340		pop cx
341
342;
343; Oh bugger.  A20 is not responding.  Try frobbing it again; eventually give up
344; and report failure to the user.
345;
346		dec byte [cs:A20Tries]
347		jnz a20_dunno		; Did we get the wrong type?
348
349		mov si, err_a20
350		pm_call pm_writestr
351		jmp kaboom
352
353		section .data16
354err_a20		db CR, LF, 'A20 gate not responding!', CR, LF, 0
355		section .text16
356
357;
358; A20 unmasked, proceed...
359;
360a20_done_pop:	pop cx
361a20_done:	popad
362		ret
363
364;
365; This routine tests if A20 is enabled (ZF = 0).  This routine
366; must not destroy any register contents.
367;
368; The no-write early out avoids the io_delay in the (presumably common)
369; case of A20 already enabled (e.g. from a previous call.)
370;
371a20_test:
372		push es
373		push cx
374		push eax
375		mov cx,0FFFFh			; HMA = segment 0FFFFh
376		mov es,cx
377		mov eax,[cs:A20Test]
378		mov cx,32			; Loop count
379		jmp .test			; First iteration = early out
380.wait:		add eax,0x430aea41		; A large prime number
381		mov [cs:A20Test],eax
382		io_delay			; Serialize, and fix delay
383.test:		cmp eax,[es:A20Test+10h]
384		loopz .wait
385.done:		pop eax
386		pop cx
387		pop es
388		ret
389
390;
391; Routine to empty the 8042 KBC controller.  If dl != 0
392; then we will test A20 in the loop and exit if A20 is
393; suddenly enabled.
394;
395empty_8042_uncond:
396		xor dl,dl
397empty_8042:
398		call a20_test
399		jz .a20_on
400		and dl,dl
401		jnz .done
402.a20_on:	io_delay
403		in al, 064h		; Status port
404		test al,1
405		jz .no_output
406		io_delay
407		in al, 060h		; Read input
408		jmp short empty_8042
409.no_output:
410		test al,2
411		jnz empty_8042
412		io_delay
413.done:		ret
414
415;
416; This initializes the protected-mode interrupt thunk set
417;
418		section .text16
419pm_init:
420		xor edi,edi
421		mov bx,IDT
422		mov di,IRQStubs
423
424		mov eax,7aeb006ah	; push byte .. jmp short ..
425
426		mov cx,8		; 8 groups of 32 IRQs
427.gloop:
428		push cx
429		mov cx,32		; 32 entries per group
430.eloop:
431		mov [bx],di		; IDT offset [15:0]
432		mov word [bx+2],PM_CS32	; IDT segment
433		mov dword [bx+4],08e00h	; IDT offset [31:16], 32-bit interrupt
434					; gate, CPL 0 (we don't have a TSS
435					; set up...)
436		add bx,8
437
438		stosd
439		; Increment IRQ, decrement jmp short offset
440		add eax,(-4 << 24)+(1 << 8)
441
442		loop .eloop
443
444		; At the end of each group, replace the EBxx with
445		; the final E9xxxxxxxx
446		add di,3
447		mov byte [di-5],0E9h	; JMP NEAR
448		mov edx,pm_irq
449		sub edx,edi
450		mov [di-4],edx
451
452		add eax,(0x80 << 24)	; Proper offset for the next one
453		pop cx
454		loop .gloop
455
456		ret
457
458		; pm_init is called before bss clearing, so put these
459		; in .earlybss!
460		section .earlybss
461		alignb 8
462IDT:		resq 256
463		global RealModeSSSP
464RealModeSSSP	resd 1			; Real-mode SS:SP
465
466		section .gentextnr	; Autogenerated 32-bit code
467IRQStubs:	resb 4*256+3*8
468
469		section .text16
470
471%include "callback.inc"			; Real-mode callbacks
472