;; -----------------------------------------------------------------------
;;
;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;;   Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin
;;
;;   This program is free software; you can redistribute it and/or modify
;;   it under the terms of the GNU General Public License as published by
;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;;   Boston MA 02111-1307, USA; either version 2 of the License, or
;;   (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------

;;
;; bcopy32xx.inc
;;


;
; 32-bit bcopy routine
;
; This is the actual 32-bit portion of the bcopy and shuffle and boot
; routines.  ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
; sole exception being the actual relocation code at the beginning of
; pm_shuffle_boot.
;
; It also really needs to live all in a single segment, for the
; address calculations to actually work.
;

		bits 32
		section .bcopyxx.text
		align 16
;
; pm_bcopy:
;
;	This is the protected-mode core of the "bcopy" routine.
;	Try to do aligned transfers; if the src and dst are relatively
;	misaligned, align the dst.
;
;	Inputs:
;	ESI	-> source, or -1 to zero-fill the destination instead
;	EDI	-> destination
;	ECX	-> byte count; guaranteed to not be zero on entry
;
;	Overlapping regions are handled: when the source lies below the
;	destination and the two ranges overlap, the copy runs backwards.
;
;	Clobbers ESI, EDI, ECX.  EAX, EBX and EDX are saved and restored.
;
;	The forward and zero-fill paths never touch DF, so they rely on
;	the caller having it clear; the reverse path sets DF and clears
;	it again before returning.
;
;	NOTE(review): in every path the second alignment test shifts
;	EDX, which still holds the value EDI had before the single-byte
;	fixup.  When that fixup ran, bit 1 of EDX no longer describes
;	the current EDI, so the bulk transfer may be only word-aligned.
;	This affects speed only, never the bytes transferred.
;

pm_bcopy:
		push ebx
		push edx
		push eax

		cmp esi,-1		; Source of -1 means "zero fill"
		je .bzero

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment
		mov edx,edi
		shr edx,1		; CF <- bit 0 of destination
		jnc .faa1
		movsb
		dec ecx
.faa1:
		mov al,cl		; Low bits, in case we take .f_tiny
		cmp ecx,2
		jb .f_tiny		; 0 or 1 byte left

		shr edx,1		; CF <- bit 1 of destination
		jnc .faa2
		movsw
		sub ecx,2
.faa2:

		; Bulk transfer
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		rep movsd		; Do our business
		; At this point ecx == 0

		test al,2		; Trailing word?
		jz .fab2
		movsw
.fab2:
.f_tiny:
		test al,1		; Trailing byte?
		jz .fab1
		movsb
.fab1:
.done:
		pop eax
		pop edx
		pop ebx
		ret

.reverse:
		lea eax,[esi+ecx-1]	; Point to final byte
		cmp edi,eax
		ja .forward		; No overlap, do forward copy

		std			; Reverse copy
		lea edi,[edi+ecx-1]	; Final byte of destination
		mov esi,eax		; Final byte of source

		; Initial alignment
		mov edx,edi
		shr edx,1		; CF <- bit 0 of final dst byte address
		jc .raa1
		movsb
		dec ecx
.raa1:

		dec esi			; Byte pointers -> word pointers
		dec edi
		mov al,cl		; Low bits, in case we take .r_tiny
		cmp ecx,2
		jb .r_tiny		; 0 or 1 byte left
		shr edx,1		; CF <- bit 1 of final dst byte address
		jc .raa2
		movsw
		sub ecx,2
.raa2:

		; Bulk copy
		sub esi,2		; Word pointers -> dword pointers
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		rep movsd

		; Final alignment
.r_final:
		add esi,2		; Dword pointers -> word pointers
		add edi,2
		test al,2		; Leading word?
		jz .rab2
		movsw
.rab2:
.r_tiny:
		inc esi			; Word pointers -> byte pointers
		inc edi
		test al,1		; Leading byte?
		jz .rab1
		movsb
.rab1:
		cld			; Leave DF clear again
		jmp short .done

.bzero:
		xor eax,eax		; Fill value; AL is busy, so the
					; low bits are kept in BL below

		; Initial alignment
		mov edx,edi
		shr edx,1		; CF <- bit 0 of destination
		jnc .zaa1
		stosb
		dec ecx
.zaa1:

		mov bl,cl		; Low bits, in case we take .z_tiny
		cmp ecx,2
		jb .z_tiny		; 0 or 1 byte left
		shr edx,1		; CF <- bit 1 of destination
		jnc .zaa2
		stosw
		sub ecx,2
.zaa2:

		; Bulk
		mov bl,cl		; Save low bits
		shr ecx,2
		rep stosd

		test bl,2		; Trailing word?
		jz .zab2
		stosw
.zab2:
.z_tiny:
		test bl,1		; Trailing byte?
		jz .zab1
		stosb
.zab1:
		jmp short .done

;
; shuffle_and_boot:
;
;	This routine is used to shuffle memory around, followed by
;	invoking an
;	entry point somewhere in low memory.  This routine
;	can clobber any memory outside the bcopy special area.
;
;	IMPORTANT: This routine does not set up any registers.
;	It is the responsibility of the caller to generate an appropriate entry
;	stub; *especially* when going to real mode.
;
;	Inputs:
;	ESI		-> Pointer to list of (dst, src, len) triples(*)
;	EDI		-> Pointer to safe area for list + shuffler
;			   (must not overlap this code nor the RM stack)
;	ECX		-> Byte count of list area (for initial copy)
;
;	If src == -1: then the memory pointed to by (dst, len) is bzeroed;
;		      this is handled inside the bcopy routine.
;
;	If len == 0:  this marks the end of the list; dst indicates
;		      the entry point and src the mode
;		      (0 = 16-bit entry via pm_shuffle_16, nonzero = pm)
;
;	(*) dst, src, and len are four bytes each
;
; do_raw_shuffle_and_boot is the same entry point, but with a C ABI:
; do_raw_shuffle_and_boot(safearea, descriptors, bytecount)
; The arguments arrive in EAX, EDX and ECX respectively.
;
		global do_raw_shuffle_and_boot
do_raw_shuffle_and_boot:
		mov edi,eax		; safearea
		mov esi,edx		; descriptors
					; bytecount is already in ECX

pm_shuffle:
		cli			; End interrupt service (for good)
		mov ebx,edi		; EBX <- descriptor list
		lea edx,[edi+ecx+15]	; EDX <- where to relocate our code to
		and edx,~15		; Align 16 to benefit the GDT
		call pm_bcopy		; Copy the list into the safe area
		mov esi,__bcopyxx_start	; Absolute source address
		mov edi,edx		; Absolute target address
		sub edx,esi		; EDX <- address delta
		mov ecx,__bcopyxx_dwords
		lea eax,[edx+.safe]	; Resume point
		; Relocate this code
		rep movsd
		jmp eax			; Jump to safe location
.safe:
		; Give ourselves a safe stack
		lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
		add edx,bcopy_gdt	; EDX <- new GDT
		mov [edx+2],edx		; GDT self-pointer
		lgdt [edx]		; Switch to local GDT

		; Now for the actual shuffling...
.loop:
		mov edi,[ebx]		; dst
		mov esi,[ebx+4]		; src
		mov ecx,[ebx+8]		; len
		add ebx,12
		jecxz .done		; len == 0 terminates the list
		call pm_bcopy
		jmp .loop
.done:
		; Here EDI = entry point, ESI = mode, ECX = 0
		lidt [edx+RM_IDT_ptr-bcopy_gdt]	; RM-like IDT
		push ecx		; == 0, for cleaning the flags register
		and esi,esi
		jz pm_shuffle_16	; Mode 0: 16-bit entry point
		popfd			; Clean the flags
		jmp edi			; Protected mode entry

		; We have a 16-bit entry point, so we need to return
		; to 16-bit mode.  Note: EDX already points to the GDT.
		;
		; The entry address becomes the base of both 16-bit
		; segments: bits 0-15 go to descriptor offset 2,
		; bits 16-23 to offset 4 and bits 24-31 to offset 7.
pm_shuffle_16:
		mov eax,edi
		mov [edx+PM_CS16+2],ax
		mov [edx+PM_DS16+2],ax
		shr eax,16
		mov [edx+PM_CS16+4],al
		mov [edx+PM_CS16+7],ah
		mov [edx+PM_DS16+4],al
		mov [edx+PM_DS16+7],ah
		; EAX <- CR0 with bit 0 cleared.  CR0 itself is not
		; written here; presumably the entry stub does that --
		; confirm against the stub generator.
		mov eax,cr0
		and al,~1
		popfd			; Clean the flags
		; No flag-changing instructions below...
		mov dx,PM_DS16
		mov ds,edx
		mov es,edx
		mov fs,edx
		mov gs,edx
		mov ss,edx
		jmp PM_CS16:0

		section .bcopyxx.data

		alignz 16
; GDT descriptor entry
; "desc NAME" emits the label bcopy_gdt.NAME and defines PM_NAME as
; its byte offset from bcopy_gdt.
%macro	desc 1
bcopy_gdt.%1:
PM_%1		equ bcopy_gdt.%1-bcopy_gdt
%endmacro

bcopy_gdt:
		dw bcopy_gdt_size-1	; Null descriptor - contains GDT
		dd bcopy_gdt		; pointer for LGDT instruction
		dw 0

		; TSS segment to keep Intel VT happy.  Intel VT is
		; unhappy about anything that doesn't smell like a
		; full-blown 32-bit OS.
	desc TSS
		dw 104-1, DummyTSS	; 08h 32-bit task state segment
		dd 00008900h		; present, dpl 0, 104 bytes @DummyTSS

	desc CS16
		dd 0000ffffh		; 10h Code segment, use16, readable,
		dd 00009b00h		; present, dpl 0, cover 64K
	desc DS16
		dd 0000ffffh		; 18h Data segment, use16, read/write,
		dd 00009300h		; present, dpl 0, cover 64K
	desc CS32
		dd 0000ffffh		; 20h Code segment, use32, readable,
		dd 00cf9b00h		; present, dpl 0, cover all 4G
	desc DS32
		dd 0000ffffh		; 28h Data segment, use32, read/write,
		dd 00cf9300h		; present, dpl 0, cover all 4G

bcopy_gdt_size:	equ $-bcopy_gdt
;
; Space for a dummy task state segment.
; It should never be actually accessed, but just in case it is, point
; to a chunk of memory that has a chance to not be used for anything
; real...
;
DummyTSS	equ 0x580

		align 4
		; Operand for the LIDT executed just before leaving the shuffler
RM_IDT_ptr:	dw 0FFFFh		; Length (nonsense, but matches CPU)
		dd 0			; Offset

bcopyxx_stack	equ 128			; We want this much stack

		; Export the linker-provided length of the bcopyxx area
		; as a 32-bit data word.
		section .rodata
		global __syslinux_shuffler_size
		extern __bcopyxx_len
		align 4
__syslinux_shuffler_size:
		dd __bcopyxx_len

		; Switch back to 16-bit code for whatever follows
		bits 16
		section .text16