1;
2; x86 format converters for HERMES
3; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
4; This source code is licensed under the GNU LGPL
5; 
6; Please refer to the file COPYING.LIB contained in the distribution for
7; licensing conditions		
8; 
9; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
10; Used with permission.
11; 
12
13BITS 32
14
15%include "common.inc"
16
17SDL_FUNC _ConvertX86p16_16BGR565
18SDL_FUNC _ConvertX86p16_16RGB555
19SDL_FUNC _ConvertX86p16_16BGR555
20SDL_FUNC _ConvertX86p16_8RGB332
21
22EXTERN _ConvertX86
23
24SECTION .text
25
;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR565
;
; Converts a run of 16-bit pixels by exchanging the two 5-bit end fields
; (bits 0-4 <-> bits 11-15) while the 6-bit middle field (bits 5-10)
; stays where it is.
;
; In:    esi = source pixels      (2 bytes per pixel)
;        edi = destination pixels (2 bytes per pixel)
;        ecx = number of pixels; 0 is allowed and converts nothing
; Out:   esi, edi advanced past the pixels that were converted
; Clobb: eax, ebx, ecx, edx, flags
;
; NOTE(review): register-based convention, presumably dispatched from the
; external _ConvertX86 driver which would own register preservation --
; confirm against the caller.
;-----------------------------------------------------------------------

; Convert one pixel at [esi] into [edi] and step both pointers by 2.
; Only the low 16 bits of eax are stored, so stale upper bits in eax
; and bits shifted above bit 15 in edx are harmless.
%macro CVT16_BGR565_PIXEL 0
    mov al,[esi]
    mov ah,[esi+1]
    mov ebx,eax
    mov edx,eax
    shr eax,11                  ; bits 11-15 -> bits 0-4
    and eax,BYTE 11111b
    and ebx,11111100000b        ; bits 5-10 unchanged
    shl edx,11                  ; bits 0-4 -> bits 11-15
    add eax,ebx
    add eax,edx
    mov [edi],al
    mov [edi+1],ah
    add esi,BYTE 2
    add edi,BYTE 2
%endmacro

_ConvertX86p16_16BGR565:

    ; empty run: return before the dec/jnz loop, which would otherwise
    ; wrap ecx around and iterate 2^32 times
    test ecx,ecx
    jz .L2

    ; runs of up to 16 pixels are done one pixel at a time
    cmp ecx,BYTE 16
    ja .L3

.L1: ; short loop
    CVT16_BGR565_PIXEL
    dec ecx
    jnz .L1
.L2:
    retn

.L3: ; head: if edi is not a multiple of 4, convert one pixel first
     ; (for a 2-byte aligned edi this makes it 4-byte aligned)
    mov eax,edi
    and eax,BYTE 11b
    jz .L4
    CVT16_BGR565_PIXEL
    dec ecx

.L4: ; keep the remaining count for the tail test
    push ecx

    ; two pixels per iteration; ecx >= 16 here so the pair count is non-zero
    shr ecx,1

    ; point both arrays at the end of the paired region and count a
    ; negative index up towards zero
    lea esi,[esi+ecx*4]
    lea edi,[edi+ecx*4]
    neg ecx

.L5: ; two pixels per dword
    mov eax,[esi+ecx*4]
    mov ebx,eax
    mov edx,eax

    and eax,07E007E0h           ; middle fields stay in place
    and ebx,0F800F800h
    shr ebx,11                  ; top fields -> bottom
    and edx,001F001Fh
    shl edx,11                  ; bottom fields -> top

    add eax,ebx
    add eax,edx
    mov [edi+ecx*4],eax

    inc ecx
    jnz .L5

    ; tail: one pixel left over when the remaining count was odd
    pop ecx
    and ecx,BYTE 1
    jz .L7
    CVT16_BGR565_PIXEL

.L7:
    retn
130
131
132
133
134
135
;-----------------------------------------------------------------------
; _ConvertX86p16_16RGB555
;
; Converts a run of 16-bit pixels by moving bits 6-15 down one position
; (to bits 5-14) and keeping bits 0-4 in place.  Source bit 5 is dropped
; and bit 15 of every result is zero.
;
; In:    esi = source pixels      (2 bytes per pixel)
;        edi = destination pixels (2 bytes per pixel)
;        ecx = number of pixels; 0 is allowed and converts nothing
; Out:   esi, edi advanced past the pixels that were converted
; Clobb: eax, ebx, ecx, edx, flags   (ebp is saved and restored)
;
; NOTE(review): register-based convention, presumably dispatched from the
; external _ConvertX86 driver which would own register preservation --
; confirm against the caller.
;-----------------------------------------------------------------------

; Convert one pixel at [esi] into [edi] and step both pointers by 2.
; Stale upper bits of eax/ebx are removed by the two masks.
%macro CVT16_RGB555_PIXEL 0
    mov al,[esi]
    mov ah,[esi+1]
    mov ebx,eax
    shr ebx,1
    and ebx,     0111111111100000b      ; bits 6-15 -> bits 5-14
    and eax,BYTE 0000000000011111b      ; bits 0-4 unchanged
    add eax,ebx
    mov [edi],al
    mov [edi+1],ah
    add esi,BYTE 2
    add edi,BYTE 2
%endmacro

_ConvertX86p16_16RGB555:

    ; empty run: return before the dec/jnz loop, which would otherwise
    ; wrap ecx around and iterate 2^32 times
    test ecx,ecx
    jz .L2

    ; runs of up to 32 pixels are done one pixel at a time
    cmp ecx,BYTE 32
    ja .L3

.L1: ; short loop
    CVT16_RGB555_PIXEL
    dec ecx
    jnz .L1
.L2:
    retn

.L3: ; head: if edi is not a multiple of 4, convert one pixel first
     ; (for a 2-byte aligned edi this makes it 4-byte aligned)
    mov eax,edi
    and eax,BYTE 11b
    jz .L4
    CVT16_RGB555_PIXEL
    dec ecx

.L4: ; ebp becomes the loop index because ecx carries pixel data below
    push ebp

    ; keep the remaining count for the tail
    push ecx

    ; four pixels per iteration; ecx >= 32 here so the group count is non-zero
    shr ecx,2

    ; point both arrays at the end of the grouped region
    lea esi,[esi+ecx*8]
    lea edi,[edi+ecx*8]

    ; ebp = -groups, counted up towards zero
    xor ebp,ebp
    sub ebp,ecx

.L5: ; two dwords = four pixels
    mov eax,[esi+ebp*8]
    mov ecx,[esi+ebp*8+4]

    mov ebx,eax
    mov edx,ecx

    ; bits 6-15 of each halfword, moved down by one; masking first keeps
    ; the upper pixel from leaking into the lower one
    and eax,0FFC0FFC0h
    and ecx,0FFC0FFC0h
    shr eax,1
    shr ecx,1

    ; bits 0-4 of each halfword, unchanged
    and ebx,001F001Fh
    and edx,001F001Fh

    add eax,ebx
    add ecx,edx

    mov [edi+ebp*8],eax
    mov [edi+ebp*8+4],ecx

    inc ebp
    jnz .L5

    ; tail: 0-3 pixels left over
    pop ecx
    and ecx,BYTE 11b
    jz .L7

.L6:
    CVT16_RGB555_PIXEL
    dec ecx
    jnz .L6

.L7: pop ebp
    retn
238
239
240
241
242
243
;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR555
;
; Converts a run of 16-bit pixels with this bit mapping:
;   source bits 11-15 -> result bits 0-4
;   source bits  6-10 -> result bits 5-9   (source bit 5 is dropped)
;   source bits  0-4  -> result bits 10-14
; Bit 15 of every result is zero.
;
; In:    esi = source pixels      (2 bytes per pixel)
;        edi = destination pixels (2 bytes per pixel)
;        ecx = number of pixels; 0 is allowed and converts nothing
; Out:   esi, edi advanced past the pixels that were converted
; Clobb: eax, ebx, ecx, edx, flags
;
; NOTE(review): register-based convention, presumably dispatched from the
; external _ConvertX86 driver which would own register preservation --
; confirm against the caller.
;-----------------------------------------------------------------------

; Convert one pixel at [esi] into [edi] and step both pointers by 2.
; Every field is masked after shifting, so stale upper bits of eax are
; harmless.
%macro CVT16_BGR555_PIXEL 0
    mov al,[esi]
    mov ah,[esi+1]
    mov ebx,eax
    mov edx,eax
    shr eax,11                  ; bits 11-15 -> bits 0-4
    and eax,BYTE 11111b
    shr ebx,1                   ; bits 6-10 -> bits 5-9
    and ebx,1111100000b
    shl edx,10                  ; bits 0-4 -> bits 10-14
    and edx,0111110000000000b
    add eax,ebx
    add eax,edx
    mov [edi],al
    mov [edi+1],ah
    add esi,BYTE 2
    add edi,BYTE 2
%endmacro

_ConvertX86p16_16BGR555:

    ; empty run: return before the dec/jnz loop, which would otherwise
    ; wrap ecx around and iterate 2^32 times
    test ecx,ecx
    jz .L2

    ; runs of up to 16 pixels are done one pixel at a time
    cmp ecx,BYTE 16
    ja .L3

.L1: ; short loop
    CVT16_BGR555_PIXEL
    dec ecx
    jnz .L1
.L2:
    retn

.L3: ; head: if edi is not a multiple of 4, convert one pixel first
     ; (for a 2-byte aligned edi this makes it 4-byte aligned)
    mov eax,edi
    and eax,BYTE 11b
    jz .L4
    CVT16_BGR555_PIXEL
    dec ecx

.L4: ; keep the remaining count for the tail test
    push ecx

    ; two pixels per iteration; ecx >= 16 here so the pair count is non-zero
    shr ecx,1

    ; point both arrays at the end of the paired region and count a
    ; negative index up towards zero
    lea esi,[esi+ecx*4]
    lea edi,[edi+ecx*4]
    neg ecx

.L5: ; two pixels per dword
    mov eax,[esi+ecx*4]
    mov ebx,eax
    mov edx,eax

    shr eax,1
    and eax,03E003E0h           ; bits 6-10 -> bits 5-9 of each halfword

    and ebx,0F800F800h
    shr ebx,11                  ; bits 11-15 -> bits 0-4

    and edx,001F001Fh
    shl edx,10                  ; bits 0-4 -> bits 10-14

    add eax,ebx
    add eax,edx
    mov [edi+ecx*4],eax

    inc ecx
    jnz .L5

    ; tail: one pixel left over when the remaining count was odd
    pop ecx
    and ecx,BYTE 1
    jz .L7
    CVT16_BGR555_PIXEL

.L7:
    retn
356
357
358
359
360
361
;-----------------------------------------------------------------------
; _ConvertX86p16_8RGB332
;
; Reduces a run of 16-bit pixels to one byte each:
;   source bits 13-15 -> result bits 5-7   (red)
;   source bits  8-10 -> result bits 2-4   (green)
;   source bits  3-4  -> result bits 0-1   (blue)
;
; In:    esi = source pixels      (2 bytes per pixel)
;        edi = destination pixels (1 byte per pixel)
;        ecx = number of pixels; 0 is allowed and converts nothing
; Out:   esi, edi advanced past the pixels that were converted
; Clobb: eax, ebx, ecx, edx, flags
;
; The four-pixel loop reads exactly the 8 source bytes of the group it
; is converting; it never touches memory beyond the last source pixel.
;
; NOTE(review): register-based convention, presumably dispatched from the
; external _ConvertX86 driver which would own register preservation --
; confirm against the caller.
;-----------------------------------------------------------------------

; Convert one pixel at [esi] into the byte at [edi]; esi += 2, edi += 1.
; Every field is masked before use, so stale upper bits of eax are
; harmless.
%macro CVT16_RGB332_PIXEL 0
    mov al,[esi+0]
    mov ah,[esi+1]
    mov ebx,eax
    mov edx,eax
    and eax,BYTE 11000b         ; blue
    shr eax,3
    and ebx,11100000000b        ; green
    shr ebx,6
    and edx,1110000000000000b   ; red
    shr edx,8
    add eax,ebx
    add eax,edx
    mov [edi],al
    add esi,BYTE 2
    inc edi
%endmacro

_ConvertX86p16_8RGB332:

    ; empty run: return before the dec/jnz loop, which would otherwise
    ; wrap ecx around and iterate 2^32 times
    test ecx,ecx
    jz .L2

    ; runs of up to 16 pixels are done one pixel at a time
    cmp ecx,BYTE 16
    ja .L3

.L1: ; short loop
    CVT16_RGB332_PIXEL
    dec ecx
    jnz .L1
.L2:
    retn

.L3: ; head: single pixels until edi is a multiple of 4 (at most three)
    mov eax,edi
    and eax,BYTE 11b
    jz .L4
    CVT16_RGB332_PIXEL
    dec ecx
    jmp SHORT .L3

.L4: ; keep the remaining count for the tail
    push ecx

    ; four pixels per iteration; ecx >= 14 here so the group count is non-zero
    shr ecx,2

.L5: ; gather the low bytes of pixels 0..3 into edx; after the shift the
     ; byte order (LSB first) is p2, p3, p0, p1
    mov dl,[esi+0]
    mov dh,[esi+2]
    shl edx,16
    mov dl,[esi+4]
    mov dh,[esi+6]

    ; gather the high bytes of pixels 0..3 into ebx, same byte order
    mov bl,[esi+1]
    mov bh,[esi+3]
    shl ebx,16
    mov bl,[esi+5]
    mov bh,[esi+7]

    ; blues: bits 3-4 of each low byte -> bits 0-1; the 16 in each rotate
    ; restores p0..p3 byte order
    and edx,00011000000110000001100000011000b
    ror edx,16+3

    ; reds: bits 5-7 of each high byte are already in position
    mov eax,ebx
    and eax,11100000111000001110000011100000b
    ror eax,16

    ; greens: bits 0-2 of each high byte -> bits 2-4
    and ebx,00000111000001110000011100000111b
    ror ebx,16-2

    add eax,ebx
    add eax,edx
    mov [edi],eax

    add esi,BYTE 8
    add edi,BYTE 4

    dec ecx
    jnz .L5

    ; tail: 0-3 pixels left over
    pop ecx
    and ecx,BYTE 11b
    jz .L7

.L6:
    CVT16_RGB332_PIXEL
    dec ecx
    jnz .L6

.L7:
    retn
487
488%ifidn __OUTPUT_FORMAT__,elf32
489section .note.GNU-stack noalloc noexec nowrite progbits
490%endif
491