1/*
2 * Copyright 2013 Tilera Corporation. All Rights Reserved.
3 *
4 *   This program is free software; you can redistribute it and/or
5 *   modify it under the terms of the GNU General Public License
6 *   as published by the Free Software Foundation, version 2.
7 *
8 *   This program is distributed in the hope that it will be useful, but
9 *   WITHOUT ANY WARRANTY; without even the implied warranty of
10 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 *   NON INFRINGEMENT.  See the GNU General Public License for
12 *   more details.
13 *
 * A code-rewriter that handles unaligned exceptions.
15 */
16
17#include <linux/smp.h>
18#include <linux/ptrace.h>
19#include <linux/slab.h>
20#include <linux/thread_info.h>
21#include <linux/uaccess.h>
22#include <linux/mman.h>
23#include <linux/types.h>
24#include <linux/err.h>
25#include <linux/module.h>
26#include <linux/compat.h>
27#include <linux/prctl.h>
28#include <asm/cacheflush.h>
29#include <asm/traps.h>
30#include <asm/uaccess.h>
31#include <asm/unaligned.h>
32#include <arch/abi.h>
33#include <arch/spr_def.h>
34#include <arch/opcode.h>
35
36
/*
 * This file handles unaligned exceptions for tile-Gx. The tilepro
 * unaligned exception support lives in single_step.c.
 */
41
42int unaligned_printk;
43
44static int __init setup_unaligned_printk(char *str)
45{
46	long val;
47	if (kstrtol(str, 0, &val) != 0)
48		return 0;
49	unaligned_printk = val;
	pr_info("Printk for each unaligned data access is %s\n",
		unaligned_printk ? "enabled" : "disabled");
52	return 1;
53}
54__setup("unaligned_printk=", setup_unaligned_printk);
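
/*
 * Usage note: booting with "unaligned_printk=1" on the kernel command
 * line enables the per-fixup pr_info() messages below; the default of 0
 * leaves them disabled.
 */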
55
56unsigned int unaligned_fixup_count;
57
58#ifdef __tilegx__
59
/*
 * Unaligned-data JIT fixup code fragment. The reserved space is 128 bytes.
 * The first 64-bit word saves the fault PC address, the second word is the
 * faulting instruction bundle, followed by 14 JIT bundles.
 */
65
66struct unaligned_jit_fragment {
67	unsigned long       pc;
68	tilegx_bundle_bits  bundle;
69	tilegx_bundle_bits  insn[14];
70};
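
/*
 * For illustration, the 128-byte slot layout implied by the struct above
 * (byte offsets):
 *
 *   [  0 ..   7]  pc        faulting PC address
 *   [  8 ..  15]  bundle    original faulting instruction bundle
 *   [ 16 .. 127]  insn[14]  generated JIT bundles, ending with an iret
 */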
71
/*
 * Check whether a nop or fnop is at the bundle's pipeline X0.
 */
75
76static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
77{
78	return (((get_UnaryOpcodeExtension_X0(bundle) ==
79		  NOP_UNARY_OPCODE_X0) &&
80		 (get_RRROpcodeExtension_X0(bundle) ==
81		  UNARY_RRR_0_OPCODE_X0) &&
82		 (get_Opcode_X0(bundle) ==
83		  RRR_0_OPCODE_X0)) ||
84		((get_UnaryOpcodeExtension_X0(bundle) ==
85		  FNOP_UNARY_OPCODE_X0) &&
86		 (get_RRROpcodeExtension_X0(bundle) ==
87		  UNARY_RRR_0_OPCODE_X0) &&
88		 (get_Opcode_X0(bundle) ==
89		  RRR_0_OPCODE_X0)));
90}
91
/*
 * Check whether a nop or fnop is at the bundle's pipeline X1.
 */
95
96static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
97{
98	return (((get_UnaryOpcodeExtension_X1(bundle) ==
99		  NOP_UNARY_OPCODE_X1) &&
100		 (get_RRROpcodeExtension_X1(bundle) ==
101		  UNARY_RRR_0_OPCODE_X1) &&
102		 (get_Opcode_X1(bundle) ==
103		  RRR_0_OPCODE_X1)) ||
104		((get_UnaryOpcodeExtension_X1(bundle) ==
105		  FNOP_UNARY_OPCODE_X1) &&
106		 (get_RRROpcodeExtension_X1(bundle) ==
107		  UNARY_RRR_0_OPCODE_X1) &&
108		 (get_Opcode_X1(bundle) ==
109		  RRR_0_OPCODE_X1)));
110}
111
/*
 * Check whether a nop or fnop is at the bundle's pipeline Y0.
 */
115
116static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
117{
118	return (((get_UnaryOpcodeExtension_Y0(bundle) ==
119		  NOP_UNARY_OPCODE_Y0) &&
120		 (get_RRROpcodeExtension_Y0(bundle) ==
121		  UNARY_RRR_1_OPCODE_Y0) &&
122		 (get_Opcode_Y0(bundle) ==
123		  RRR_1_OPCODE_Y0)) ||
124		((get_UnaryOpcodeExtension_Y0(bundle) ==
125		  FNOP_UNARY_OPCODE_Y0) &&
126		 (get_RRROpcodeExtension_Y0(bundle) ==
127		  UNARY_RRR_1_OPCODE_Y0) &&
128		 (get_Opcode_Y0(bundle) ==
129		  RRR_1_OPCODE_Y0)));
130}
131
/*
 * Check whether a nop or fnop is at the bundle's pipeline Y1.
 */
135
136static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
137{
138	return (((get_UnaryOpcodeExtension_Y1(bundle) ==
139		  NOP_UNARY_OPCODE_Y1) &&
140		 (get_RRROpcodeExtension_Y1(bundle) ==
141		  UNARY_RRR_1_OPCODE_Y1) &&
142		 (get_Opcode_Y1(bundle) ==
143		  RRR_1_OPCODE_Y1)) ||
144		((get_UnaryOpcodeExtension_Y1(bundle) ==
145		  FNOP_UNARY_OPCODE_Y1) &&
146		 (get_RRROpcodeExtension_Y1(bundle) ==
147		  UNARY_RRR_1_OPCODE_Y1) &&
148		 (get_Opcode_Y1(bundle) ==
149		  RRR_1_OPCODE_Y1)));
150}
151
152/*
153 * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
154 */
155
156static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
157{
158	return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
159}
160
161/*
162 * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
163 */
164
165static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
166{
167	return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
168}
169
/*
 * Find the destination and source registers of the faulting unaligned access
 * instruction at X1 or Y2. Also allocate up to 3 scratch registers, clob1,
 * clob2 and clob3, which are guaranteed to be different from any register
 * used in the fault bundle. r_alias reports whether instructions other than
 * the unaligned load/store share a register with ra, rb or rd.
 */
177
178static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
179		      uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
180		      uint64_t *clob3, bool *r_alias)
181{
182	int i;
183	uint64_t reg;
184	uint64_t reg_map = 0, alias_reg_map = 0, map;
185	bool alias = false;
186
	/*
	 * Parse the fault bundle, find the potentially used registers and
	 * mark the corresponding bits in reg_map and alias_reg_map. These
	 * two bit maps are used to find the scratch registers and to
	 * determine whether there is a register alias.
	 */
193	if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */
194
195		reg = get_SrcA_Y2(bundle);
196		reg_map |= 1ULL << reg;
197		*ra = reg;
198		reg = get_SrcBDest_Y2(bundle);
199		reg_map |= 1ULL << reg;
200
201		if (rd) {
202			/* Load. */
203			*rd = reg;
204			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
205		} else {
206			/* Store. */
207			*rb = reg;
208			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
209		}
210
211		if (!is_bundle_y1_nop(bundle)) {
212			reg = get_SrcA_Y1(bundle);
213			reg_map |= (1ULL << reg);
214			map = (1ULL << reg);
215
216			reg = get_SrcB_Y1(bundle);
217			reg_map |= (1ULL << reg);
218			map |= (1ULL << reg);
219
220			reg = get_Dest_Y1(bundle);
221			reg_map |= (1ULL << reg);
222			map |= (1ULL << reg);
223
224			if (map & alias_reg_map)
225				alias = true;
226		}
227
228		if (!is_bundle_y0_nop(bundle)) {
229			reg = get_SrcA_Y0(bundle);
230			reg_map |= (1ULL << reg);
231			map = (1ULL << reg);
232
233			reg = get_SrcB_Y0(bundle);
234			reg_map |= (1ULL << reg);
235			map |= (1ULL << reg);
236
237			reg = get_Dest_Y0(bundle);
238			reg_map |= (1ULL << reg);
239			map |= (1ULL << reg);
240
241			if (map & alias_reg_map)
242				alias = true;
243		}
244	} else	{ /* X Mode Bundle. */
245
246		reg = get_SrcA_X1(bundle);
247		reg_map |= (1ULL << reg);
248		*ra = reg;
249		if (rd)	{
250			/* Load. */
251			reg = get_Dest_X1(bundle);
252			reg_map |= (1ULL << reg);
253			*rd = reg;
254			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
255		} else {
256			/* Store. */
257			reg = get_SrcB_X1(bundle);
258			reg_map |= (1ULL << reg);
259			*rb = reg;
260			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
261		}
262
263		if (!is_bundle_x0_nop(bundle)) {
264			reg = get_SrcA_X0(bundle);
265			reg_map |= (1ULL << reg);
266			map = (1ULL << reg);
267
268			reg = get_SrcB_X0(bundle);
269			reg_map |= (1ULL << reg);
270			map |= (1ULL << reg);
271
272			reg = get_Dest_X0(bundle);
273			reg_map |= (1ULL << reg);
274			map |= (1ULL << reg);
275
276			if (map & alias_reg_map)
277				alias = true;
278		}
279	}
280
	/*
	 * "alias" indicates whether the unaligned access registers collide
	 * with other registers in the same bundle. We simply test the
	 * all-register-operand (RRR) cases and ignore immediate operands.
	 * If a bundle has no register alias, we can do the fixup in a
	 * simpler and faster manner, so if an immediate field happens to
	 * match a register number we may end up falling back to the
	 * generic handling.
	 */
289
290	*r_alias = alias;
291
292	/* Flip bits on reg_map. */
293	reg_map ^= -1ULL;
294
	/* Scan the lower 54 (TREG_SP) bits of reg_map to find 3 set bits. */
296	for (i = 0; i < TREG_SP; i++) {
297		if (reg_map & (0x1ULL << i)) {
298			if (*clob1 == -1) {
299				*clob1 = i;
300			} else if (*clob2 == -1) {
301				*clob2 = i;
302			} else if (*clob3 == -1) {
303				*clob3 = i;
304				return;
305			}
306		}
307	}
308}
309
/*
 * Sanity check the registers ra, rb, rd and clob1/2/3. Return true if any
 * of them is unexpected.
 */
314
315static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
316		       uint64_t clob1, uint64_t clob2,  uint64_t clob3)
317{
318	bool unexpected = false;
319	if ((ra >= 56) && (ra != TREG_ZERO))
320		unexpected = true;
321
322	if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
323		unexpected = true;
324
325	if (rd != -1) {
326		if ((rd >= 56) && (rd != TREG_ZERO))
327			unexpected = true;
328	} else {
329		if ((rb >= 56) && (rb != TREG_ZERO))
330			unexpected = true;
331	}
332	return unexpected;
333}
334
335
336#define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
337#define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
338#define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
339#define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
340#define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
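
/*
 * For reference, the bit ranges these masks select within a 64-bit bundle
 * (derived directly from the mask values above):
 *
 *   X0: bits  0..30             X1: bits 31..61
 *   Y0: bits  0..19 and 27..30  Y1: the Y0 fields shifted up by 31
 *   Y2: bits 20..26 and 51..57
 */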
341
342#ifdef __LITTLE_ENDIAN
343#define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
344#else
345#define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
346#endif /* __LITTLE_ENDIAN */
347
/*
 * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data
 * section. The corresponding static function jit_x#_###(.) generates a
 * partial or whole bundle based on the template and the given arguments.
 */
353
354#define __JIT_CODE(_X_)						\
355	asm (".pushsection .rodata.unalign_data, \"a\"\n"	\
356	     _X_"\n"						\
357	     ".popsection\n")
358
359__JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
360static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
361{
362	extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
363	return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
364		create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
365}
366
367__JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
368static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
369{
370	extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
371	return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
372		create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
373}
374
375__JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
376static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
377{
378	extern  tilegx_bundle_bits __unalign_jit_x0_addi;
379	return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
380		create_Dest_X0(rd) | create_SrcA_X0(ra) |
381		create_Imm8_X0(imm8);
382}
383
384__JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
385static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
386{
387	extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
388	return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) &  GX_INSN_X1_MASK) |
389		create_Dest_X1(rd) | create_SrcA_X1(ra);
390}
391
392__JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
393static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
394{
395	extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
396	return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
397		create_Dest_X0(rd) | create_SrcA_X0(ra) |
398		create_SrcB_X0(rb);
399}
400
401__JIT_CODE("__unalign_jit_x1_iret:   {iret}");
402static tilegx_bundle_bits  jit_x1_iret(void)
403{
404	extern  tilegx_bundle_bits __unalign_jit_x1_iret;
405	return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
406}
407
408__JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
409static tilegx_bundle_bits  jit_x0_fnop(void)
410{
411	extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
412	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
413}
414
415static tilegx_bundle_bits  jit_x1_fnop(void)
416{
417	extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
418	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
419}
420
421__JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
422static tilegx_bundle_bits  jit_y2_dummy(void)
423{
424	extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
425	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
426}
427
428static tilegx_bundle_bits  jit_y1_fnop(void)
429{
430	extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
431	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
432}
433
434__JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
435static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
436{
437	extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
438	return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
439		(~create_SrcA_X1(-1)) &
440		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
441		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
442}
443
444__JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
445static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
446{
447	extern  tilegx_bundle_bits __unalign_jit_x1_st;
448	return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
449		create_SrcA_X1(ra) | create_SrcB_X1(rb);
450}
451
452__JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
453static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
454{
455	extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
456	return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
457		(~create_SrcA_X1(-1)) &
458		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
459		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
460}
461
462__JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
463static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
464{
465	extern  tilegx_bundle_bits __unalign_jit_x1_ld;
466	return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
467		create_Dest_X1(rd) | create_SrcA_X1(ra);
468}
469
470__JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
471static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
472{
473	extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
474	return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
475		(~create_Dest_X1(-1)) &
476		GX_INSN_X1_MASK) | create_Dest_X1(rd) |
477		create_SrcA_X1(ra) | create_Imm8_X1(imm8);
478}
479
480__JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
481static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
482{
483	extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
484	return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
485		GX_INSN_X0_MASK) |
486		create_Dest_X0(rd) | create_SrcA_X0(ra) |
487		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
488}
489
490__JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
491static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
492{
493	extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
494	return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
495		GX_INSN_X0_MASK) |
496		create_Dest_X0(rd) | create_SrcA_X0(ra) |
497		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
498}
499
500__JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
501static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
502{
503	extern  tilegx_bundle_bits __unalign_jit_x1_addi;
504	return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
505		create_Dest_X1(rd) | create_SrcA_X1(ra) |
506		create_Imm8_X1(imm8);
507}
508
509__JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
510static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
511{
512	extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
513	return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
514		GX_INSN_X0_MASK) |
515		create_Dest_X0(rd) | create_SrcA_X0(ra) |
516		create_ShAmt_X0(imm6);
517}
518
519__JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
520static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
521{
522	extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
523	return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
524		GX_INSN_X0_MASK) |
525		create_Dest_X0(rd) | create_SrcA_X0(ra) |
526		create_ShAmt_X0(imm6);
527}
528
529__JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
530static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
531{
532	extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
533	return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
534		GX_INSN_X1_MASK) |
535		create_SrcA_X1(ra) | create_BrOff_X1(broff);
536}
537
538#undef __JIT_CODE
539
/*
 * This function generates the unaligned fixup JIT.
 *
 * We first find the unaligned load/store instruction's destination and
 * source registers (ra, rb and rd) and 3 scratch registers by calling
 * find_regs(). The 3 scratch clobbers must not alias with any register
 * used in the fault bundle. Then we analyze the fault bundle to determine
 * whether it is a load or a store, the operand width, and whether there is
 * a branch or an address increment, etc. Finally the generated JIT is
 * copied into the JIT code area in user space.
 */
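
/*
 * As a rough illustration (the exact sequence generated below varies by
 * case), an unaligned "ld rd, ra" in the simple, alias-free case is
 * rewritten roughly as:
 *
 *   { addi clob1, ra, 7 }        // point clob1 past the unaligned word
 *   { ldna rd, ra }              // aligned load covering the low bytes
 *   { ldna clob1, clob1 }        // aligned load covering the high bytes
 *   { dblalign rd, clob1, ra }   // merge the two halves into rd
 *   { iret }                     // return to the user's program
 *
 * plus stack saves and restores of the clobbered scratch registers.
 */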
550
551static
552void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
553		    int align_ctl)
554{
555	struct thread_info *info = current_thread_info();
556	struct unaligned_jit_fragment frag;
557	struct unaligned_jit_fragment *jit_code_area;
558	tilegx_bundle_bits bundle_2 = 0;
	/* If bundle_2_enable is false, bundle_2 is a fnop/nop operation. */
560	bool     bundle_2_enable = true;
561	uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
	/*
	 * Indicates whether the unaligned access instruction's registers
	 * collide with other registers in the same bundle.
	 */
567	bool     alias = false;
568	bool     load_n_store = true;
569	bool     load_store_signed = false;
570	unsigned int  load_store_size = 8;
	bool     y1_br = false;  /* True for a branch in the same bundle at Y1. */
572	int      y1_br_reg = 0;
	/* True for a link operation, i.e. jalr or lnk at Y1. */
574	bool     y1_lr = false;
575	int      y1_lr_reg = 0;
	bool     x1_add = false; /* True for a load/store ADD instruction at X1. */
577	int      x1_add_imm8 = 0;
578	bool     unexpected = false;
579	int      n = 0, k;
580
581	jit_code_area =
582		(struct unaligned_jit_fragment *)(info->unalign_jit_base);
583
584	memset((void *)&frag, 0, sizeof(frag));
585
586	/* 0: X mode, Otherwise: Y mode. */
587	if (bundle & TILEGX_BUNDLE_MODE_MASK) {
588		unsigned int mod, opcode;
589
590		if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
591		    get_RRROpcodeExtension_Y1(bundle) ==
592		    UNARY_RRR_1_OPCODE_Y1) {
593
594			opcode = get_UnaryOpcodeExtension_Y1(bundle);
595
			/*
			 * Test for a "jalr", "jalrp", "jr" or "jrp"
			 * instruction in the Y1 pipeline.
			 */
600			switch (opcode) {
601			case JALR_UNARY_OPCODE_Y1:
602			case JALRP_UNARY_OPCODE_Y1:
603				y1_lr = true;
604				y1_lr_reg = 55; /* Link register. */
605				/* FALLTHROUGH */
606			case JR_UNARY_OPCODE_Y1:
607			case JRP_UNARY_OPCODE_Y1:
608				y1_br = true;
609				y1_br_reg = get_SrcA_Y1(bundle);
610				break;
611			case LNK_UNARY_OPCODE_Y1:
612				/* "lnk" at Y1 pipeline. */
613				y1_lr = true;
614				y1_lr_reg = get_Dest_Y1(bundle);
615				break;
616			}
617		}
618
619		opcode = get_Opcode_Y2(bundle);
620		mod = get_Mode(bundle);
621
		/*
		 * bundle_2 is the bundle after turning Y2 into a dummy
		 * operation: "ld zero, sp".
		 */
626		bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
627
		/* Turn Y1 into an fnop if Y1 is a branch or lnk operation. */
629		if (y1_br || y1_lr) {
630			bundle_2 &= ~(GX_INSN_Y1_MASK);
631			bundle_2 |= jit_y1_fnop();
632		}
633
634		if (is_y0_y1_nop(bundle_2))
635			bundle_2_enable = false;
636
637		if (mod == MODE_OPCODE_YC2) {
638			/* Store. */
639			load_n_store = false;
640			load_store_size = 1 << opcode;
641			load_store_signed = false;
642			find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
643				  &clob3, &alias);
644			if (load_store_size > 8)
645				unexpected = true;
646		} else {
647			/* Load. */
648			load_n_store = true;
649			if (mod == MODE_OPCODE_YB2) {
650				switch (opcode) {
651				case LD_OPCODE_Y2:
652					load_store_signed = false;
653					load_store_size = 8;
654					break;
655				case LD4S_OPCODE_Y2:
656					load_store_signed = true;
657					load_store_size = 4;
658					break;
659				case LD4U_OPCODE_Y2:
660					load_store_signed = false;
661					load_store_size = 4;
662					break;
663				default:
664					unexpected = true;
665				}
666			} else if (mod == MODE_OPCODE_YA2) {
667				if (opcode == LD2S_OPCODE_Y2) {
668					load_store_signed = true;
669					load_store_size = 2;
670				} else if (opcode == LD2U_OPCODE_Y2) {
671					load_store_signed = false;
672					load_store_size = 2;
673				} else
674					unexpected = true;
675			} else
676				unexpected = true;
677			find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
678				  &clob3, &alias);
679		}
680	} else {
681		unsigned int opcode;
682
		/* bundle_2 is the bundle after turning X1 into an "fnop". */
684		bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
685
686		if (is_x0_x1_nop(bundle_2))
687			bundle_2_enable = false;
688
689		if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
690			opcode = get_UnaryOpcodeExtension_X1(bundle);
691
692			if (get_RRROpcodeExtension_X1(bundle) ==
693			    UNARY_RRR_0_OPCODE_X1) {
694				load_n_store = true;
695				find_regs(bundle, &rd, &ra, &rb, &clob1,
696					  &clob2, &clob3, &alias);
697
698				switch (opcode) {
699				case LD_UNARY_OPCODE_X1:
700					load_store_signed = false;
701					load_store_size = 8;
702					break;
703				case LD4S_UNARY_OPCODE_X1:
704					load_store_signed = true;
705					/* FALLTHROUGH */
706				case LD4U_UNARY_OPCODE_X1:
707					load_store_size = 4;
708					break;
709
710				case LD2S_UNARY_OPCODE_X1:
711					load_store_signed = true;
712					/* FALLTHROUGH */
713				case LD2U_UNARY_OPCODE_X1:
714					load_store_size = 2;
715					break;
716				default:
717					unexpected = true;
718				}
719			} else {
720				load_n_store = false;
721				load_store_signed = false;
722				find_regs(bundle, 0, &ra, &rb,
723					  &clob1, &clob2, &clob3,
724					  &alias);
725
726				opcode = get_RRROpcodeExtension_X1(bundle);
727				switch (opcode)	{
728				case ST_RRR_0_OPCODE_X1:
729					load_store_size = 8;
730					break;
731				case ST4_RRR_0_OPCODE_X1:
732					load_store_size = 4;
733					break;
734				case ST2_RRR_0_OPCODE_X1:
735					load_store_size = 2;
736					break;
737				default:
738					unexpected = true;
739				}
740			}
741		} else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
742			load_n_store = true;
743			opcode = get_Imm8OpcodeExtension_X1(bundle);
744			switch (opcode)	{
745			case LD_ADD_IMM8_OPCODE_X1:
746				load_store_size = 8;
747				break;
748
749			case LD4S_ADD_IMM8_OPCODE_X1:
750				load_store_signed = true;
751				/* FALLTHROUGH */
752			case LD4U_ADD_IMM8_OPCODE_X1:
753				load_store_size = 4;
754				break;
755
756			case LD2S_ADD_IMM8_OPCODE_X1:
757				load_store_signed = true;
758				/* FALLTHROUGH */
759			case LD2U_ADD_IMM8_OPCODE_X1:
760				load_store_size = 2;
761				break;
762
763			case ST_ADD_IMM8_OPCODE_X1:
764				load_n_store = false;
765				load_store_size = 8;
766				break;
767			case ST4_ADD_IMM8_OPCODE_X1:
768				load_n_store = false;
769				load_store_size = 4;
770				break;
771			case ST2_ADD_IMM8_OPCODE_X1:
772				load_n_store = false;
773				load_store_size = 2;
774				break;
775			default:
776				unexpected = true;
777			}
778
779			if (!unexpected) {
780				x1_add = true;
781				if (load_n_store)
782					x1_add_imm8 = get_Imm8_X1(bundle);
783				else
784					x1_add_imm8 = get_Dest_Imm8_X1(bundle);
785			}
786
787			find_regs(bundle, load_n_store ? (&rd) : NULL,
788				  &ra, &rb, &clob1, &clob2, &clob3, &alias);
789		} else
790			unexpected = true;
791	}
792
	/*
	 * Sanity-check the register numbers extracted from the fault bundle.
	 */
796	if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
797		unexpected = true;
798
	/* Warn if register ra in fact holds an aligned address. */
800	if (!unexpected)
801		WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
802
803
	/*
	 * The fault came from kernel space. Here we only need to take care
	 * of the unaligned "get_user/put_user" macros defined in "uaccess.h".
	 * Basically, we will handle bundles like this:
	 * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
	 * (Refer to file "arch/tile/include/asm/uaccess.h" for details.)
	 * For either a load or a store, the byte-wise operation is performed
	 * by calling get_user() or put_user(). If the macro returns a
	 * non-zero value, that value is placed in rx, otherwise rx is set to
	 * zero. Finally make pc point to the next bundle and return.
	 */
815
816	if (EX1_PL(regs->ex1) != USER_PL) {
817
818		unsigned long rx = 0;
819		unsigned long x = 0, ret = 0;
820
821		if (y1_br || y1_lr || x1_add ||
822		    (load_store_signed !=
823		     (load_n_store && load_store_size == 4))) {
			/* Not handled: branch, link, load/store add, wrong sign-ext. */
825			unexpected = true;
826		} else if (!unexpected) {
827			if (bundle & TILEGX_BUNDLE_MODE_MASK) {
				/*
				 * The fault bundle is in Y mode.
				 * Check whether Y1 and Y0 have the form
				 * { movei rx, 0; nop/fnop }; if so,
				 * find rx.
				 */
834
835				if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
836				    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
837				    (get_Imm8_Y1(bundle) == 0) &&
838				    is_bundle_y0_nop(bundle)) {
839					rx = get_Dest_Y1(bundle);
840				} else if ((get_Opcode_Y0(bundle) ==
841					    ADDI_OPCODE_Y0) &&
842					   (get_SrcA_Y0(bundle) == TREG_ZERO) &&
843					   (get_Imm8_Y0(bundle) == 0) &&
844					   is_bundle_y1_nop(bundle)) {
845					rx = get_Dest_Y0(bundle);
846				} else {
847					unexpected = true;
848				}
849			} else {
				/*
				 * The fault bundle is in X mode.
				 * Check whether X0 is "movei rx, 0";
				 * if so, find rx.
				 */
855
856				if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
857				    && (get_Imm8OpcodeExtension_X0(bundle) ==
858					ADDI_IMM8_OPCODE_X0) &&
859				    (get_SrcA_X0(bundle) == TREG_ZERO) &&
860				    (get_Imm8_X0(bundle) == 0)) {
861					rx = get_Dest_X0(bundle);
862				} else {
863					unexpected = true;
864				}
865			}
866
867			/* rx should be less than 56. */
868			if (!unexpected && (rx >= 56))
869				unexpected = true;
870		}
871
872		if (!search_exception_tables(regs->pc)) {
873			/* No fixup in the exception tables for the pc. */
874			unexpected = true;
875		}
876
877		if (unexpected) {
878			/* Unexpected unalign kernel fault. */
879			struct task_struct *tsk = validate_current();
880
881			bust_spinlocks(1);
882
883			show_regs(regs);
884
885			if (unlikely(tsk->pid < 2)) {
886				panic("Kernel unalign fault running %s!",
887				      tsk->pid ? "init" : "the idle task");
888			}
889#ifdef SUPPORT_DIE
890			die("Oops", regs);
891#endif
892			bust_spinlocks(1);
893
894			do_group_exit(SIGKILL);
895
896		} else {
897			unsigned long i, b = 0;
898			unsigned char *ptr =
899				(unsigned char *)regs->regs[ra];
900			if (load_n_store) {
901				/* handle get_user(x, ptr) */
902				for (i = 0; i < load_store_size; i++) {
903					ret = get_user(b, ptr++);
904					if (!ret) {
905						/* Success! update x. */
906#ifdef __LITTLE_ENDIAN
907						x |= (b << (8 * i));
908#else
909						x <<= 8;
910						x |= b;
911#endif /* __LITTLE_ENDIAN */
912					} else {
913						x = 0;
914						break;
915					}
916				}
917
918				/* Sign-extend 4-byte loads. */
919				if (load_store_size == 4)
920					x = (long)(int)x;
921
922				/* Set register rd. */
923				regs->regs[rd] = x;
924
925				/* Set register rx. */
926				regs->regs[rx] = ret;
927
928				/* Bump pc. */
929				regs->pc += 8;
930
931			} else {
932				/* Handle put_user(x, ptr) */
933				x = regs->regs[rb];
934#ifdef __LITTLE_ENDIAN
935				b = x;
936#else
937				/*
938				 * Swap x in order to store x from low
939				 * to high memory same as the
940				 * little-endian case.
941				 */
942				switch (load_store_size) {
943				case 8:
944					b = swab64(x);
945					break;
946				case 4:
947					b = swab32(x);
948					break;
949				case 2:
950					b = swab16(x);
951					break;
952				}
953#endif /* __LITTLE_ENDIAN */
954				for (i = 0; i < load_store_size; i++) {
955					ret = put_user(b, ptr++);
956					if (ret)
957						break;
958					/* Success! shift 1 byte. */
959					b >>= 8;
960				}
961				/* Set register rx. */
962				regs->regs[rx] = ret;
963
964				/* Bump pc. */
965				regs->pc += 8;
966			}
967		}
968
969		unaligned_fixup_count++;
970
971		if (unaligned_printk) {
972			pr_info("%s/%d. Unalign fixup for kernel access "
973				"to userspace %lx.",
974				current->comm, current->pid, regs->regs[ra]);
975		}
976
977		/* Done! Return to the exception handler. */
978		return;
979	}
980
981	if ((align_ctl == 0) || unexpected) {
982		siginfo_t info = {
983			.si_signo = SIGBUS,
984			.si_code = BUS_ADRALN,
985			.si_addr = (unsigned char __user *)0
986		};
987		if (unaligned_printk)
988			pr_info("Unalign bundle: unexp @%llx, %llx",
989				(unsigned long long)regs->pc,
990				(unsigned long long)bundle);
991
992		if (ra < 56) {
993			unsigned long uaa = (unsigned long)regs->regs[ra];
994			/* Set bus Address. */
995			info.si_addr = (unsigned char __user *)uaa;
996		}
997
998		unaligned_fixup_count++;
999
1000		trace_unhandled_signal("unaligned fixup trap", regs,
1001				       (unsigned long)info.si_addr, SIGBUS);
1002		force_sig_info(info.si_signo, &info, current);
1003		return;
1004	}
1005
1006#ifdef __LITTLE_ENDIAN
1007#define UA_FIXUP_ADDR_DELTA          1
1008#define UA_FIXUP_BFEXT_START(_B_)    0
1009#define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
1010#else /* __BIG_ENDIAN */
1011#define UA_FIXUP_ADDR_DELTA          -1
1012#define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
1013#define UA_FIXUP_BFEXT_END(_B_)      63
1014#endif /* __LITTLE_ENDIAN */
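
/*
 * For example, a 4-byte access on a little-endian kernel uses an address
 * step of +1 and a bit-field extract over bits [0, 31]
 * (UA_FIXUP_BFEXT_START(4) == 0, UA_FIXUP_BFEXT_END(4) == 31); on a
 * big-endian kernel the step is -1 and the extract covers bits [32, 63].
 */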
1015
1016
1017
1018	if ((ra != rb) && (rd != TREG_SP) && !alias &&
1019	    !y1_br && !y1_lr && !x1_add) {
		/*
		 * Simple case: ra != rb, no register alias found, and no
		 * branch or link. This covers the majority of cases.
		 * We can do a little better here than with the generic
		 * scheme below.
		 */
1026		if (!load_n_store) {
			/*
			 * Simple store: ra != rb, no scratch register needed.
			 * Just store and rotate right byte-wise.
			 */
1031#ifdef __BIG_ENDIAN
1032			frag.insn[n++] =
1033				jit_x0_addi(ra, ra, load_store_size - 1) |
1034				jit_x1_fnop();
1035#endif /* __BIG_ENDIAN */
1036			for (k = 0; k < load_store_size; k++) {
1037				/* Store a byte. */
1038				frag.insn[n++] =
1039					jit_x0_rotli(rb, rb, 56) |
1040					jit_x1_st1_add(ra, rb,
1041						       UA_FIXUP_ADDR_DELTA);
1042			}
1043#ifdef __BIG_ENDIAN
1044			frag.insn[n] = jit_x1_addi(ra, ra, 1);
1045#else
1046			frag.insn[n] = jit_x1_addi(ra, ra,
1047						   -1 * load_store_size);
1048#endif /* __LITTLE_ENDIAN */
1049
1050			if (load_store_size == 8) {
1051				frag.insn[n] |= jit_x0_fnop();
1052			} else if (load_store_size == 4) {
1053				frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1054			} else { /* = 2 */
1055				frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1056			}
1057			n++;
1058			if (bundle_2_enable)
1059				frag.insn[n++] = bundle_2;
1060			frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1061		} else {
1062			if (rd == ra) {
1063				/* Use two clobber registers: clob1/2. */
1064				frag.insn[n++] =
1065					jit_x0_addi(TREG_SP, TREG_SP, -16) |
1066					jit_x1_fnop();
1067				frag.insn[n++] =
1068					jit_x0_addi(clob1, ra, 7) |
1069					jit_x1_st_add(TREG_SP, clob1, -8);
1070				frag.insn[n++] =
1071					jit_x0_addi(clob2, ra, 0) |
1072					jit_x1_st(TREG_SP, clob2);
1073				frag.insn[n++] =
1074					jit_x0_fnop() |
1075					jit_x1_ldna(rd, ra);
1076				frag.insn[n++] =
1077					jit_x0_fnop() |
1078					jit_x1_ldna(clob1, clob1);
				/*
				 * Note: we must make sure that rd is not
				 * sp. Recover clob1/2 from the stack.
				 */
1083				frag.insn[n++] =
1084					jit_x0_dblalign(rd, clob1, clob2) |
1085					jit_x1_ld_add(clob2, TREG_SP, 8);
1086				frag.insn[n++] =
1087					jit_x0_fnop() |
1088					jit_x1_ld_add(clob1, TREG_SP, 16);
1089			} else {
1090				/* Use one clobber register: clob1 only. */
1091				frag.insn[n++] =
1092					jit_x0_addi(TREG_SP, TREG_SP, -16) |
1093					jit_x1_fnop();
1094				frag.insn[n++] =
1095					jit_x0_addi(clob1, ra, 7) |
1096					jit_x1_st(TREG_SP, clob1);
1097				frag.insn[n++] =
1098					jit_x0_fnop() |
1099					jit_x1_ldna(rd, ra);
1100				frag.insn[n++] =
1101					jit_x0_fnop() |
1102					jit_x1_ldna(clob1, clob1);
				/*
				 * Note: we must make sure that rd is not
				 * sp. Recover clob1 from the stack.
				 */
1107				frag.insn[n++] =
1108					jit_x0_dblalign(rd, clob1, ra) |
1109					jit_x1_ld_add(clob1, TREG_SP, 16);
1110			}
1111
1112			if (bundle_2_enable)
1113				frag.insn[n++] = bundle_2;
			/*
			 * For a non-8-byte load, extract the corresponding
			 * bytes and sign- or zero-extend.
			 */
1118			if (load_store_size == 4) {
1119				if (load_store_signed)
1120					frag.insn[n++] =
1121						jit_x0_bfexts(
1122							rd, rd,
1123							UA_FIXUP_BFEXT_START(4),
1124							UA_FIXUP_BFEXT_END(4)) |
1125						jit_x1_fnop();
1126				else
1127					frag.insn[n++] =
1128						jit_x0_bfextu(
1129							rd, rd,
1130							UA_FIXUP_BFEXT_START(4),
1131							UA_FIXUP_BFEXT_END(4)) |
1132						jit_x1_fnop();
1133			} else if (load_store_size == 2) {
1134				if (load_store_signed)
1135					frag.insn[n++] =
1136						jit_x0_bfexts(
1137							rd, rd,
1138							UA_FIXUP_BFEXT_START(2),
1139							UA_FIXUP_BFEXT_END(2)) |
1140						jit_x1_fnop();
1141				else
1142					frag.insn[n++] =
1143						jit_x0_bfextu(
1144							rd, rd,
1145							UA_FIXUP_BFEXT_START(2),
1146							UA_FIXUP_BFEXT_END(2)) |
1147						jit_x1_fnop();
1148			}
1149
1150			frag.insn[n++] =
1151				jit_x0_fnop()  |
1152				jit_x1_iret();
1153		}
1154	} else if (!load_n_store) {
1155
		/*
		 * Generic memory store cases: use 3 clobber registers.
		 *
		 * Allocate space for saving clob2, clob1 and clob3 on the
		 * user's stack. Register clob3 points to where clob2 is
		 * saved, followed by clob1 and clob3 from high to low
		 * memory.
		 */
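		/*
		 * For illustration, the 8-byte store case below is emitted
		 * as a compact byte-at-a-time loop, roughly:
		 *
		 *   { addi clob2, zero, 7 }        // loop counter
		 *   { rotli rb, rb, 56; st1_add clob1, rb, delta }
		 *   { addi clob2, clob2, -1; bnezt clob2, <back to store> }
		 *
		 * where delta is +1 on little-endian and -1 on big-endian
		 * kernels (UA_FIXUP_ADDR_DELTA).
		 */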
1163		frag.insn[n++] =
1164			jit_x0_addi(TREG_SP, TREG_SP, -32)    |
1165			jit_x1_fnop();
1166		frag.insn[n++] =
1167			jit_x0_addi(clob3, TREG_SP, 16)  |
1168			jit_x1_st_add(TREG_SP, clob3, 8);
1169#ifdef __LITTLE_ENDIAN
1170		frag.insn[n++] =
1171			jit_x0_addi(clob1, ra, 0)   |
1172			jit_x1_st_add(TREG_SP, clob1, 8);
1173#else
1174		frag.insn[n++] =
1175			jit_x0_addi(clob1, ra, load_store_size - 1)   |
1176			jit_x1_st_add(TREG_SP, clob1, 8);
1177#endif
1178		if (load_store_size == 8) {
			/*
			 * We store one byte at a time, not for speed but for
			 * compact code. After each store the data source
			 * register is rotated right by one byte, so it is
			 * unchanged after 8 stores.
			 */
1184			frag.insn[n++] =
1185				jit_x0_addi(clob2, TREG_ZERO, 7)     |
1186				jit_x1_st_add(TREG_SP, clob2, 16);
1187			frag.insn[n++] =
1188				jit_x0_rotli(rb, rb, 56)      |
1189				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1190			frag.insn[n++] =
1191				jit_x0_addi(clob2, clob2, -1) |
1192				jit_x1_bnezt(clob2, -1);
1193			frag.insn[n++] =
1194				jit_x0_fnop()                 |
1195				jit_x1_addi(clob2, y1_br_reg, 0);
1196		} else if (load_store_size == 4) {
1197			frag.insn[n++] =
1198				jit_x0_addi(clob2, TREG_ZERO, 3)     |
1199				jit_x1_st_add(TREG_SP, clob2, 16);
1200			frag.insn[n++] =
1201				jit_x0_rotli(rb, rb, 56)      |
1202				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1203			frag.insn[n++] =
1204				jit_x0_addi(clob2, clob2, -1) |
1205				jit_x1_bnezt(clob2, -1);
			/*
			 * Same as the 8-byte case, but shift another 4 bytes
			 * to recover rb for the 4-byte store.
			 */
1210			frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
1211				jit_x1_addi(clob2, y1_br_reg, 0);
1212		} else { /* =2 */
1213			frag.insn[n++] =
1214				jit_x0_addi(clob2, rb, 0)     |
1215				jit_x1_st_add(TREG_SP, clob2, 16);
1216			for (k = 0; k < 2; k++) {
1217				frag.insn[n++] =
1218					jit_x0_shrui(rb, rb, 8)  |
1219					jit_x1_st1_add(clob1, rb,
1220						       UA_FIXUP_ADDR_DELTA);
1221			}
1222			frag.insn[n++] =
1223				jit_x0_addi(rb, clob2, 0)       |
1224				jit_x1_addi(clob2, y1_br_reg, 0);
1225		}
1226
1227		if (bundle_2_enable)
1228			frag.insn[n++] = bundle_2;
1229
1230		if (y1_lr) {
1231			frag.insn[n++] =
1232				jit_x0_fnop()                    |
1233				jit_x1_mfspr(y1_lr_reg,
1234					     SPR_EX_CONTEXT_0_0);
1235		}
1236		if (y1_br) {
1237			frag.insn[n++] =
1238				jit_x0_fnop()                    |
1239				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1240					     clob2);
1241		}
1242		if (x1_add) {
1243			frag.insn[n++] =
1244				jit_x0_addi(ra, ra, x1_add_imm8) |
1245				jit_x1_ld_add(clob2, clob3, -8);
1246		} else {
1247			frag.insn[n++] =
1248				jit_x0_fnop()                    |
1249				jit_x1_ld_add(clob2, clob3, -8);
1250		}
1251		frag.insn[n++] =
1252			jit_x0_fnop()   |
1253			jit_x1_ld_add(clob1, clob3, -8);
1254		frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
1255		frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1256
1257	} else {
		/*
		 * Generic memory load cases.
		 *
		 * Allocate space for saving clob1, clob2 and clob3 on the
		 * user's stack. Register clob3 points to where clob1 is
		 * saved, followed by clob2 and clob3 from high to low
		 * memory.
		 */
1265
1266		frag.insn[n++] =
1267			jit_x0_addi(TREG_SP, TREG_SP, -32) |
1268			jit_x1_fnop();
1269		frag.insn[n++] =
1270			jit_x0_addi(clob3, TREG_SP, 16) |
1271			jit_x1_st_add(TREG_SP, clob3, 8);
1272		frag.insn[n++] =
1273			jit_x0_addi(clob2, ra, 0) |
1274			jit_x1_st_add(TREG_SP, clob2, 8);
1275
1276		if (y1_br) {
1277			frag.insn[n++] =
1278				jit_x0_addi(clob1, y1_br_reg, 0) |
1279				jit_x1_st_add(TREG_SP, clob1, 16);
1280		} else {
1281			frag.insn[n++] =
1282				jit_x0_fnop() |
1283				jit_x1_st_add(TREG_SP, clob1, 16);
1284		}
1285
1286		if (bundle_2_enable)
1287			frag.insn[n++] = bundle_2;
1288
1289		if (y1_lr) {
1290			frag.insn[n++] =
1291				jit_x0_fnop()  |
1292				jit_x1_mfspr(y1_lr_reg,
1293					     SPR_EX_CONTEXT_0_0);
1294		}
1295
1296		if (y1_br) {
1297			frag.insn[n++] =
1298				jit_x0_fnop() |
1299				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1300					     clob1);
1301		}
1302
1303		frag.insn[n++] =
1304			jit_x0_addi(clob1, clob2, 7)      |
1305			jit_x1_ldna(rd, clob2);
1306		frag.insn[n++] =
1307			jit_x0_fnop()                     |
1308			jit_x1_ldna(clob1, clob1);
1309		frag.insn[n++] =
1310			jit_x0_dblalign(rd, clob1, clob2) |
1311			jit_x1_ld_add(clob1, clob3, -8);
1312		if (x1_add) {
1313			frag.insn[n++] =
1314				jit_x0_addi(ra, ra, x1_add_imm8) |
1315				jit_x1_ld_add(clob2, clob3, -8);
1316		} else {
1317			frag.insn[n++] =
1318				jit_x0_fnop()  |
1319				jit_x1_ld_add(clob2, clob3, -8);
1320		}
1321
1322		frag.insn[n++] =
1323			jit_x0_fnop() |
1324			jit_x1_ld(clob3, clob3);
1325
1326		if (load_store_size == 4) {
1327			if (load_store_signed)
1328				frag.insn[n++] =
1329					jit_x0_bfexts(
1330						rd, rd,
1331						UA_FIXUP_BFEXT_START(4),
1332						UA_FIXUP_BFEXT_END(4)) |
1333					jit_x1_fnop();
1334			else
1335				frag.insn[n++] =
1336					jit_x0_bfextu(
1337						rd, rd,
1338						UA_FIXUP_BFEXT_START(4),
1339						UA_FIXUP_BFEXT_END(4)) |
1340					jit_x1_fnop();
1341		} else if (load_store_size == 2) {
1342			if (load_store_signed)
1343				frag.insn[n++] =
1344					jit_x0_bfexts(
1345						rd, rd,
1346						UA_FIXUP_BFEXT_START(2),
1347						UA_FIXUP_BFEXT_END(2)) |
1348					jit_x1_fnop();
1349			else
1350				frag.insn[n++] =
1351					jit_x0_bfextu(
1352						rd, rd,
1353						UA_FIXUP_BFEXT_START(2),
1354						UA_FIXUP_BFEXT_END(2)) |
1355					jit_x1_fnop();
1356		}
1357
1358		frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1359	}
1360
1361	/* Max JIT bundle count is 14. */
1362	WARN_ON(n > 14);
1363
1364	if (!unexpected) {
1365		int status = 0;
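		/*
		 * Each faulting pc maps to a fixed slot in the per-thread
		 * JIT page: drop the 3 low (bundle-offset) bits of pc and
		 * keep enough bits to index the page's slots, each
		 * 1 << UNALIGN_JIT_SHIFT bytes in size.
		 */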
1366		int idx = (regs->pc >> 3) &
1367			((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
1368
1369		frag.pc = regs->pc;
1370		frag.bundle = bundle;
1371
1372		if (unaligned_printk) {
1373			pr_info("%s/%d, Unalign fixup: pc=%lx "
1374				"bundle=%lx %d %d %d %d %d %d %d %d.",
1375				current->comm, current->pid,
1376				(unsigned long)frag.pc,
1377				(unsigned long)frag.bundle,
1378				(int)alias, (int)rd, (int)ra,
1379				(int)rb, (int)bundle_2_enable,
1380				(int)y1_lr, (int)y1_br, (int)x1_add);
1381
1382			for (k = 0; k < n; k += 2)
1383				pr_info("[%d] %016llx %016llx", k,
1384					(unsigned long long)frag.insn[k],
1385					(unsigned long long)frag.insn[k+1]);
1386		}
1387
		/* Swap the bundle byte order for big-endian systems. */
1389#ifdef __BIG_ENDIAN
1390		frag.bundle = GX_INSN_BSWAP(frag.bundle);
1391		for (k = 0; k < n; k++)
1392			frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1393#endif /* __BIG_ENDIAN */
1394
1395		status = copy_to_user((void __user *)&jit_code_area[idx],
1396				      &frag, sizeof(frag));
1397		if (status) {
			/* Failed to copy the JIT into userspace; send SIGSEGV. */
1399			siginfo_t info = {
1400				.si_signo = SIGSEGV,
1401				.si_code = SEGV_MAPERR,
1402				.si_addr = (void __user *)&jit_code_area[idx]
1403			};
1404
1405			pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx",
1406				current->pid, current->comm,
1407				(unsigned long long)&jit_code_area[idx]);
1408
1409			trace_unhandled_signal("segfault in unalign fixup",
1410					       regs,
1411					       (unsigned long)info.si_addr,
1412					       SIGSEGV);
1413			force_sig_info(info.si_signo, &info, current);
1414			return;
1415		}
1416
1417
		/* Do a cheap, non-atomic increment; the count need not be exact. */
1419		unaligned_fixup_count++;
1420		__flush_icache_range((unsigned long)&jit_code_area[idx],
1421				     (unsigned long)&jit_code_area[idx] +
1422				     sizeof(frag));
1423
1424		/* Setup SPR_EX_CONTEXT_0_0/1 for returning to user program.*/
1425		__insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1426		__insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
1427
		/* Point pc at the start of the new JIT. */
1429		regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1430		/* Set ICS in SPR_EX_CONTEXT_K_1. */
1431		regs->ex1 = PL_ICS_EX1(USER_PL, 1);
1432	}
1433}
1434
1435
/*
 * C function to generate the unaligned-data JIT. Called from the
 * unaligned-data interrupt handler.
 *
 * First check whether the unaligned fixup is disabled, the exception did
 * not come cleanly from user space (ICS set), or the sp register points to
 * an unaligned address; if so, generate a SIGBUS. Then map a page into
 * user space as the JIT area if it is not mapped yet. Generate the JIT
 * code by calling jit_bundle_gen(), and after that return back to the
 * exception handler.
 *
 * The exception handler will "iret" to the newly generated JIT code after
 * restoring the caller-saved registers. In theory, the JIT code will
 * perform another "iret" to resume the user's program.
 */
1450
1451void do_unaligned(struct pt_regs *regs, int vecnum)
1452{
1453	tilegx_bundle_bits __user  *pc;
1454	tilegx_bundle_bits bundle;
1455	struct thread_info *info = current_thread_info();
1456	int align_ctl;
1457
	/* Check the per-process unaligned fixup control (PR_SET_UNALIGN). */
1459	align_ctl = unaligned_fixup;
1460	switch (task_thread_info(current)->align_ctl) {
1461	case PR_UNALIGN_NOPRINT:
1462		align_ctl = 1;
1463		break;
1464	case PR_UNALIGN_SIGBUS:
1465		align_ctl = 0;
1466		break;
1467	}
1468
	/* Enable interrupts in order to access userspace. */
1470	local_irq_enable();
1471
	/*
	 * If the fault came from kernel space, there are two choices:
	 * (a) unaligned_fixup < 1: first apply the get_user/put_user
	 *     exception-table fixup to return -EFAULT. If there is no
	 *     fixup, simply panic the kernel.
	 * (b) unaligned_fixup >= 1: try to fix the unaligned access if it
	 *     was triggered by the get_user()/put_user() macros. Panic the
	 *     kernel if it is not fixable.
	 */
1480
1481	if (EX1_PL(regs->ex1) != USER_PL) {
1482
1483		if (align_ctl < 1) {
1484			unaligned_fixup_count++;
			/* The exception came from the kernel; try to fix it up. */
1486			if (fixup_exception(regs)) {
1487				if (unaligned_printk)
1488					pr_info("Unalign fixup: %d %llx @%llx",
1489						(int)unaligned_fixup,
1490						(unsigned long long)regs->ex1,
1491						(unsigned long long)regs->pc);
1492				return;
1493			}
1494			/* Not fixable. Go panic. */
1495			panic("Unalign exception in Kernel. pc=%lx",
1496			      regs->pc);
1497			return;
1498		} else {
1499			/*
1500			 * Try to fix the exception. If we can't, panic the
1501			 * kernel.
1502			 */
1503			bundle = GX_INSN_BSWAP(
1504				*((tilegx_bundle_bits *)(regs->pc)));
1505			jit_bundle_gen(regs, bundle, align_ctl);
1506			return;
1507		}
1508	}
1509
	/*
	 * The fault came from user space with ICS set, the stack is not
	 * aligned, or unaligned fixup is disabled entirely: trigger a
	 * SIGBUS.
	 */
1514	if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1515		siginfo_t info = {
1516			.si_signo = SIGBUS,
1517			.si_code = BUS_ADRALN,
1518			.si_addr = (unsigned char __user *)0
1519		};
1520
1521		if (unaligned_printk)
1522			pr_info("Unalign fixup: %d %llx @%llx",
1523				(int)unaligned_fixup,
1524				(unsigned long long)regs->ex1,
1525				(unsigned long long)regs->pc);
1526
1527		unaligned_fixup_count++;
1528
1529		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1530		force_sig_info(info.si_signo, &info, current);
1531		return;
1532	}
1533
1534
	/* Read the bundle that caused the exception. */
1536	pc = (tilegx_bundle_bits __user *)(regs->pc);
1537	if (get_user(bundle, pc) != 0) {
		/* We should rarely get here, since pc is a valid user address. */
1539		siginfo_t info = {
1540			.si_signo = SIGSEGV,
1541			.si_code = SEGV_MAPERR,
1542			.si_addr = (void __user *)pc
1543		};
1544		pr_err("Couldn't read instruction at %p trying to step\n", pc);
1545		trace_unhandled_signal("segfault in unalign fixup", regs,
1546				       (unsigned long)info.si_addr, SIGSEGV);
1547		force_sig_info(info.si_signo, &info, current);
1548		return;
1549	}
1550
1551	if (!info->unalign_jit_base) {
1552		void __user *user_page;
1553
1554		/*
1555		 * Allocate a page in userland.
1556		 * For 64-bit processes we try to place the mapping far
1557		 * from anything else that might be going on (specifically
1558		 * 64 GB below the top of the user address space).  If it
1559		 * happens not to be possible to put it there, it's OK;
1560		 * the kernel will choose another location and we'll
1561		 * remember it for later.
1562		 */
1563		if (is_compat_task())
1564			user_page = NULL;
1565		else
1566			user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1567				(current->pid << PAGE_SHIFT);
1568
1569		user_page = (void __user *) vm_mmap(NULL,
1570						    (unsigned long)user_page,
1571						    PAGE_SIZE,
1572						    PROT_EXEC | PROT_READ |
1573						    PROT_WRITE,
1574#ifdef CONFIG_HOMECACHE
1575						    MAP_CACHE_HOME_TASK |
1576#endif
1577						    MAP_PRIVATE |
1578						    MAP_ANONYMOUS,
1579						    0);
1580
1581		if (IS_ERR((void __force *)user_page)) {
1582			pr_err("Out of kernel pages trying do_mmap.\n");
1583			return;
1584		}
1585
1586		/* Save the address in the thread_info struct */
1587		info->unalign_jit_base = user_page;
1588		if (unaligned_printk)
1589			pr_info("Unalign bundle: %d:%d, allocate page @%llx",
1590				raw_smp_processor_id(), current->pid,
1591				(unsigned long long)user_page);
1592	}
1593
1594	/* Generate unalign JIT */
1595	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1596}
1597
1598#endif /* __tilegx__ */
1599