1/*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Brad Volkin <bradley.d.volkin@intel.com>
25 *
26 */
27
28#include "i915_drv.h"
29
30/**
31 * DOC: batch buffer command parser
32 *
33 * Motivation:
34 * Certain OpenGL features (e.g. transform feedback, performance monitoring)
35 * require userspace code to submit batches containing commands such as
36 * MI_LOAD_REGISTER_IMM to access various registers. Unfortunately, some
37 * generations of the hardware will noop these commands in "unsecure" batches
38 * (which includes all userspace batches submitted via i915) even though the
39 * commands may be safe and represent the intended programming model of the
40 * device.
41 *
42 * The software command parser is similar in operation to the command parsing
43 * done in hardware for unsecure batches. However, the software parser allows
44 * some operations that would be noop'd by hardware, if the parser determines
45 * the operation is safe, and submits the batch as "secure" to prevent hardware
46 * parsing.
47 *
48 * Threats:
49 * At a high level, the hardware (and software) checks attempt to prevent
50 * granting userspace undue privileges. There are three categories of privilege.
51 *
52 * First, commands which are explicitly defined as privileged or which should
53 * only be used by the kernel driver. The parser generally rejects such
54 * commands, though it may allow some from the drm master process.
55 *
56 * Second, commands which access registers. To support correct/enhanced
57 * userspace functionality, particularly certain OpenGL extensions, the parser
58 * provides a whitelist of registers which userspace may safely access (for both
59 * normal and drm master processes).
60 *
61 * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc).
62 * The parser always rejects such commands.
63 *
64 * The majority of the problematic commands fall in the MI_* range, with only a
65 * few specific commands on each ring (e.g. PIPE_CONTROL and MI_FLUSH_DW).
66 *
67 * Implementation:
68 * Each ring maintains tables of commands and registers which the parser uses in
69 * scanning batch buffers submitted to that ring.
70 *
71 * Since the set of commands that the parser must check for is significantly
72 * smaller than the number of commands supported, the parser tables contain only
73 * those commands required by the parser. This generally works because command
74 * opcode ranges have standard command length encodings. So for commands that
75 * the parser does not need to check, it can easily skip them. This is
 * implemented via a per-ring length decoding vfunc.
77 *
78 * Unfortunately, there are a number of commands that do not follow the standard
79 * length encoding for their opcode range, primarily amongst the MI_* commands.
80 * To handle this, the parser provides a way to define explicit "skip" entries
81 * in the per-ring command tables.
82 *
83 * Other command table entries map fairly directly to high level categories
84 * mentioned above: rejected, master-only, register whitelist. The parser
85 * implements a number of checks, including the privileged memory checks, via a
86 * general bitmasking mechanism.
87 */
88
/*
 * Standard opcode masks, one per command class. When a table descriptor is
 * matched against a batch dword, only the bits selected by the descriptor's
 * mask are compared.
 */
#define STD_MI_OPCODE_MASK  0xFF800000
#define STD_3D_OPCODE_MASK  0xFFFF0000
#define STD_2D_OPCODE_MASK  0xFFC00000
#define STD_MFX_OPCODE_MASK 0xFFFF0000
93
/*
 * Build one struct drm_i915_cmd_descriptor initializer.
 *
 * op:  command opcode value
 * opm: mask selecting the opcode bits of the command header
 * f:   true if the command has a fixed length (adds CMD_DESC_FIXED)
 * lm:  initializes the length field; the parser reads it as a fixed dword
 *      count when CMD_DESC_FIXED is set, otherwise as the mask that extracts
 *      the length field from the command header
 * fl:  CMD_DESC_* action flags
 * ...: optional designated initializers for further fields (.reg, .bits)
 */
#define CMD(op, opm, f, lm, fl, ...)				\
	{							\
		.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0),	\
		.cmd = { (op), (opm) }, 			\
		.length = { (lm) },				\
		__VA_ARGS__					\
	}
101
/*
 * Convenience macros to compress the tables. They are #undef'd again once
 * the tables have been defined.
 */
#define SMI STD_MI_OPCODE_MASK
#define S3D STD_3D_OPCODE_MASK
#define S2D STD_2D_OPCODE_MASK
#define SMFX STD_MFX_OPCODE_MASK
#define F true			/* fixed length; tables use !F for variable length */
#define S CMD_DESC_SKIP		/* check_cmd() applies no test for this flag */
#define R CMD_DESC_REJECT	/* command is always rejected */
#define W CMD_DESC_REGISTER	/* register operand is checked against whitelists */
#define B CMD_DESC_BITMASK	/* masked dwords are compared with expected values */
#define M CMD_DESC_MASTER	/* rejected unless the submitter is the drm master */
113
/*
 * Commands checked on every ring: this table is the first entry of each
 * per-ring drm_i915_cmd_table list below. Entries are expected to be ordered
 * by ascending masked opcode (see validate_cmds_sorted()).
 */
/*            Command                          Mask   Fixed Len   Action
	      ---------------------------------------------------------- */
static const struct drm_i915_cmd_descriptor common_cmds[] = {
	CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
	CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      R  ),
	CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      M  ),
	CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
	CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
	CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
	CMD(  MI_SEMAPHORE_MBOX,                SMI,   !F,  0xFF,   R  ),
	CMD(  MI_STORE_DWORD_INDEX,             SMI,   !F,  0xFF,   R  ),
	/* Register operand is taken from dword 1 and must be whitelisted. */
	CMD(  MI_LOAD_REGISTER_IMM(1),          SMI,   !F,  0xFF,   W,
	      .reg = { .offset = 1, .mask = 0x007FFFFC }               ),
	/* Whitelisted register only, and the MI_GLOBAL_GTT bit must be clear. */
	CMD(  MI_STORE_REGISTER_MEM(1),         SMI,   !F,  0xFF,   W | B,
	      .reg = { .offset = 1, .mask = 0x007FFFFC },
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_LOAD_REGISTER_MEM,             SMI,   !F,  0xFF,   W | B,
	      .reg = { .offset = 1, .mask = 0x007FFFFC },
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_BATCH_BUFFER_START,            SMI,   !F,  0xFF,   S  ),
};
143
/*
 * Render ring commands; part of both gen7_render_cmds and
 * hsw_render_ring_cmds. Entries are expected to be ordered by ascending
 * masked opcode (see validate_cmds_sorted()).
 */
static const struct drm_i915_cmd_descriptor render_cmds[] = {
	CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
	CMD(  MI_PREDICATE,                     SMI,    F,  1,      S  ),
	CMD(  MI_TOPOLOGY_FILTER,               SMI,    F,  1,      S  ),
	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
	CMD(  MI_SET_CONTEXT,                   SMI,   !F,  0xFF,   R  ),
	CMD(  MI_URB_CLEAR,                     SMI,   !F,  0xFF,   S  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0xFF,   R  ),
	CMD(  MI_CLFLUSH,                       SMI,   !F,  0x3FF,  B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_REPORT_PERF_COUNT,             SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 1,
			.mask = MI_REPORT_PERF_COUNT_GGTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  GFX_OP_3DSTATE_VF_STATISTICS,     S3D,    F,  1,      S  ),
	CMD(  PIPELINE_SELECT,                  S3D,    F,  1,      S  ),
	CMD(  MEDIA_VFE_STATE,			S3D,   !F,  0xFFFF, B,
	      .bits = {{
			.offset = 2,
			.mask = MEDIA_VFE_STATE_MMIO_ACCESS_MASK,
			.expected = 0,
	      }},						       ),
	CMD(  GPGPU_OBJECT,                     S3D,   !F,  0xFF,   S  ),
	CMD(  GPGPU_WALKER,                     S3D,   !F,  0xFF,   S  ),
	CMD(  GFX_OP_3DSTATE_SO_DECL_LIST,      S3D,   !F,  0x1FF,  S  ),
	/*
	 * The first bitmask always applies. The second one is only evaluated
	 * when a PIPE_CONTROL_POST_SYNC_OP_MASK bit is set in dword 1.
	 */
	CMD(  GFX_OP_PIPE_CONTROL(5),           S3D,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 1,
			.mask = (PIPE_CONTROL_MMIO_WRITE | PIPE_CONTROL_NOTIFY),
			.expected = 0,
	      },
	      {
			.offset = 1,
		        .mask = (PIPE_CONTROL_GLOBAL_GTT_IVB |
				 PIPE_CONTROL_STORE_DATA_INDEX),
			.expected = 0,
			.condition_offset = 1,
			.condition_mask = PIPE_CONTROL_POST_SYNC_OP_MASK,
	      }},						       ),
};
203
/*
 * Additional render ring commands that are only part of
 * hsw_render_ring_cmds. Entries are expected to be ordered by ascending
 * masked opcode (see validate_cmds_sorted()).
 */
static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
	CMD(  MI_SET_PREDICATE,                 SMI,    F,  1,      S  ),
	CMD(  MI_RS_CONTROL,                    SMI,    F,  1,      S  ),
	CMD(  MI_URB_ATOMIC_ALLOC,              SMI,    F,  1,      S  ),
	CMD(  MI_RS_CONTEXT,                    SMI,    F,  1,      S  ),
	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
	CMD(  MI_LOAD_REGISTER_REG,             SMI,   !F,  0xFF,   R  ),
	CMD(  MI_RS_STORE_DATA_IMM,             SMI,   !F,  0xFF,   S  ),
	CMD(  MI_LOAD_URB_MEM,                  SMI,   !F,  0xFF,   S  ),
	CMD(  MI_STORE_URB_MEM,                 SMI,   !F,  0xFF,   S  ),
	CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_VS,  S3D,   !F,  0x7FF,  S  ),
	CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_PS,  S3D,   !F,  0x7FF,  S  ),

	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS,  S3D,   !F,  0x1FF,  S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS,  S3D,   !F,  0x1FF,  S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS,  S3D,   !F,  0x1FF,  S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS,  S3D,   !F,  0x1FF,  S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS,  S3D,   !F,  0x1FF,  S  ),
};
224
/*
 * Video ring commands; part of gen7_video_cmds, which is selected for the
 * VCS ring. Entries are expected to be ordered by ascending masked opcode
 * (see validate_cmds_sorted()).
 */
static const struct drm_i915_cmd_descriptor video_cmds[] = {
	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
	/*
	 * The notify check always applies. The two following checks are only
	 * evaluated when an MI_FLUSH_DW_OP_MASK bit is set in dword 0.
	 */
	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_FLUSH_DW_NOTIFY,
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = MI_FLUSH_DW_USE_GTT,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      },
	      {
			.offset = 0,
			.mask = MI_FLUSH_DW_STORE_INDEX,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      }},						       ),
	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	/*
	 * MFX_WAIT doesn't fit the way we handle length for most commands.
	 * It has a length field but it uses a non-standard length bias.
	 * It is always 1 dword though, so just treat it as fixed length.
	 */
	CMD(  MFX_WAIT,                         SMFX,   F,  1,      S  ),
};
267
/*
 * Video enhancement ring commands; part of hsw_vebox_cmds, which is selected
 * for the VECS ring. Entries are expected to be ordered by ascending masked
 * opcode (see validate_cmds_sorted()).
 */
static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
	/*
	 * The notify check always applies. The two following checks are only
	 * evaluated when an MI_FLUSH_DW_OP_MASK bit is set in dword 0.
	 */
	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_FLUSH_DW_NOTIFY,
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = MI_FLUSH_DW_USE_GTT,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      },
	      {
			.offset = 0,
			.mask = MI_FLUSH_DW_STORE_INDEX,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      }},						       ),
	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
};
304
/*
 * Blitter ring commands; part of both gen7_blt_cmds and hsw_blt_ring_cmds.
 * Entries are expected to be ordered by ascending masked opcode (see
 * validate_cmds_sorted()).
 */
static const struct drm_i915_cmd_descriptor blt_cmds[] = {
	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
	/*
	 * The notify check always applies. The two following checks are only
	 * evaluated when an MI_FLUSH_DW_OP_MASK bit is set in dword 0.
	 */
	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_FLUSH_DW_NOTIFY,
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = MI_FLUSH_DW_USE_GTT,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      },
	      {
			.offset = 0,
			.mask = MI_FLUSH_DW_STORE_INDEX,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      }},						       ),
	CMD(  COLOR_BLT,                        S2D,   !F,  0x3F,   S  ),
	CMD(  SRC_COPY_BLT,                     S2D,   !F,  0x3F,   S  ),
};
337
/*
 * Additional blitter ring commands that are only part of hsw_blt_ring_cmds.
 * Entries are expected to be ordered by ascending masked opcode (see
 * validate_cmds_sorted()).
 */
static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
};
342
/*
 * The table shorthand is only needed for the descriptor tables above; drop
 * the short, collision-prone names before the rest of the file.
 */
#undef CMD
#undef SMI
#undef S3D
#undef S2D
#undef SMFX
#undef F
#undef S
#undef R
#undef W
#undef B
#undef M
354
/*
 * Per-ring lists of descriptor tables. i915_cmd_parser_init_ring() picks one
 * list per ring and platform; every list starts with common_cmds.
 */

/* Render ring, non-Haswell gen7. */
static const struct drm_i915_cmd_table gen7_render_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ render_cmds, ARRAY_SIZE(render_cmds) },
};

/* Render ring, Haswell. */
static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ render_cmds, ARRAY_SIZE(render_cmds) },
	{ hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
};

/* Video (VCS) ring. */
static const struct drm_i915_cmd_table gen7_video_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ video_cmds, ARRAY_SIZE(video_cmds) },
};

/* Video enhancement (VECS) ring. */
static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ vecs_cmds, ARRAY_SIZE(vecs_cmds) },
};

/* Blitter ring, non-Haswell gen7. */
static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ blt_cmds, ARRAY_SIZE(blt_cmds) },
};

/* Blitter ring, Haswell. */
static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ blt_cmds, ARRAY_SIZE(blt_cmds) },
	{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
};
386
387/*
388 * Register whitelists, sorted by increasing register offset.
389 *
390 * Some registers that userspace accesses are 64 bits. The register
391 * access commands only allow 32-bit accesses. Hence, we have to include
392 * entries for both halves of the 64-bit registers.
393 */
394
/*
 * Convenience macro for adding 64-bit registers: expands to two table
 * entries, the register offset itself and the offset of its upper half.
 * The argument is parenthesized so that an expression such as (base << n)
 * is not regrouped by the addition.
 */
#define REG64(addr) (addr), ((addr) + sizeof(u32))
397
/*
 * Registers that any process may access on the render ring. The table is
 * expected to be sorted by increasing offset (see validate_regs_sorted()).
 */
static const u32 gen7_render_regs[] = {
	REG64(HS_INVOCATION_COUNT),
	REG64(DS_INVOCATION_COUNT),
	REG64(IA_VERTICES_COUNT),
	REG64(IA_PRIMITIVES_COUNT),
	REG64(VS_INVOCATION_COUNT),
	REG64(GS_INVOCATION_COUNT),
	REG64(GS_PRIMITIVES_COUNT),
	REG64(CL_INVOCATION_COUNT),
	REG64(CL_PRIMITIVES_COUNT),
	REG64(PS_INVOCATION_COUNT),
	REG64(PS_DEPTH_COUNT),
	OACONTROL, /* Only allowed for LRI and SRM. See below. */
	GEN7_3DPRIM_END_OFFSET,
	GEN7_3DPRIM_START_VERTEX,
	GEN7_3DPRIM_VERTEX_COUNT,
	GEN7_3DPRIM_INSTANCE_COUNT,
	GEN7_3DPRIM_START_INSTANCE,
	GEN7_3DPRIM_BASE_VERTEX,
	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)),
	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)),
	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)),
	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(3)),
	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(0)),
	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(1)),
	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(2)),
	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(3)),
	GEN7_SO_WRITE_OFFSET(0),
	GEN7_SO_WRITE_OFFSET(1),
	GEN7_SO_WRITE_OFFSET(2),
	GEN7_SO_WRITE_OFFSET(3),
	GEN7_L3SQCREG1,
	GEN7_L3CNTLREG2,
	GEN7_L3CNTLREG3,
};
433
/* Registers that any process may access on the blitter ring. */
static const u32 gen7_blt_regs[] = {
	BCS_SWCTRL,
};
437
/*
 * Registers additionally allowed for the drm master on non-Haswell gen7
 * (render and blitter rings). Expected to be sorted by increasing offset.
 */
static const u32 ivb_master_regs[] = {
	FORCEWAKE_MT,
	DERRMR,
	GEN7_PIPE_DE_LOAD_SL(PIPE_A),
	GEN7_PIPE_DE_LOAD_SL(PIPE_B),
	GEN7_PIPE_DE_LOAD_SL(PIPE_C),
};
445
/*
 * Registers additionally allowed for the drm master on Haswell (render and
 * blitter rings). Expected to be sorted by increasing offset.
 */
static const u32 hsw_master_regs[] = {
	FORCEWAKE_MT,
	DERRMR,
};

/* REG64 is only needed while defining the whitelists above. */
#undef REG64
452
453static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
454{
455	u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT;
456	u32 subclient =
457		(cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT;
458
459	if (client == INSTR_MI_CLIENT)
460		return 0x3F;
461	else if (client == INSTR_RC_CLIENT) {
462		if (subclient == INSTR_MEDIA_SUBCLIENT)
463			return 0xFFFF;
464		else
465			return 0xFF;
466	}
467
468	DRM_DEBUG_DRIVER("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header);
469	return 0;
470}
471
472static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header)
473{
474	u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT;
475	u32 subclient =
476		(cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT;
477
478	if (client == INSTR_MI_CLIENT)
479		return 0x3F;
480	else if (client == INSTR_RC_CLIENT) {
481		if (subclient == INSTR_MEDIA_SUBCLIENT)
482			return 0xFFF;
483		else
484			return 0xFF;
485	}
486
487	DRM_DEBUG_DRIVER("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header);
488	return 0;
489}
490
491static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
492{
493	u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT;
494
495	if (client == INSTR_MI_CLIENT)
496		return 0x3F;
497	else if (client == INSTR_BC_CLIENT)
498		return 0xFF;
499
500	DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
501	return 0;
502}
503
504static bool validate_cmds_sorted(struct intel_engine_cs *ring,
505				 const struct drm_i915_cmd_table *cmd_tables,
506				 int cmd_table_count)
507{
508	int i;
509	bool ret = true;
510
511	if (!cmd_tables || cmd_table_count == 0)
512		return true;
513
514	for (i = 0; i < cmd_table_count; i++) {
515		const struct drm_i915_cmd_table *table = &cmd_tables[i];
516		u32 previous = 0;
517		int j;
518
519		for (j = 0; j < table->count; j++) {
520			const struct drm_i915_cmd_descriptor *desc =
521				&table->table[i];
522			u32 curr = desc->cmd.value & desc->cmd.mask;
523
524			if (curr < previous) {
525				DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
526					  ring->id, i, j, curr, previous);
527				ret = false;
528			}
529
530			previous = curr;
531		}
532	}
533
534	return ret;
535}
536
537static bool check_sorted(int ring_id, const u32 *reg_table, int reg_count)
538{
539	int i;
540	u32 previous = 0;
541	bool ret = true;
542
543	for (i = 0; i < reg_count; i++) {
544		u32 curr = reg_table[i];
545
546		if (curr < previous) {
547			DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n",
548				  ring_id, i, curr, previous);
549			ret = false;
550		}
551
552		previous = curr;
553	}
554
555	return ret;
556}
557
558static bool validate_regs_sorted(struct intel_engine_cs *ring)
559{
560	return check_sorted(ring->id, ring->reg_table, ring->reg_count) &&
561		check_sorted(ring->id, ring->master_reg_table,
562			     ring->master_reg_count);
563}
564
/*
 * Hash table entry linking a command descriptor into a ring's cmd_hash.
 * Nodes are allocated in init_hash_table() and freed in fini_hash_table().
 */
struct cmd_node {
	const struct drm_i915_cmd_descriptor *desc;	/* descriptor from a static table */
	struct hlist_node node;				/* linkage within ring->cmd_hash */
};
569
/*
 * Different command ranges have different numbers of bits for the opcode. For
 * example, MI commands use bits 31:23 while 3D commands use bits 31:16. The
 * problem is that, for example, MI commands use bits 22:16 for other fields
 * such as GGTT vs PPGTT bits. If we include those bits in the mask then when
 * we mask a command from a batch it could hash to the wrong bucket due to
 * non-opcode bits being set. But if we don't include those bits, some 3D
 * commands may hash to the same bucket due to not including opcode bits that
 * make the command unique. For now, we will risk hashing to the same bucket.
 *
 * If we attempt to generate a perfect hash, we should be able to look at bits
 * 31:29 of a command from a batch buffer and use the full mask for that
 * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
 *
 * The same mask is applied to the descriptor value on insertion and to the
 * batch dword on lookup, so both produce the same hash key.
 */
#define CMD_HASH_MASK STD_MI_OPCODE_MASK
585
586static int init_hash_table(struct intel_engine_cs *ring,
587			   const struct drm_i915_cmd_table *cmd_tables,
588			   int cmd_table_count)
589{
590	int i, j;
591
592	hash_init(ring->cmd_hash);
593
594	for (i = 0; i < cmd_table_count; i++) {
595		const struct drm_i915_cmd_table *table = &cmd_tables[i];
596
597		for (j = 0; j < table->count; j++) {
598			const struct drm_i915_cmd_descriptor *desc =
599				&table->table[j];
600			struct cmd_node *desc_node =
601				kmalloc(sizeof(*desc_node), GFP_KERNEL);
602
603			if (!desc_node)
604				return -ENOMEM;
605
606			desc_node->desc = desc;
607			hash_add(ring->cmd_hash, &desc_node->node,
608				 desc->cmd.value & CMD_HASH_MASK);
609		}
610	}
611
612	return 0;
613}
614
615static void fini_hash_table(struct intel_engine_cs *ring)
616{
617	struct hlist_node *tmp;
618	struct cmd_node *desc_node;
619	int i;
620
621	hash_for_each_safe(ring->cmd_hash, i, tmp, desc_node, node) {
622		hash_del(&desc_node->node);
623		kfree(desc_node);
624	}
625}
626
627/**
628 * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer
629 * @ring: the ringbuffer to initialize
630 *
631 * Optionally initializes fields related to batch buffer command parsing in the
632 * struct intel_engine_cs based on whether the platform requires software
633 * command parsing.
634 *
635 * Return: non-zero if initialization fails
636 */
637int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
638{
639	const struct drm_i915_cmd_table *cmd_tables;
640	int cmd_table_count;
641	int ret;
642
643	if (!IS_GEN7(ring->dev))
644		return 0;
645
646	switch (ring->id) {
647	case RCS:
648		if (IS_HASWELL(ring->dev)) {
649			cmd_tables = hsw_render_ring_cmds;
650			cmd_table_count =
651				ARRAY_SIZE(hsw_render_ring_cmds);
652		} else {
653			cmd_tables = gen7_render_cmds;
654			cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
655		}
656
657		ring->reg_table = gen7_render_regs;
658		ring->reg_count = ARRAY_SIZE(gen7_render_regs);
659
660		if (IS_HASWELL(ring->dev)) {
661			ring->master_reg_table = hsw_master_regs;
662			ring->master_reg_count = ARRAY_SIZE(hsw_master_regs);
663		} else {
664			ring->master_reg_table = ivb_master_regs;
665			ring->master_reg_count = ARRAY_SIZE(ivb_master_regs);
666		}
667
668		ring->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
669		break;
670	case VCS:
671		cmd_tables = gen7_video_cmds;
672		cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
673		ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
674		break;
675	case BCS:
676		if (IS_HASWELL(ring->dev)) {
677			cmd_tables = hsw_blt_ring_cmds;
678			cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
679		} else {
680			cmd_tables = gen7_blt_cmds;
681			cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
682		}
683
684		ring->reg_table = gen7_blt_regs;
685		ring->reg_count = ARRAY_SIZE(gen7_blt_regs);
686
687		if (IS_HASWELL(ring->dev)) {
688			ring->master_reg_table = hsw_master_regs;
689			ring->master_reg_count = ARRAY_SIZE(hsw_master_regs);
690		} else {
691			ring->master_reg_table = ivb_master_regs;
692			ring->master_reg_count = ARRAY_SIZE(ivb_master_regs);
693		}
694
695		ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
696		break;
697	case VECS:
698		cmd_tables = hsw_vebox_cmds;
699		cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
700		/* VECS can use the same length_mask function as VCS */
701		ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
702		break;
703	default:
704		DRM_ERROR("CMD: cmd_parser_init with unknown ring: %d\n",
705			  ring->id);
706		BUG();
707	}
708
709	BUG_ON(!validate_cmds_sorted(ring, cmd_tables, cmd_table_count));
710	BUG_ON(!validate_regs_sorted(ring));
711
712	if (hash_empty(ring->cmd_hash)) {
713		ret = init_hash_table(ring, cmd_tables, cmd_table_count);
714		if (ret) {
715			DRM_ERROR("CMD: cmd_parser_init failed!\n");
716			fini_hash_table(ring);
717			return ret;
718		}
719	}
720
721	ring->needs_cmd_parser = true;
722
723	return 0;
724}
725
726/**
727 * i915_cmd_parser_fini_ring() - clean up cmd parser related fields
728 * @ring: the ringbuffer to clean up
729 *
730 * Releases any resources related to command parsing that may have been
731 * initialized for the specified ring.
732 */
733void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring)
734{
735	if (!ring->needs_cmd_parser)
736		return;
737
738	fini_hash_table(ring);
739}
740
741static const struct drm_i915_cmd_descriptor*
742find_cmd_in_table(struct intel_engine_cs *ring,
743		  u32 cmd_header)
744{
745	struct cmd_node *desc_node;
746
747	hash_for_each_possible(ring->cmd_hash, desc_node, node,
748			       cmd_header & CMD_HASH_MASK) {
749		const struct drm_i915_cmd_descriptor *desc = desc_node->desc;
750		u32 masked_cmd = desc->cmd.mask & cmd_header;
751		u32 masked_value = desc->cmd.value & desc->cmd.mask;
752
753		if (masked_cmd == masked_value)
754			return desc;
755	}
756
757	return NULL;
758}
759
760/*
761 * Returns a pointer to a descriptor for the command specified by cmd_header.
762 *
763 * The caller must supply space for a default descriptor via the default_desc
764 * parameter. If no descriptor for the specified command exists in the ring's
765 * command parser tables, this function fills in default_desc based on the
766 * ring's default length encoding and returns default_desc.
767 */
768static const struct drm_i915_cmd_descriptor*
769find_cmd(struct intel_engine_cs *ring,
770	 u32 cmd_header,
771	 struct drm_i915_cmd_descriptor *default_desc)
772{
773	const struct drm_i915_cmd_descriptor *desc;
774	u32 mask;
775
776	desc = find_cmd_in_table(ring, cmd_header);
777	if (desc)
778		return desc;
779
780	mask = ring->get_cmd_length_mask(cmd_header);
781	if (!mask)
782		return NULL;
783
784	BUG_ON(!default_desc);
785	default_desc->flags = CMD_DESC_SKIP;
786	default_desc->length.mask = mask;
787
788	return default_desc;
789}
790
791static bool valid_reg(const u32 *table, int count, u32 addr)
792{
793	if (table && count != 0) {
794		int i;
795
796		for (i = 0; i < count; i++) {
797			if (table[i] == addr)
798				return true;
799		}
800	}
801
802	return false;
803}
804
805static u32 *vmap_batch(struct drm_i915_gem_object *obj)
806{
807	int i;
808	void *addr = NULL;
809	struct sg_page_iter sg_iter;
810	struct page **pages;
811
812	pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
813	if (pages == NULL) {
814		DRM_DEBUG_DRIVER("Failed to get space for pages\n");
815		goto finish;
816	}
817
818	i = 0;
819	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
820		pages[i] = sg_page_iter_page(&sg_iter);
821		i++;
822	}
823
824	addr = vmap(pages, i, 0, PAGE_KERNEL);
825	if (addr == NULL) {
826		DRM_DEBUG_DRIVER("Failed to vmap pages\n");
827		goto finish;
828	}
829
830finish:
831	if (pages)
832		drm_free_large(pages);
833	return (u32*)addr;
834}
835
836/**
837 * i915_needs_cmd_parser() - should a given ring use software command parsing?
838 * @ring: the ring in question
839 *
840 * Only certain platforms require software batch buffer command parsing, and
841 * only when enabled via module paramter.
842 *
843 * Return: true if the ring requires software command parsing
844 */
845bool i915_needs_cmd_parser(struct intel_engine_cs *ring)
846{
847	if (!ring->needs_cmd_parser)
848		return false;
849
850	/*
851	 * XXX: VLV is Gen7 and therefore has cmd_tables, but has PPGTT
852	 * disabled. That will cause all of the parser's PPGTT checks to
853	 * fail. For now, disable parsing when PPGTT is off.
854	 */
855	if (USES_PPGTT(ring->dev))
856		return false;
857
858	return (i915.enable_cmd_parser == 1);
859}
860
861static bool check_cmd(const struct intel_engine_cs *ring,
862		      const struct drm_i915_cmd_descriptor *desc,
863		      const u32 *cmd,
864		      const bool is_master,
865		      bool *oacontrol_set)
866{
867	if (desc->flags & CMD_DESC_REJECT) {
868		DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
869		return false;
870	}
871
872	if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
873		DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
874				 *cmd);
875		return false;
876	}
877
878	if (desc->flags & CMD_DESC_REGISTER) {
879		u32 reg_addr = cmd[desc->reg.offset] & desc->reg.mask;
880
881		/*
882		 * OACONTROL requires some special handling for writes. We
883		 * want to make sure that any batch which enables OA also
884		 * disables it before the end of the batch. The goal is to
885		 * prevent one process from snooping on the perf data from
886		 * another process. To do that, we need to check the value
887		 * that will be written to the register. Hence, limit
888		 * OACONTROL writes to only MI_LOAD_REGISTER_IMM commands.
889		 */
890		if (reg_addr == OACONTROL) {
891			if (desc->cmd.value == MI_LOAD_REGISTER_MEM)
892				return false;
893
894			if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1))
895				*oacontrol_set = (cmd[2] != 0);
896		}
897
898		if (!valid_reg(ring->reg_table,
899			       ring->reg_count, reg_addr)) {
900			if (!is_master ||
901			    !valid_reg(ring->master_reg_table,
902				       ring->master_reg_count,
903				       reg_addr)) {
904				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n",
905						 reg_addr,
906						 *cmd,
907						 ring->id);
908				return false;
909			}
910		}
911	}
912
913	if (desc->flags & CMD_DESC_BITMASK) {
914		int i;
915
916		for (i = 0; i < MAX_CMD_DESC_BITMASKS; i++) {
917			u32 dword;
918
919			if (desc->bits[i].mask == 0)
920				break;
921
922			if (desc->bits[i].condition_mask != 0) {
923				u32 offset =
924					desc->bits[i].condition_offset;
925				u32 condition = cmd[offset] &
926					desc->bits[i].condition_mask;
927
928				if (condition == 0)
929					continue;
930			}
931
932			dword = cmd[desc->bits[i].offset] &
933				desc->bits[i].mask;
934
935			if (dword != desc->bits[i].expected) {
936				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n",
937						 *cmd,
938						 desc->bits[i].mask,
939						 desc->bits[i].expected,
940						 dword, ring->id);
941				return false;
942			}
943		}
944	}
945
946	return true;
947}
948
949#define LENGTH_BIAS 2
950
951/**
952 * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
953 * @ring: the ring on which the batch is to execute
954 * @batch_obj: the batch buffer in question
955 * @batch_start_offset: byte offset in the batch at which execution starts
956 * @is_master: is the submitting process the drm master?
957 *
958 * Parses the specified batch buffer looking for privilege violations as
959 * described in the overview.
960 *
961 * Return: non-zero if the parser finds violations or otherwise fails
962 */
963int i915_parse_cmds(struct intel_engine_cs *ring,
964		    struct drm_i915_gem_object *batch_obj,
965		    u32 batch_start_offset,
966		    bool is_master)
967{
968	int ret = 0;
969	u32 *cmd, *batch_base, *batch_end;
970	struct drm_i915_cmd_descriptor default_desc = { 0 };
971	int needs_clflush = 0;
972	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
973
974	ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush);
975	if (ret) {
976		DRM_DEBUG_DRIVER("CMD: failed to prep read\n");
977		return ret;
978	}
979
980	batch_base = vmap_batch(batch_obj);
981	if (!batch_base) {
982		DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
983		i915_gem_object_unpin_pages(batch_obj);
984		return -ENOMEM;
985	}
986
987	if (needs_clflush)
988		drm_clflush_virt_range((char *)batch_base, batch_obj->base.size);
989
990	cmd = batch_base + (batch_start_offset / sizeof(*cmd));
991	batch_end = cmd + (batch_obj->base.size / sizeof(*batch_end));
992
993	while (cmd < batch_end) {
994		const struct drm_i915_cmd_descriptor *desc;
995		u32 length;
996
997		if (*cmd == MI_BATCH_BUFFER_END)
998			break;
999
1000		desc = find_cmd(ring, *cmd, &default_desc);
1001		if (!desc) {
1002			DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
1003					 *cmd);
1004			ret = -EINVAL;
1005			break;
1006		}
1007
1008		if (desc->flags & CMD_DESC_FIXED)
1009			length = desc->length.fixed;
1010		else
1011			length = ((*cmd & desc->length.mask) + LENGTH_BIAS);
1012
1013		if ((batch_end - cmd) < length) {
1014			DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
1015					 *cmd,
1016					 length,
1017					 batch_end - cmd);
1018			ret = -EINVAL;
1019			break;
1020		}
1021
1022		if (!check_cmd(ring, desc, cmd, is_master, &oacontrol_set)) {
1023			ret = -EINVAL;
1024			break;
1025		}
1026
1027		cmd += length;
1028	}
1029
1030	if (oacontrol_set) {
1031		DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n");
1032		ret = -EINVAL;
1033	}
1034
1035	if (cmd >= batch_end) {
1036		DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
1037		ret = -EINVAL;
1038	}
1039
1040	vunmap(batch_base);
1041
1042	i915_gem_object_unpin_pages(batch_obj);
1043
1044	return ret;
1045}
1046
/**
 * i915_cmd_parser_get_version() - get the cmd parser version number
 *
 * The version is a plain integer that only ever increases, so userspace can
 * compare it against the version that introduced the operation it needs.
 *
 * Return: the current version number of the cmd parser
 */
int i915_cmd_parser_get_version(void)
{
	/*
	 * Command parser version history
	 *
	 * 1. Initial version. Checks batches and reports violations, but leaves
	 *    hardware parsing enabled (so does not allow new use cases).
	 */
	enum { CMD_PARSER_VERSION = 1 };

	return CMD_PARSER_VERSION;
}
1065