/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/dma_remapping.h>

#define  __EXEC_OBJECT_HAS_PIN (1<<31)
#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)

#define BATCH_OFFSET_BIAS (256*1024)

struct eb_vmas {
	struct list_head vmas;
	int and;
	union {
		struct i915_vma *lut[0];
		struct hlist_head buckets[0];
	};
};

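/*
 * Allocate the lookup structure used to map execbuffer handles back to VMAs.
 * When userspace passes I915_EXEC_HANDLE_LUT the handles are already dense
 * indices, so a flat lookup table suffices and eb->and stores the negated
 * buffer count. Otherwise a power-of-two hash table is used and eb->and
 * holds the bucket mask.
 */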
static struct eb_vmas *
eb_create(struct drm_i915_gem_execbuffer2 *args)
{
	struct eb_vmas *eb = NULL;

	if (args->flags & I915_EXEC_HANDLE_LUT) {
		unsigned size = args->buffer_count;
		size *= sizeof(struct i915_vma *);
		size += sizeof(struct eb_vmas);
		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	}

	if (eb == NULL) {
		unsigned size = args->buffer_count;
		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
		while (count > 2*size)
			count >>= 1;
		eb = kzalloc(count*sizeof(struct hlist_head) +
			     sizeof(struct eb_vmas),
			     GFP_TEMPORARY);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

	INIT_LIST_HEAD(&eb->vmas);
	return eb;
}

static void
eb_reset(struct eb_vmas *eb)
{
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}

static int
eb_lookup_vmas(struct eb_vmas *eb,
	       struct drm_i915_gem_exec_object2 *exec,
	       const struct drm_i915_gem_execbuffer2 *args,
	       struct i915_address_space *vm,
	       struct drm_file *file)
{
	struct drm_i915_gem_object *obj;
	struct list_head objects;
	int i, ret;

	INIT_LIST_HEAD(&objects);
	spin_lock(&file->table_lock);
	/* Grab a reference to the object and release the lock so we can look up
	 * or create the VMA without using GFP_ATOMIC. */
	for (i = 0; i < args->buffer_count; i++) {
		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->obj_exec_link)) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		drm_gem_object_reference(&obj->base);
		list_add_tail(&obj->obj_exec_link, &objects);
	}
	spin_unlock(&file->table_lock);

	i = 0;
	while (!list_empty(&objects)) {
		struct i915_vma *vma;

		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);

		/*
		 * NOTE: We can leak any vmas created here if something fails
		 * later on. But that's no issue since vma_unbind can deal with
		 * vmas which are not actually bound. And since only
		 * lookup_or_create exists as an interface to get at the vma
		 * from the (obj, vm) pair we don't run the risk of creating
		 * duplicated vmas for the same vm.
		 */
		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
		if (IS_ERR(vma)) {
			DRM_DEBUG("Failed to lookup VMA\n");
			ret = PTR_ERR(vma);
			goto err;
		}

		/* Transfer ownership from the objects list to the vmas list. */
		list_add_tail(&vma->exec_list, &eb->vmas);
		list_del_init(&obj->obj_exec_link);

		vma->exec_entry = &exec[i];
		if (eb->and < 0) {
			eb->lut[i] = vma;
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
			vma->exec_handle = handle;
			hlist_add_head(&vma->exec_node,
				       &eb->buckets[handle & eb->and]);
		}
		++i;
	}

	return 0;

err:
	while (!list_empty(&objects)) {
		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);
		list_del_init(&obj->obj_exec_link);
		drm_gem_object_unreference(&obj->base);
	}
	/*
	 * Objects already transferred to the vmas list will be unreferenced by
	 * eb_destroy.
	 */

	return ret;
}

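/*
 * Look up the vma for an execbuffer handle: either a direct index into the
 * flat LUT (eb->and < 0) or a walk of the matching hash bucket.
 */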
static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
{
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
		struct hlist_node *node;

		head = &eb->buckets[handle & eb->and];
		hlist_for_each(node, head) {
			struct i915_vma *vma;

			vma = hlist_entry(node, struct i915_vma, exec_node);
			if (vma->exec_handle == handle)
				return vma;
		}
		return NULL;
	}
}

static void
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry;
	struct drm_i915_gem_object *obj = vma->obj;

	if (!drm_mm_node_allocated(&vma->node))
		return;

	entry = vma->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		vma->pin_count--;

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}

static void eb_destroy(struct eb_vmas *eb)
{
	while (!list_empty(&eb->vmas)) {
		struct i915_vma *vma;

		vma = list_first_entry(&eb->vmas,
				       struct i915_vma,
				       exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		drm_gem_object_unreference(&vma->obj->base);
	}
	kfree(eb);
}

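/*
 * Decide whether a relocation can be written through a CPU kmap instead of
 * the GTT aperture: always on LLC platforms, and otherwise whenever the
 * object is already in the CPU write domain, not GTT-mappable, or not
 * uncached.
 */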
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (HAS_LLC(obj->base.dev) ||
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		!obj->map_and_fenceable ||
		obj->cache_level != I915_CACHE_NONE);
}

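/*
 * Write a relocation value through a CPU kmap of the target page. On gen8+
 * the relocation is 64 bits wide, so the upper half may have to be written
 * into the following page.
 */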
static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc,
		   uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t page_offset = offset_in_page(reloc->offset);
	uint64_t delta = reloc->delta + target_offset;
	char *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		return ret;

	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
				reloc->offset >> PAGE_SHIFT));
	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);

	if (INTEL_INFO(dev)->gen >= 8) {
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));

		if (page_offset == 0) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
		}

		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
	}

	kunmap_atomic(vaddr);

	return 0;
}

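/*
 * Write a relocation value through an atomic WC mapping of the GTT aperture,
 * again splitting the 64-bit write across a page boundary on gen8+.
 */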
static int
relocate_entry_gtt(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc,
		   uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint64_t delta = reloc->delta + target_offset;
	uint64_t offset;
	void __iomem *reloc_page;
	int ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		return ret;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		return ret;

	/* Map the page containing the relocation we're going to perform.  */
	offset = i915_gem_obj_ggtt_offset(obj);
	offset += reloc->offset;
	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
					      offset & PAGE_MASK);
	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));

	if (INTEL_INFO(dev)->gen >= 8) {
		offset += sizeof(uint32_t);

		if (offset_in_page(offset) == 0) {
			io_mapping_unmap_atomic(reloc_page);
			reloc_page =
				io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
							 offset);
		}

		iowrite32(upper_32_bits(delta),
			  reloc_page + offset_in_page(offset));
	}

	io_mapping_unmap_atomic(reloc_page);

	return 0;
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_vmas *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	struct i915_vma *target_vma;
	uint64_t target_offset;
	int ret;

	/* we already hold a reference to all valid objects */
	target_vma = eb_get_vma(eb, reloc->target_handle);
	if (unlikely(target_vma == NULL))
		return -ENOENT;
	target_i915_obj = target_vma->obj;
	target_obj = &target_vma->obj->base;

	target_offset = target_vma->node.start;

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non-secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		struct i915_vma *vma =
			list_first_entry(&target_i915_obj->vma_list,
					 typeof(*vma), vma_link);
		vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return -EINVAL;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return -EINVAL;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset >
		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return -EINVAL;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return -EINVAL;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && in_atomic())
		return -EFAULT;

	if (use_cpu_reloc(obj))
		ret = relocate_entry_cpu(obj, reloc, target_offset);
	else
		ret = relocate_entry_gtt(obj, reloc, target_offset);

	if (ret)
		return ret;

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

static int
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
				 struct eb_vmas *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	int remain, ret;

	user_relocs = to_user_ptr(entry->relocs_ptr);

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
			return -EFAULT;

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
						    &r->presumed_offset,
						    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
				      struct eb_vmas *eb,
				      struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct eb_vmas *eb)
{
	struct i915_vma *vma;
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within an mmapped bo. In such a case the page fault
	 * handler would call i915_gem_fault() and we would try to acquire
	 * the struct mutex again. Obviously this is bad and so lockdep
	 * complains vehemently.
	 */
	pagefault_disable();
	list_for_each_entry(vma, &eb->vmas, exec_list) {
		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
		if (ret)
			break;
	}
	pagefault_enable();

	return ret;
}

static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
				struct intel_engine_cs *ring,
				bool *need_reloc)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	uint64_t flags;
	int ret;

	flags = 0;
	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
		flags |= PIN_MAPPABLE;
	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
		flags |= PIN_GLOBAL;
	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;

	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
	if (ret)
		return ret;

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
		ret = i915_gem_object_get_fence(obj);
		if (ret)
			return ret;

		if (i915_gem_object_pin_fence(obj))
			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
	}

	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start;
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	return 0;
}

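/*
 * Decide whether an object must be bound into the mappable GTT aperture so
 * that its relocations can be written through the aperture; only relevant
 * for global GTT bindings on platforms that cannot use CPU relocations.
 */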
static bool
need_reloc_mappable(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;

	if (entry->relocation_count == 0)
		return false;

	if (!i915_is_ggtt(vma->vm))
		return false;

	/* See also use_cpu_reloc() */
	if (HAS_LLC(vma->obj->base.dev))
		return false;

	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return false;

	return true;
}

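/*
 * Check whether an already-bound vma violates the placement constraints
 * requested for this execbuffer (alignment, mappability, batch bias) and
 * therefore needs to be unbound and rebound.
 */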
static bool
eb_vma_misplaced(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	struct drm_i915_gem_object *obj = vma->obj;

	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
	       !i915_is_ggtt(vma->vm));

	if (entry->alignment &&
	    vma->node.start & (entry->alignment - 1))
		return true;

	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
		return true;

	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
	    vma->node.start < BATCH_OFFSET_BIAS)
		return true;

	return false;
}

static int
i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
			    struct list_head *vmas,
			    bool *need_relocs)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	struct i915_address_space *vm;
	struct list_head ordered_vmas;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

	i915_gem_retire_requests_ring(ring);

	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;

	INIT_LIST_HEAD(&ordered_vmas);
	while (!list_empty(vmas)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		vma = list_first_entry(vmas, struct i915_vma, exec_list);
		obj = vma->obj;
		entry = vma->exec_entry;

		if (!has_fenced_gpu_access)
			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
		need_fence =
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(vma);

		if (need_mappable) {
			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
			list_move(&vma->exec_list, &ordered_vmas);
		} else
			list_move_tail(&vma->exec_list, &ordered_vmas);

		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
		obj->base.pending_write_domain = 0;
	}
	list_splice(&ordered_vmas, vmas);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(vma, vmas, exec_list) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			if (eb_vma_misplaced(vma))
				ret = i915_vma_unbind(vma);
			else
				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(vma, vmas, exec_list) {
			if (drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
			if (ret)
				goto err;
		}

err:
		if (ret != -ENOSPC || retry++)
			return ret;

		/* Decrement pin count for bound objects */
		list_for_each_entry(vma, vmas, exec_list)
			i915_gem_execbuffer_unreserve_vma(vma);

		ret = i915_gem_evict_vm(vm, true);
		if (ret)
			return ret;
	} while (1);
}

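/*
 * Slow-path relocation: drop struct_mutex, copy every relocation entry from
 * userspace with pagefaults enabled, then retake the lock, re-reserve the
 * objects and apply the relocations from the kernel copy.
 */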
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_i915_gem_execbuffer2 *args,
				  struct drm_file *file,
				  struct intel_engine_cs *ring,
				  struct eb_vmas *eb,
				  struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct i915_address_space *vm;
	struct i915_vma *vma;
	bool need_relocs;
	int *reloc_offset;
	int i, total, ret;
	unsigned count = args->buffer_count;

	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(&eb->vmas)) {
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		drm_gem_object_unreference(&vma->obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = to_user_ptr(exec[i].relocs_ptr);

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (__copy_to_user(&user_relocs[j].presumed_offset,
					   &invalid_offset,
					   sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
	if (ret)
		goto err;

	list_for_each_entry(vma, &eb->vmas, exec_list) {
		int offset = vma->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
							    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are, this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}

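/*
 * Flush outstanding CPU writes and synchronise every object with the target
 * ring before the batch is executed, invalidating GPU caches at the end.
 */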
static int
i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
				struct list_head *vmas)
{
	struct i915_vma *vma;
	uint32_t flush_domains = 0;
	bool flush_chipset = false;
	int ret;

	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_object *obj = vma->obj;
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
			flush_chipset |= i915_gem_clflush_object(obj, false);

		flush_domains |= obj->base.write_domain;
	}

	if (flush_chipset)
		i915_gem_chipset_flush(ring->dev);

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
	return intel_ring_invalidate_all_caches(ring);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

static int
validate_exec_list(struct drm_device *dev,
		   struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	unsigned relocs_total = 0;
	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
	unsigned invalid_flags;
	int i;

	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
	if (USES_FULL_PPGTT(dev))
		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;

	for (i = 0; i < count; i++) {
		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
		int length; /* limited by fault_in_pages_readable() */

		if (exec[i].flags & invalid_flags)
			return -EINVAL;

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
		/*
		 * We must check that the entire relocation array is safe
		 * to read, but since we may need to update the presumed
		 * offsets during execution, check for full write access.
		 */
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;

		if (likely(!i915.prefault_disable)) {
			if (fault_in_multipages_readable(ptr, length))
				return -EFAULT;
		}
	}

	return 0;
}

static struct intel_context *
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
			  struct intel_engine_cs *ring, const u32 ctx_id)
{
	struct intel_context *ctx = NULL;
	struct i915_ctx_hang_stats *hs;

	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
		return ERR_PTR(-EINVAL);

	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
	if (IS_ERR(ctx))
		return ctx;

	hs = &ctx->hang_stats;
	if (hs->banned) {
		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
		return ERR_PTR(-EIO);
	}

	if (i915.enable_execlists && !ctx->engine[ring->id].state) {
		int ret = intel_lr_context_deferred_create(ctx, ring);
		if (ret) {
			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
			return ERR_PTR(ret);
		}
	}

	return ctx;
}

void
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
				   struct intel_engine_cs *ring)
{
	u32 seqno = intel_ring_get_seqno(ring);
	struct i915_vma *vma;

	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
		struct drm_i915_gem_object *obj = vma->obj;
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;

		obj->base.write_domain = obj->base.pending_write_domain;
		if (obj->base.write_domain == 0)
			obj->base.pending_read_domains |= obj->base.read_domains;
		obj->base.read_domains = obj->base.pending_read_domains;

		i915_vma_move_to_active(vma, ring);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->last_write_seqno = seqno;

			intel_fb_obj_invalidate(obj, ring);

			/* update for the implicit flush after a batch */
			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
		}
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			obj->last_fenced_seqno = seqno;
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
				struct drm_i915_private *dev_priv = to_i915(ring->dev);
				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
					       &dev_priv->mm.fence_list);
			}
		}

		trace_i915_gem_object_change_domain(obj, old_read, old_write);
	}
}

void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_engine_cs *ring,
				    struct drm_i915_gem_object *obj)
{
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;

	/* Add a breadcrumb for the completion of the batch buffer */
	(void)__i915_add_request(ring, file, obj, NULL);
}

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
		DRM_DEBUG("sol reset is gen7/rcs only\n");
		return -EINVAL;
	}

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

int
i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
			       struct intel_engine_cs *ring,
			       struct intel_context *ctx,
			       struct drm_i915_gem_execbuffer2 *args,
			       struct list_head *vmas,
			       struct drm_i915_gem_object *batch_obj,
			       u64 exec_start, u32 flags)
{
	struct drm_clip_rect *cliprects = NULL;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 exec_len;
	int instp_mode;
	u32 instp_mask;
	int i, ret = 0;

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			return -EINVAL;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			return -EINVAL;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			return -EINVAL;
		}

		cliprects = kcalloc(args->num_cliprects,
				    sizeof(*cliprects),
				    GFP_KERNEL);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto error;
		}

		if (copy_from_user(cliprects,
				   to_user_ptr(args->cliprects_ptr),
				   sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto error;
		}
	} else {
		if (args->DR4 == 0xffffffff) {
			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
			args->DR4 = 0;
		}

		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
			return -EINVAL;
		}
	}

	ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
	if (ret)
		goto error;

	ret = i915_switch_context(ring, ctx);
	if (ret)
		goto error;

	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	instp_mask = I915_EXEC_CONSTANTS_MASK;
	switch (instp_mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
			ret = -EINVAL;
			goto error;
		}

		if (instp_mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				DRM_DEBUG("no rel constants on pre-gen4\n");
				ret = -EINVAL;
				goto error;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
				ret = -EINVAL;
				goto error;
			}

			/* The HW changed the meaning of this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
		ret = -EINVAL;
		goto error;
	}

	if (ring == &dev_priv->ring[RCS] &&
			instp_mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto error;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = instp_mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto error;
	}

	exec_len = args->batch_len;
	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto error;

			ret = ring->dispatch_execbuffer(ring,
							exec_start, exec_len,
							flags);
			if (ret)
				goto error;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len,
						flags);
		if (ret)
			return ret;
	}

	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);

	i915_gem_execbuffer_move_to_active(vmas, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);

error:
	kfree(cliprects);
	return ret;
}

/**
 * Find one BSD ring to dispatch the corresponding BSD command.
 * The ring ID is returned.
 */
static int gen8_dispatch_bsd_ring(struct drm_device *dev,
				  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Check whether the file_priv is already using one ring */
	if (file_priv->bsd_ring)
		return file_priv->bsd_ring->id;
	else {
		/* If not, use the ping-pong mechanism to select one ring */
		int ring_id;

		mutex_lock(&dev->struct_mutex);
		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
			ring_id = VCS;
			dev_priv->mm.bsd_ring_dispatch_index = 1;
		} else {
			ring_id = VCS2;
			dev_priv->mm.bsd_ring_dispatch_index = 0;
		}
		file_priv->bsd_ring = &dev_priv->ring[ring_id];
		mutex_unlock(&dev->struct_mutex);
		return ring_id;
	}
}

static struct drm_i915_gem_object *
eb_get_batch(struct eb_vmas *eb)
{
	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);

	/*
	 * SNA is doing fancy tricks with compressing batch buffers, which leads
	 * to negative relocation deltas. Usually that works out ok since the
	 * relocate address is still positive, except when the batch is placed
	 * very low in the GTT. Ensure this doesn't happen.
	 *
	 * Note that actual hangs have only been observed on gen7, but for
	 * paranoia do it everywhere.
	 */
	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;

	return vma->obj;
}

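/*
 * Common execbuffer path shared by the legacy and execbuffer2 ioctls:
 * validates the arguments, looks up the objects, reserves and relocates
 * them, and hands the batch off to the engine-specific submission backend.
 */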
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct eb_vmas *eb;
	struct drm_i915_gem_object *batch_obj;
	struct intel_engine_cs *ring;
	struct intel_context *ctx;
	struct i915_address_space *vm;
	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u64 exec_start = args->batch_start_offset;
	u32 flags;
	int ret;
	bool need_relocs;

	if (!i915_gem_check_execbuffer(args))
		return -EINVAL;

	ret = validate_exec_list(dev, exec, args->buffer_count);
	if (ret)
		return ret;

	flags = 0;
	if (args->flags & I915_EXEC_SECURE) {
		if (!file->is_master || !capable(CAP_SYS_ADMIN))
			return -EPERM;

		flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;

	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
		ring = &dev_priv->ring[RCS];
	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
		if (HAS_BSD2(dev)) {
			int ring_id;
			ring_id = gen8_dispatch_bsd_ring(dev, file);
			ring = &dev_priv->ring[ring_id];
		} else
			ring = &dev_priv->ring[VCS];
	} else
		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];

	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->ums.mm_suspended) {
		mutex_unlock(&dev->struct_mutex);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		ret = PTR_ERR(ctx);
		goto pre_mutex_err;
	}

	i915_gem_context_reference(ctx);

	if (ctx->ppgtt)
		vm = &ctx->ppgtt->base;
	else
		vm = &dev_priv->gtt.base;

	eb = eb_create(args);
	if (eb == NULL) {
		i915_gem_context_unreference(ctx);
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = eb_get_batch(eb);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	if (need_relocs)
		ret = i915_gem_execbuffer_relocate(eb);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
								eb, exec);
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	if (i915_needs_cmd_parser(ring)) {
		ret = i915_parse_cmds(ring,
				      batch_obj,
				      args->batch_start_offset,
				      file->is_master);
		if (ret)
			goto err;

		/*
		 * XXX: Actually do this when enabling batch copy...
		 *
		 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
		 * from MI_BATCH_BUFFER_START commands issued in the
		 * dispatch_execbuffer implementations. We specifically don't
		 * want that set when the command parser is enabled.
		 */
	}

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but bdw mucks it up again. */
	if (flags & I915_DISPATCH_SECURE) {
		/*
		 * So on first glance it looks freaky that we pin the batch here
		 * outside of the reservation loop. But:
		 * - The batch is already pinned into the relevant ppgtt, so we
		 *   already have the backing storage fully allocated.
		 * - No other BO uses the global gtt (well contexts, but meh),
		 *   so we don't really have issues with multiple objects not
		 *   fitting due to fragmentation.
		 * So this is actually safe.
		 */
		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
		if (ret)
			goto err;

		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
	} else
		exec_start += i915_gem_obj_offset(batch_obj, vm);

	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
				      &eb->vmas, batch_obj, exec_start, flags);

	/*
	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
	 * batch vma for correctness. For less ugly and less fragility this
	 * needs to be adjusted to also track the ggtt batch vma properly as
	 * active.
	 */
	if (flags & I915_DISPATCH_SECURE)
		i915_gem_object_ggtt_unpin(batch_obj);
err:
	/* the request owns the ref now */
	i915_gem_context_unreference(ctx);
	eb_destroy(eb);

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	/* intel_gpu_busy should also get a ref, so it will free when the device
	 * is really idle. */
	intel_runtime_pm_put(dev_priv);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
			     to_user_ptr(args->buffers_ptr),
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		struct drm_i915_gem_exec_object __user *user_exec_list =
			to_user_ptr(args->buffers_ptr);

		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++) {
			ret = __copy_to_user(&user_exec_list[i].offset,
					     &exec2_list[i].offset,
					     sizeof(user_exec_list[i].offset));
			if (ret) {
				ret = -EFAULT;
				DRM_DEBUG("failed to copy %d exec entries "
					  "back to user (%d)\n",
					  args->buffer_count, ret);
				break;
			}
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->rsvd2 != 0) {
		DRM_DEBUG("dirty rsvd2 field\n");
		return -EINVAL;
	}

	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	if (exec2_list == NULL)
		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
					   args->buffer_count);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     to_user_ptr(args->buffers_ptr),
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		struct drm_i915_gem_exec_object2 __user *user_exec_list =
				   to_user_ptr(args->buffers_ptr);
		int i;

		for (i = 0; i < args->buffer_count; i++) {
			ret = __copy_to_user(&user_exec_list[i].offset,
					     &exec2_list[i].offset,
					     sizeof(user_exec_list[i].offset));
			if (ret) {
				ret = -EFAULT;
				DRM_DEBUG("failed to copy %d exec entries "
					  "back to user\n",
					  args->buffer_count);
				break;
			}
		}
	}

	drm_free_large(exec2_list);
	return ret;
}