/*
 *
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hyperv.h>
#include <linux/uio.h>

#include "hyperv_vmbus.h"

void hv_begin_read(struct hv_ring_buffer_info *rbi)
{
	rbi->ring_buffer->interrupt_mask = 1;

	/* Make sure the mask update is visible before we read the ring. */
	mb();
}

u32 hv_end_read(struct hv_ring_buffer_info *rbi)
{
	u32 read;
	u32 write;

	rbi->ring_buffer->interrupt_mask = 0;

	/* Make sure the unmasking is visible before we re-check the ring. */
	mb();

	/*
	 * Now check to see if the ring buffer is still empty.
	 * If it is not, we raced and need to process new
	 * incoming messages.
	 */
	hv_get_ringbuffer_availbytes(rbi, &read, &write);

	return read;
}
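
/*
 * Usage sketch (illustrative only; process_all_packets() is a
 * hypothetical helper, not part of this file): a channel callback
 * brackets its read loop with hv_begin_read()/hv_end_read() to batch
 * host interrupts. If hv_end_read() reports bytes still available,
 * new data raced in and the loop must run again:
 *
 *	hv_begin_read(rbi);
 * again:
 *	process_all_packets(channel);
 *	if (hv_end_read(rbi) != 0) {
 *		hv_begin_read(rbi);
 *		goto again;
 *	}
 */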

/*
 * When we write to the ring buffer, check if the host needs to
 * be signaled. Here are the details of this protocol:
 *
 *	1. The host guarantees that while it is draining the
 *	   ring buffer, it will set the interrupt_mask to
 *	   indicate it does not need to be interrupted when
 *	   new data is placed.
 *
 *	2. The host guarantees that it will completely drain
 *	   the ring buffer before exiting the read loop. Further,
 *	   once the ring buffer is empty, it will clear the
 *	   interrupt_mask and re-check to see if new data has
 *	   arrived.
 */
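
/*
 * Example (hypothetical indices): with read_index == 100, the ring is
 * empty exactly when write_index == 100 as well. If our write began at
 * old_write == 100 and advanced the write index past it, the ring went
 * from empty to non-empty and the host must be signaled, unless it has
 * set interrupt_mask.
 */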

static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
{
	/* Make sure the ring contents are visible before we read the mask. */
	mb();
	if (rbi->ring_buffer->interrupt_mask)
		return false;

	/* check interrupt_mask before read_index */
	rmb();

	/*
	 * This is the only case we need to signal: when the
	 * ring transitions from being empty to non-empty.
	 */
	if (old_write == rbi->ring_buffer->read_index)
		return true;

	return false;
}

/*
 * To optimize flow management on the send-side: when the sender is
 * blocked because of insufficient space in the ring buffer, the
 * consumer of the ring buffer can signal the producer once room
 * becomes available. This is controlled by the following parameters:
 *
 * 1. pending_send_sz: the size in bytes that the producer is trying
 *    to send.
 * 2. The feature bit feat_pending_send_sz, set to indicate that the
 *    consumer of the ring will signal when the ring state transitions
 *    from being full to a state where there is room for the producer
 *    to send the pending packet.
 */
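
/*
 * Worked example (hypothetical numbers, matching the computation in
 * hv_need_to_signal_on_read() below): with r_size == 4096,
 * write_loc == 3500 and pending_sz == 1000, suppose this read advances
 * the read index from old_rd == 300 to read_loc == 600. Free space
 * grows from 4096 - (3500 - 300) = 896 bytes to
 * 4096 - (3500 - 600) = 1196 bytes; since 896 < 1000 <= 1196, the
 * reader signals the blocked producer.
 */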

static bool hv_need_to_signal_on_read(u32 old_rd,
					 struct hv_ring_buffer_info *rbi)
{
	u32 prev_write_sz;
	u32 cur_write_sz;
	u32 r_size;
	u32 write_loc = rbi->ring_buffer->write_index;
	u32 read_loc = rbi->ring_buffer->read_index;
	u32 pending_sz = rbi->ring_buffer->pending_send_sz;

	/*
	 * If the other end is not blocked on write don't bother.
	 */
	if (pending_sz == 0)
		return false;

	r_size = rbi->ring_datasize;
	cur_write_sz = write_loc >= read_loc ? r_size - (write_loc - read_loc) :
			read_loc - write_loc;

	prev_write_sz = write_loc >= old_rd ? r_size - (write_loc - old_rd) :
			old_rd - write_loc;

	if ((prev_write_sz < pending_sz) && (cur_write_sz >= pending_sz))
		return true;

	return false;
}

/*
 * hv_get_next_write_location()
 *
 * Get the next write location for the specified ring buffer
 */
static inline u32
hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
{
	u32 next = ring_info->ring_buffer->write_index;

	return next;
}

/*
 * hv_set_next_write_location()
 *
 * Set the next write location for the specified ring buffer
 */
static inline void
hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
		     u32 next_write_location)
{
	ring_info->ring_buffer->write_index = next_write_location;
}

/*
 * hv_get_next_read_location()
 *
 * Get the next read location for the specified ring buffer
 */
static inline u32
hv_get_next_read_location(struct hv_ring_buffer_info *ring_info)
{
	u32 next = ring_info->ring_buffer->read_index;

	return next;
}

/*
 * hv_get_next_readlocation_withoffset()
 *
 * Get the next read location + offset for the specified ring buffer.
 * This allows the caller to skip over data (such as a packet
 * descriptor) that it has already examined.
 */
static inline u32
hv_get_next_readlocation_withoffset(struct hv_ring_buffer_info *ring_info,
				 u32 offset)
{
	u32 next = ring_info->ring_buffer->read_index;

	next += offset;
	next %= ring_info->ring_datasize;

	return next;
}

/*
 * hv_set_next_read_location()
 *
 * Set the next read location for the specified ring buffer
 */
static inline void
hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
		    u32 next_read_location)
{
	ring_info->ring_buffer->read_index = next_read_location;
}

/*
 * hv_get_ring_buffer()
 *
 * Get the start of the ring buffer
 */
static inline void *
hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
{
	return (void *)ring_info->ring_buffer->buffer;
}

/*
 * hv_get_ring_buffersize()
 *
 * Get the size of the ring buffer
 */
static inline u32
hv_get_ring_buffersize(struct hv_ring_buffer_info *ring_info)
{
	return ring_info->ring_datasize;
}

/*
 * hv_get_ring_bufferindices()
 *
 * Get the write index of the specified ring buffer, packed into the
 * upper 32 bits of a u64 (the lower 32 bits are left zero).
 */
static inline u64
hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
{
	return (u64)ring_info->ring_buffer->write_index << 32;
}

/*
 * hv_copyfrom_ringbuffer()
 *
 * Helper routine to copy from the ring buffer into a destination
 * buffer. Assumes there is enough room; handles wrap-around on the
 * source (ring) side only!
 */
static u32 hv_copyfrom_ringbuffer(
	struct hv_ring_buffer_info	*ring_info,
	void				*dest,
	u32				destlen,
	u32				start_read_offset)
{
	void *ring_buffer = hv_get_ring_buffer(ring_info);
	u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);

	u32 frag_len;

	/* wrap-around detected at the src */
	if (destlen > ring_buffer_size - start_read_offset) {
		frag_len = ring_buffer_size - start_read_offset;

		memcpy(dest, ring_buffer + start_read_offset, frag_len);
		memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
	} else {
		memcpy(dest, ring_buffer + start_read_offset, destlen);
	}

	start_read_offset += destlen;
	start_read_offset %= ring_buffer_size;

	return start_read_offset;
}

/*
 * hv_copyto_ringbuffer()
 *
 * Helper routine to copy from a source buffer into the ring buffer.
 * Assumes there is enough room; handles wrap-around on the
 * destination (ring) side only!
 */
static u32 hv_copyto_ringbuffer(
	struct hv_ring_buffer_info	*ring_info,
	u32				start_write_offset,
	void				*src,
	u32				srclen)
{
	void *ring_buffer = hv_get_ring_buffer(ring_info);
	u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
	u32 frag_len;

	/* wrap-around detected! */
	if (srclen > ring_buffer_size - start_write_offset) {
		frag_len = ring_buffer_size - start_write_offset;
		memcpy(ring_buffer + start_write_offset, src, frag_len);
		memcpy(ring_buffer, src + frag_len, srclen - frag_len);
	} else {
		memcpy(ring_buffer + start_write_offset, src, srclen);
	}

	start_write_offset += srclen;
	start_write_offset %= ring_buffer_size;

	return start_write_offset;
}
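
/*
 * Wrap-around example (hypothetical numbers): with
 * ring_buffer_size == 4096, start_write_offset == 4000 and
 * srclen == 200, the first memcpy() places 96 bytes at offset 4000
 * and the second places the remaining 104 bytes at offset 0; the
 * returned write offset is (4000 + 200) % 4096 == 104.
 */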

/*
 * hv_ringbuffer_get_debuginfo()
 *
 * Get various debug metrics for the specified ring buffer
 */
void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
			    struct hv_ring_buffer_debug_info *debug_info)
{
	u32 bytes_avail_towrite;
	u32 bytes_avail_toread;

	if (ring_info->ring_buffer) {
		hv_get_ringbuffer_availbytes(ring_info,
					&bytes_avail_toread,
					&bytes_avail_towrite);

		debug_info->bytes_avail_toread = bytes_avail_toread;
		debug_info->bytes_avail_towrite = bytes_avail_towrite;
		debug_info->current_read_index =
			ring_info->ring_buffer->read_index;
		debug_info->current_write_index =
			ring_info->ring_buffer->write_index;
		debug_info->current_interrupt_mask =
			ring_info->ring_buffer->interrupt_mask;
	}
}

/*
 * hv_ringbuffer_init()
 *
 * Initialize the ring buffer
 */
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
		   void *buffer, u32 buflen)
{
	/* The ring buffer header is expected to occupy exactly one page. */
	if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
		return -EINVAL;

	memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));

	ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
	ring_info->ring_buffer->read_index =
		ring_info->ring_buffer->write_index = 0;

	/*
	 * Set the feature bit for enabling flow control.
	 */
	ring_info->ring_buffer->feature_bits.value = 1;

	ring_info->ring_size = buflen;
	ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);

	spin_lock_init(&ring_info->ring_lock);

	return 0;
}
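
/*
 * Usage sketch (illustrative only; the allocation scheme is an
 * assumption, not part of this file): callers hand hv_ringbuffer_init()
 * a page-aligned region whose first page holds the struct
 * hv_ring_buffer header and whose remainder is the data area:
 *
 *	struct hv_ring_buffer_info rbi;
 *	void *ring = (void *)__get_free_pages(GFP_KERNEL, order);
 *	int ret = ring ? hv_ringbuffer_init(&rbi, ring,
 *					    PAGE_SIZE << order) : -ENOMEM;
 *
 * On success the ring is ready for hv_ringbuffer_write() and
 * hv_ringbuffer_read().
 */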

/*
 * hv_ringbuffer_cleanup()
 *
 * Cleanup the ring buffer
 */
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
}

/*
 * hv_ringbuffer_write()
 *
 * Write to the ring buffer
 */
int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
		    struct kvec *kv_list, u32 kv_count, bool *signal)
{
	int i = 0;
	u32 bytes_avail_towrite;
	u32 bytes_avail_toread;
	u32 totalbytes_towrite = 0;

	u32 next_write_location;
	u32 old_write;
	u64 prev_indices = 0;
	unsigned long flags;

	for (i = 0; i < kv_count; i++)
		totalbytes_towrite += kv_list[i].iov_len;

	/* Reserve room for the u64 trailer that records the indices. */
	totalbytes_towrite += sizeof(u64);

	spin_lock_irqsave(&outring_info->ring_lock, flags);

	hv_get_ringbuffer_availbytes(outring_info,
				&bytes_avail_toread,
				&bytes_avail_towrite);

	/*
	 * If there is only room for the packet, assume the ring is full.
	 * Otherwise, the next time around, we would think the ring buffer
	 * is empty, since read index == write index.
	 */
	if (bytes_avail_towrite <= totalbytes_towrite) {
		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
		return -EAGAIN;
	}

	/* Write to the ring buffer */
	next_write_location = hv_get_next_write_location(outring_info);

	old_write = next_write_location;

	for (i = 0; i < kv_count; i++) {
		next_write_location = hv_copyto_ringbuffer(outring_info,
						     next_write_location,
						     kv_list[i].iov_base,
						     kv_list[i].iov_len);
	}

	/* Set previous packet start */
	prev_indices = hv_get_ring_bufferindices(outring_info);

	next_write_location = hv_copyto_ringbuffer(outring_info,
					     next_write_location,
					     &prev_indices,
					     sizeof(u64));

	/* Issue a full memory barrier before updating the write index */
	mb();

	/* Now, update the write location */
	hv_set_next_write_location(outring_info, next_write_location);

	spin_unlock_irqrestore(&outring_info->ring_lock, flags);

	*signal = hv_need_to_signal(old_write, outring_info);
	return 0;
}
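
/*
 * Usage sketch (illustrative only; the descriptor, payload, and
 * signaling helper are assumptions about the caller, not part of this
 * file): a sender describes the packet as a kvec array and signals the
 * host only when asked to:
 *
 *	struct kvec kv[2];
 *	bool signal = false;
 *
 *	kv[0].iov_base = &desc;			(packet descriptor)
 *	kv[0].iov_len  = sizeof(desc);
 *	kv[1].iov_base = payload;
 *	kv[1].iov_len  = payload_len;
 *
 *	if (!hv_ringbuffer_write(&channel->outbound, kv, 2, &signal) &&
 *	    signal)
 *		signal_the_host(channel);	(hypothetical helper)
 */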

/*
 * hv_ringbuffer_peek()
 *
 * Read without advancing the read index
 */
int hv_ringbuffer_peek(struct hv_ring_buffer_info *inring_info,
		   void *buffer, u32 buflen)
{
	u32 bytes_avail_towrite;
	u32 bytes_avail_toread;
	u32 next_read_location = 0;
	unsigned long flags;

	spin_lock_irqsave(&inring_info->ring_lock, flags);

	hv_get_ringbuffer_availbytes(inring_info,
				&bytes_avail_toread,
				&bytes_avail_towrite);

	/* Make sure there is something to read */
	if (bytes_avail_toread < buflen) {
		spin_unlock_irqrestore(&inring_info->ring_lock, flags);
		return -EAGAIN;
	}

	/* Convert to byte offset */
	next_read_location = hv_get_next_read_location(inring_info);

	next_read_location = hv_copyfrom_ringbuffer(inring_info,
						buffer,
						buflen,
						next_read_location);

	spin_unlock_irqrestore(&inring_info->ring_lock, flags);

	return 0;
}

/*
 * hv_ringbuffer_read()
 *
 * Read and advance the read index
 */
int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer,
		   u32 buflen, u32 offset, bool *signal)
{
	u32 bytes_avail_towrite;
	u32 bytes_avail_toread;
	u32 next_read_location = 0;
	u64 prev_indices = 0;
	unsigned long flags;
	u32 old_read;

	if (buflen == 0)
		return -EINVAL;

	spin_lock_irqsave(&inring_info->ring_lock, flags);

	hv_get_ringbuffer_availbytes(inring_info,
				&bytes_avail_toread,
				&bytes_avail_towrite);

	old_read = bytes_avail_toread;

	/* Make sure there is something to read */
	if (bytes_avail_toread < buflen) {
		spin_unlock_irqrestore(&inring_info->ring_lock, flags);
		return -EAGAIN;
	}

	next_read_location =
		hv_get_next_readlocation_withoffset(inring_info, offset);

	next_read_location = hv_copyfrom_ringbuffer(inring_info,
						buffer,
						buflen,
						next_read_location);

	next_read_location = hv_copyfrom_ringbuffer(inring_info,
						&prev_indices,
						sizeof(u64),
						next_read_location);

	/*
	 * Make sure all reads are done before we update the read index,
	 * since the writer may start writing to the read area once the
	 * read index is updated.
	 */
	mb();

	/* Update the read index */
	hv_set_next_read_location(inring_info, next_read_location);

	spin_unlock_irqrestore(&inring_info->ring_lock, flags);

	*signal = hv_need_to_signal_on_read(old_read, inring_info);

	return 0;
}
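
/*
 * Usage sketch (illustrative only; the field names follow struct
 * vmpacket_descriptor from linux/hyperv.h, the rest is assumed caller
 * context): a consumer peeks at the fixed-size descriptor first, then
 * reads the payload, using 'offset' to skip the descriptor area it has
 * already consumed:
 *
 *	struct vmpacket_descriptor desc;
 *	bool signal = false;
 *
 *	if (!hv_ringbuffer_peek(&channel->inbound, &desc, sizeof(desc))) {
 *		u32 packetlen = desc.len8 << 3;
 *		u32 userlen = packetlen - (desc.offset8 << 3);
 *
 *		hv_ringbuffer_read(&channel->inbound, buf, userlen,
 *				   desc.offset8 << 3, &signal);
 *		if (signal)
 *			signal_the_host(channel);	(hypothetical)
 *	}
 */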