1/*
2   drbd_proc.c
3
4   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10   drbd is free software; you can redistribute it and/or modify
11   it under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 2, or (at your option)
13   any later version.
14
15   drbd is distributed in the hope that it will be useful,
16   but WITHOUT ANY WARRANTY; without even the implied warranty of
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18   GNU General Public License for more details.
19
20   You should have received a copy of the GNU General Public License
21   along with drbd; see the file COPYING.  If not, write to
22   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
26#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33#include <linux/drbd.h>
34#include "drbd_int.h"
35
36static int drbd_proc_open(struct inode *inode, struct file *file);
37static int drbd_proc_release(struct inode *inode, struct file *file);
38
39
40struct proc_dir_entry *drbd_proc;
41const struct file_operations drbd_proc_fops = {
42	.owner		= THIS_MODULE,
43	.open		= drbd_proc_open,
44	.read		= seq_read,
45	.llseek		= seq_lseek,
46	.release	= drbd_proc_release,
47};
48
/*
 * Print @v into @seq as a decimal number with ',' separating groups of
 * three digits.
 *
 * @v is a rate in kB/sec.  At most two separators are emitted, i.e. values
 * of 1,000,000,000 and above get an ungrouped leading part; values below
 * 1000 (including negative ones) are printed as they are.
 */
static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
{
	const long group = 1000;
	const long million = 1000000;

	if (likely(v < million)) {
		/* the common case is a rate with exactly one separator */
		if (likely(v >= group)) {
			seq_printf(seq, "%ld,%03ld", v / group, v % group);
			return;
		}
		seq_printf(seq, "%ld", v);
		return;
	}

	/* cool: > GiByte/s */
	seq_printf(seq, "%ld,", v / million);
	v %= million;
	seq_printf(seq, "%03ld,%03ld", v / group, v % group);
}
62
63static void drbd_get_syncer_progress(struct drbd_device *device,
64		union drbd_dev_state state, unsigned long *rs_total,
65		unsigned long *bits_left, unsigned int *per_mil_done)
66{
67	/* this is to break it at compile time when we change that, in case we
68	 * want to support more than (1<<32) bits on a 32bit arch. */
69	typecheck(unsigned long, device->rs_total);
70	*rs_total = device->rs_total;
71
72	/* note: both rs_total and rs_left are in bits, i.e. in
73	 * units of BM_BLOCK_SIZE.
74	 * for the percentage, we don't care. */
75
76	if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
77		*bits_left = device->ov_left;
78	else
79		*bits_left = drbd_bm_total_weight(device) - device->rs_failed;
80	/* >> 10 to prevent overflow,
81	 * +1 to prevent division by zero */
82	if (*bits_left > *rs_total) {
83		/* D'oh. Maybe a logic bug somewhere.  More likely just a race
84		 * between state change and reset of rs_total.
85		 */
86		*bits_left = *rs_total;
87		*per_mil_done = *rs_total ? 0 : 1000;
88	} else {
89		/* Make sure the division happens in long context.
90		 * We allow up to one petabyte storage right now,
91		 * at a granularity of 4k per bit that is 2**38 bits.
92		 * After shift right and multiplication by 1000,
93		 * this should still fit easily into a 32bit long,
94		 * so we don't need a 64bit division on 32bit arch.
95		 * Note: currently we don't support such large bitmaps on 32bit
96		 * arch anyways, but no harm done to be prepared for it here.
97		 */
98		unsigned int shift = *rs_total > UINT_MAX ? 16 : 10;
99		unsigned long left = *bits_left >> shift;
100		unsigned long total = 1UL + (*rs_total >> shift);
101		unsigned long tmp = 1000UL - left * 1000UL/total;
102		*per_mil_done = tmp;
103	}
104}
105
106
107/*lge
108 * progress bars shamelessly adapted from driver/md/md.c
109 * output looks like
110 *	[=====>..............] 33.5% (23456/123456)
111 *	finish: 2:20:20 speed: 6,345 (6,456) K/sec
112 */
113static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq,
114		union drbd_dev_state state)
115{
116	unsigned long db, dt, dbdt, rt, rs_total, rs_left;
117	unsigned int res;
118	int i, x, y;
119	int stalled = 0;
120
121	drbd_get_syncer_progress(device, state, &rs_total, &rs_left, &res);
122
123	x = res/50;
124	y = 20-x;
125	seq_printf(seq, "\t[");
126	for (i = 1; i < x; i++)
127		seq_printf(seq, "=");
128	seq_printf(seq, ">");
129	for (i = 0; i < y; i++)
130		seq_printf(seq, ".");
131	seq_printf(seq, "] ");
132
133	if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
134		seq_printf(seq, "verified:");
135	else
136		seq_printf(seq, "sync'ed:");
137	seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
138
139	/* if more than a few GB, display in MB */
140	if (rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
141		seq_printf(seq, "(%lu/%lu)M",
142			    (unsigned long) Bit2KB(rs_left >> 10),
143			    (unsigned long) Bit2KB(rs_total >> 10));
144	else
145		seq_printf(seq, "(%lu/%lu)K",
146			    (unsigned long) Bit2KB(rs_left),
147			    (unsigned long) Bit2KB(rs_total));
148
149	seq_printf(seq, "\n\t");
150
151	/* see drivers/md/md.c
152	 * We do not want to overflow, so the order of operands and
153	 * the * 100 / 100 trick are important. We do a +1 to be
154	 * safe against division by zero. We only estimate anyway.
155	 *
156	 * dt: time from mark until now
157	 * db: blocks written from mark until now
158	 * rt: remaining time
159	 */
160	/* Rolling marks. last_mark+1 may just now be modified.  last_mark+2 is
161	 * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
162	 * least DRBD_SYNC_MARK_STEP time before it will be modified. */
163	/* ------------------------ ~18s average ------------------------ */
164	i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS;
165	dt = (jiffies - device->rs_mark_time[i]) / HZ;
166	if (dt > 180)
167		stalled = 1;
168
169	if (!dt)
170		dt++;
171	db = device->rs_mark_left[i] - rs_left;
172	rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
173
174	seq_printf(seq, "finish: %lu:%02lu:%02lu",
175		rt / 3600, (rt % 3600) / 60, rt % 60);
176
177	dbdt = Bit2KB(db/dt);
178	seq_printf(seq, " speed: ");
179	seq_printf_with_thousands_grouping(seq, dbdt);
180	seq_printf(seq, " (");
181	/* ------------------------- ~3s average ------------------------ */
182	if (proc_details >= 1) {
183		/* this is what drbd_rs_should_slow_down() uses */
184		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
185		dt = (jiffies - device->rs_mark_time[i]) / HZ;
186		if (!dt)
187			dt++;
188		db = device->rs_mark_left[i] - rs_left;
189		dbdt = Bit2KB(db/dt);
190		seq_printf_with_thousands_grouping(seq, dbdt);
191		seq_printf(seq, " -- ");
192	}
193
194	/* --------------------- long term average ---------------------- */
195	/* mean speed since syncer started
196	 * we do account for PausedSync periods */
197	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
198	if (dt == 0)
199		dt = 1;
200	db = rs_total - rs_left;
201	dbdt = Bit2KB(db/dt);
202	seq_printf_with_thousands_grouping(seq, dbdt);
203	seq_printf(seq, ")");
204
205	if (state.conn == C_SYNC_TARGET ||
206	    state.conn == C_VERIFY_S) {
207		seq_printf(seq, " want: ");
208		seq_printf_with_thousands_grouping(seq, device->c_sync_rate);
209	}
210	seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
211
212	if (proc_details >= 1) {
213		/* 64 bit:
214		 * we convert to sectors in the display below. */
215		unsigned long bm_bits = drbd_bm_bits(device);
216		unsigned long bit_pos;
217		unsigned long long stop_sector = 0;
218		if (state.conn == C_VERIFY_S ||
219		    state.conn == C_VERIFY_T) {
220			bit_pos = bm_bits - device->ov_left;
221			if (verify_can_do_stop_sector(device))
222				stop_sector = device->ov_stop_sector;
223		} else
224			bit_pos = device->bm_resync_fo;
225		/* Total sectors may be slightly off for oddly
226		 * sized devices. So what. */
227		seq_printf(seq,
228			"\t%3d%% sector pos: %llu/%llu",
229			(int)(bit_pos / (bm_bits/100+1)),
230			(unsigned long long)bit_pos * BM_SECT_PER_BIT,
231			(unsigned long long)bm_bits * BM_SECT_PER_BIT);
232		if (stop_sector != 0 && stop_sector != ULLONG_MAX)
233			seq_printf(seq, " stop sector: %llu", stop_sector);
234		seq_printf(seq, "\n");
235	}
236}
237
238static int drbd_seq_show(struct seq_file *seq, void *v)
239{
240	int i, prev_i = -1;
241	const char *sn;
242	struct drbd_device *device;
243	struct net_conf *nc;
244	union drbd_dev_state state;
245	char wp;
246
247	static char write_ordering_chars[] = {
248		[WO_none] = 'n',
249		[WO_drain_io] = 'd',
250		[WO_bdev_flush] = 'f',
251	};
252
253	seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
254		   API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag());
255
256	/*
257	  cs .. connection state
258	  ro .. node role (local/remote)
259	  ds .. disk state (local/remote)
260	     protocol
261	     various flags
262	  ns .. network send
263	  nr .. network receive
264	  dw .. disk write
265	  dr .. disk read
266	  al .. activity log write count
267	  bm .. bitmap update write count
268	  pe .. pending (waiting for ack or data reply)
269	  ua .. unack'd (still need to send ack or data reply)
270	  ap .. application requests accepted, but not yet completed
271	  ep .. number of epochs currently "on the fly", P_BARRIER_ACK pending
272	  wo .. write ordering mode currently in use
273	 oos .. known out-of-sync kB
274	*/
275
276	rcu_read_lock();
277	idr_for_each_entry(&drbd_devices, device, i) {
278		if (prev_i != i - 1)
279			seq_printf(seq, "\n");
280		prev_i = i;
281
282		state = device->state;
283		sn = drbd_conn_str(state.conn);
284
285		if (state.conn == C_STANDALONE &&
286		    state.disk == D_DISKLESS &&
287		    state.role == R_SECONDARY) {
288			seq_printf(seq, "%2d: cs:Unconfigured\n", i);
289		} else {
290			/* reset device->congestion_reason */
291			bdi_rw_congested(&device->rq_queue->backing_dev_info);
292
293			nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
294			wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
295			seq_printf(seq,
296			   "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
297			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
298			   "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
299			   i, sn,
300			   drbd_role_str(state.role),
301			   drbd_role_str(state.peer),
302			   drbd_disk_str(state.disk),
303			   drbd_disk_str(state.pdsk),
304			   wp,
305			   drbd_suspended(device) ? 's' : 'r',
306			   state.aftr_isp ? 'a' : '-',
307			   state.peer_isp ? 'p' : '-',
308			   state.user_isp ? 'u' : '-',
309			   device->congestion_reason ?: '-',
310			   test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-',
311			   device->send_cnt/2,
312			   device->recv_cnt/2,
313			   device->writ_cnt/2,
314			   device->read_cnt/2,
315			   device->al_writ_cnt,
316			   device->bm_writ_cnt,
317			   atomic_read(&device->local_cnt),
318			   atomic_read(&device->ap_pending_cnt) +
319			   atomic_read(&device->rs_pending_cnt),
320			   atomic_read(&device->unacked_cnt),
321			   atomic_read(&device->ap_bio_cnt),
322			   first_peer_device(device)->connection->epochs,
323			   write_ordering_chars[device->resource->write_ordering]
324			);
325			seq_printf(seq, " oos:%llu\n",
326				   Bit2KB((unsigned long long)
327					   drbd_bm_total_weight(device)));
328		}
329		if (state.conn == C_SYNC_SOURCE ||
330		    state.conn == C_SYNC_TARGET ||
331		    state.conn == C_VERIFY_S ||
332		    state.conn == C_VERIFY_T)
333			drbd_syncer_progress(device, seq, state);
334
335		if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
336			lc_seq_printf_stats(seq, device->resync);
337			lc_seq_printf_stats(seq, device->act_log);
338			put_ldev(device);
339		}
340
341		if (proc_details >= 2)
342			seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
343	}
344	rcu_read_unlock();
345
346	return 0;
347}
348
349static int drbd_proc_open(struct inode *inode, struct file *file)
350{
351	int err;
352
353	if (try_module_get(THIS_MODULE)) {
354		err = single_open(file, drbd_seq_show, NULL);
355		if (err)
356			module_put(THIS_MODULE);
357		return err;
358	}
359	return -ENODEV;
360}
361
362static int drbd_proc_release(struct inode *inode, struct file *file)
363{
364	module_put(THIS_MODULE);
365	return single_release(inode, file);
366}
367
368/* PROC FS stuff end */
369