linux-kernel.vger.kernel.org archive mirror
From: Ivan Schreter <is@zapwerk.com>
To: Helge Hafting <helgehaf@idb.hist.no>
Cc: linux-kernel@vger.kernel.org
Subject: Re: Buffer management - interesting idea
Date: Fri, 15 Jun 2001 11:41:14 +0200	[thread overview]
Message-ID: <200106150941.LAA18088@borg4.zapnet.de> (raw)
In-Reply-To: <3B29D048.4E19D545@idb.hist.no>

[-- Attachment #1: Type: text/plain, Size: 1968 bytes --]

Hello,

Please CC: replies to me, since I am not subscribed.

>>         http://citeseer.nj.nec.com/63909.html

> The "resistance to scanning" seemed interesting, maybe one-time
> activities like a "find" run or big cat/dd will have less impact with
> this.

Exactly. But not only that.

I have run some experiments. With totally random access to the data, 2Q
degrades to LRU performance (but gets no worse). For normal workloads,
however, the results are quite encouraging. The speedup percentage is
computed assuming that processing an in-memory buffer costs x and
loading an on-disk buffer costs 100x:

(#blocks = disk blocks on the simulated device, buffers = buffer cache
entries, P = access skew level, 2Q+ = hit-rate gain of 2Q relative to
LRU, speedup = estimated real-time gain)

#blocks	buffers	P	hitLRU	hit2Q	2Q+	speedup
262400	16384	8	19.29%	24.89%	29%	7.365%
262400	16384	4	11.56%	14.23%	23%	3.084%
1024K	16384	32	16.90%	22.82%	35%	7.573%
1024K	16384	16	9.00%	11.94%	33%	3.305%
1024K	16384	8	4.94%	6.38%	29%	1.515%
4096K	16384	64	8.40%	11.39%	36%	3.339%
4096K	16384	32	4.32%	5.79%	34%	1.536%
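
To make the cost model concrete, the first row works out as follows
(this is the same calculation the attached program reports as "ptime"):

	cost/access (LRU) = 1 + 99 * (1 - 0.1929) = 80.90
	cost/access (2Q)  = 1 + 99 * (1 - 0.2489) = 75.36
	speedup           = 80.90 / 75.36 - 1     = ~7.36%

which matches the speedup column up to rounding of the hit rates.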

I used reasonable figures for the filesystem size (1G, 4G and 16G) and
the buffer cache (64MB), with a block size of 4K. All simulations used
10% of the buffers for the A1in queue and 25% for the A1out queue. Most
simulations show a roughly 30% (or better) hit-rate improvement over
LRU, translating to a ~3% real-time speedup for a normal workload. The
speedup for a scanning-type workload (find, cat of a large file, etc.)
should be even better - write an access-pattern generator and try it :-)
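
For what it's worth, here is a minimal sketch of such a scan-type
generator (untested; next_block(), SCAN_LEN and SCAN_EVERY are made up
for illustration). It could replace the block-number generation in
main() of the attached program: mostly the usual skewed pattern, but
occasionally a sequential run of blocks, as find or cat of a big file
would produce.

#define	SCAN_LEN	8192		/* blocks per sequential scan */
#define	SCAN_EVERY	100000		/* one scan per ~100000 accesses */

int next_block(void)
{
	static int scan_left = 0, scan_pos = 0;

	if (scan_left > 0) {			/* continue a running scan */
		--scan_left;
		return scan_pos++ % NR;
	}
	if (random() % SCAN_EVERY == 0) {	/* start a new scan */
		scan_pos = random() % NR;
		scan_left = SCAN_LEN - 1;
		return scan_pos++ % NR;
	}

	/* otherwise the usual skewed pattern (see below) */
	int x = random() % (NR * LEVEL);
	for (int i = LEVEL - 1; i > 0; --i)
		if (x >= NR * i) x = (x - NR * i) / (i + 1);
	return x;
}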

The constant P determines the "randomness" of the accesses as follows:

int x = random() % (NR * LEVEL);
for (int i = LEVEL - 1; i > 0; --i)
	if (x >= NR * i) x = (x - NR * i) / (i + 1);

where x is the generated block # to be accessed, LEVEL is P and NR is
the # of disk blocks. The effect is that lower-numbered blocks are
accessed more often, so a higher P means more locality; P=1 gives
completely random access.

I attach a crude program to simulate the LRU, 2Q and FIFO buffer
management policies. If you want, you can play with the parameters a
bit and simulate other workloads (e.g., record what is going on in a
real system and then replay its block accesses through the simulator).
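
A replay loop for such a recorded trace could be as simple as the
following (untested) fragment, replacing the access generation in
main() and assuming whitespace-separated block numbers on stdin,
already mapped to the range 0..NR-1:

	int x;
	while (scanf("%d", &x) == 1) {
		if (x < 0 || x >= NR)
			continue;	/* skip out-of-range blocks */
		lru_page(x);
		r2q_page(x);
		fifo_page(x);
		ops++;
	}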

I would like to hear from you if you get any interesting results.

--
Ivan Schreter
is@zapwerk.com

[-- Attachment #2: test_bufrep.cpp --]
[-- Type: text/plain, Size: 7103 bytes --]

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <signal.h>

int ops = 0, hit_lru = 0, hit_2q = 0, hit_fifo = 0;	// access/hit counters (plain int, wrap after ~2^31 accesses)

#define	BUFS	16384	// # of buffer entries in memory
#define	NR	262400	// # of disk blocks
#define	LEVEL	8	// max. test level (1=total random)

//#define	USE_FASTAGE


#define	BUFSA1IN	(BUFS/10)	// A1in queue: 10% of the buffers
#define	BUFSA1OUT	(BUFS/4)	// A1out ghost list: 25% of the buffers

typedef struct {

	int	next;
	int	prev;
	int 	id;
	int	age;

} lru_entry;
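
/*
 * Plain LRU: a doubly linked list of buffer entries, most recently
 * used page at lru_head, least recently used at lru_tail.  lru_idx[]
 * maps a block number to its entry (-1 = not cached) and lru_free
 * chains the unused entries.
 */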


lru_entry	lru[BUFS];
int		lru_idx[NR];
int		lru_free = 0, lru_head = -1, lru_tail = -1;	// -1 = empty


void lru_page(int x)
{
	if (lru_idx[x] >= 0) {
		hit_lru++;

		// move to beginning of the buffer
		x = lru_idx[x];
		if (x == lru_head) return;

		lru[lru[x].prev].next = lru[x].next;
		if (x == lru_tail) {
			lru_tail = lru[x].prev;
		} else {
			lru[lru[x].next].prev = lru[x].prev;
		}
		lru[x].prev = -1;
		lru[x].next = lru_head;
		lru[lru_head].prev = x;
		lru_head = x;

		return;
	}

	if (lru_free == -1) {

		// no free entry: evict the least recently used page (tail)
		int victim = lru_tail;
		lru_idx[lru[victim].id] = -1;	// unmap the evicted page
		lru_tail = lru[victim].prev;
		lru[lru_tail].next = -1;
		lru[victim].next = -1;		// victim becomes the free list
		lru_free = victim;
	}

	// add to head
	int r = lru_free;
	lru_free = lru[r].next;

	lru[r].prev = -1;
	lru[r].next = lru_head;
	lru[r].id = x;
	if (lru_head >= 0)
		lru[lru_head].prev = r;
	else
		lru_tail = r;	// list was empty, new entry is also the tail
	lru_head = r;

	lru_idx[x] = r;
}


lru_entry	r2q[BUFS];
int		r2q_a1out[BUFSA1OUT];
int		r2q_idx[NR];
int		r2q_free = 0, r2q_head = -1, r2q_tail = -1;
int		r2q_a1i_head = -1, r2q_a1i_tail = -1, r2q_a1i_cnt = 0;
int		r2q_a1o_head = 0, r2q_a1o_tail = 0, r2q_age = 0;

#define	R2Q_MASK	0xffffff
#define	R2Q_FLG_MASK	0xff000000
#define	R2Q_A1I_FLG	0x1000000
#define	R2Q_A1O_FLG	0x2000000
#define	R2Q_AM_FLG	0x4000000
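
/*
 * 2Q bookkeeping: A1in is a FIFO of pages seen for the first time, Am
 * is an LRU list of pages that have been re-referenced, and r2q_a1out
 * is a ring of block numbers remembered after they leave A1in (a
 * "ghost" list - those blocks are no longer cached).  A hit in A1out
 * promotes the block into Am.  r2q_idx[] keeps the buffer slot in the
 * low 24 bits plus the queue-membership flags above.
 */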

void r2q_reclaim()
{
	// free one page

	if (r2q_a1i_cnt > BUFSA1IN) {

		// remove last page from A1in and put to A1out

		int r = r2q_a1i_tail;
#if 0
if (r < 0) {
	printf("Reclaim err: %d - %d\n", r2q_a1i_tail, r2q_a1i_head);
	raise(SIGSEGV);
}
#endif
		r2q_a1i_tail = r2q[r].prev;
		r2q[r2q_a1i_tail].next = -1;
		--r2q_a1i_cnt;
//printf("Del2 %d, cnt = %d\n", r, r2q_a1i_cnt);

		int x = r2q[r].id;
		r2q[r].next = r2q_free;
		r2q_free = r;

		r2q_idx[x] = R2Q_MASK | R2Q_A1O_FLG;

//if (x >= NR || x < 0) printf("set at %d = %d (@%d), head %d\n",
//	r2q_a1o_tail, x, r, r2q_a1o_head);
		r2q_a1out[r2q_a1o_tail++] = x;
		if (r2q_a1o_tail == BUFSA1OUT) r2q_a1o_tail = 0;
		if (r2q_a1o_tail == r2q_a1o_head) {

			// overflow, remove from buf

			int d = r2q_a1out[r2q_a1o_head++];
			if (r2q_a1o_head == BUFSA1OUT) r2q_a1o_head = 0;

			// unmark in index
			r2q_idx[d] &= ~R2Q_A1O_FLG;
		}
		return;
	}

	// remove from tail of Am, do not put in A1out (it was least recently
	// used, so no point in keeping it)

	int x = r2q[r2q_tail].id;
	int r = r2q_tail;
	r2q_idx[x] = R2Q_MASK;
	r2q_tail = r2q[r].prev;
	r2q[r2q_tail].next = -1;
	r2q[r].next = r2q_free;
	r2q_free = r;
}

void r2q_page(int x)
{
	int r = r2q_idx[x] & R2Q_MASK, rm = r2q_idx[x] & R2Q_FLG_MASK;

	if (rm & R2Q_AM_FLG) {

		// already in buffer
		hit_2q++;

		// in LRU buffer, move to first
		if (r == r2q_head) return;

		r2q[r2q[r].prev].next = r2q[r].next;
		if (r == r2q_tail) {
			r2q_tail = r2q[r].prev;
		} else {
			r2q[r2q[r].next].prev = r2q[r].prev;
		}
		r2q[r].prev = -1;
		r2q[r].next = r2q_head;
		r2q[r2q_head].prev = r;
		r2q_head = r;

		return;

	} else if (rm & R2Q_A1O_FLG) {

		// was in A1out, put to head of Am

		if (rm & R2Q_A1I_FLG) {

			// remove from A1in (already in memory)
			hit_2q++;

			--r2q_a1i_cnt;
			if (r == r2q_a1i_head) {
				r2q_a1i_head = r2q[r].next;
				if (r2q_a1i_cnt) r2q[r2q_a1i_head].prev = -1;
			} else if (r == r2q_a1i_tail) {
				r2q_a1i_tail = r2q[r].prev;
				r2q[r2q_a1i_tail].next = -1;
			} else {
				// in the middle
				r2q[r2q[r].next].prev = r2q[r].prev;
				r2q[r2q[r].prev].next = r2q[r].next;
			}
//printf("Del %d, cnt = %d\n", r, r2q_a1i_cnt);

		} else {

			// not yet in memory
			if (r2q_free < 0) r2q_reclaim();

			r = r2q_free;
			r2q_free = r2q[r].next;
			r2q[r].id = x;

			r2q_idx[x] = r;
		}

		// add to head of Am
		r2q[r].prev = -1;
		r2q[r].next = r2q_head;
		if (r2q_head < 0) r2q_tail = r;
		else r2q[r2q_head].prev = r;
		r2q_head = r;

		// must check out flag, may be changed by reclaim
		r2q_idx[x] = r | R2Q_AM_FLG | (r2q_idx[x] & R2Q_A1O_FLG);

		return;

	} else if (rm & R2Q_A1I_FLG) {

		// do nothing - is in memory in A1in queue

		hit_2q++;

#ifdef USE_FASTAGE
#warning Using FastAge

		// mark into R2Q_A1O when in last 25% or so...
		int age = r2q_age - r2q[r].age;

		// age cannot be significantly more than r2q_a1i_cnt,
		// normally it is less; if the page is in the last 25% of
		// its A1in lifetime, remember it in A1out already

		if ((r2q_a1i_cnt - age) < (r2q_a1i_cnt >> 2)) {
			// is in last 25% of its life, put to out pages
			r2q_idx[x] |= R2Q_A1O_FLG;

			r2q_a1out[r2q_a1o_tail++] = x;
			if (r2q_a1o_tail == BUFSA1OUT) r2q_a1o_tail = 0;
			if (r2q_a1o_tail == r2q_a1o_head) {

				// overflow, remove from buf

				int d = r2q_a1out[r2q_a1o_head++];
				if (r2q_a1o_head == BUFSA1OUT) r2q_a1o_head = 0;

				// unmark in index
				r2q_idx[d] &= ~R2Q_A1O_FLG;
			}
		}
#endif

		return;

	} else {

		// is in no queue, load & add to head of A1in
		if (r2q_free < 0) r2q_reclaim();

		r = r2q_free;
		r2q_free = r2q[r].next;
		r2q[r].id = x;
		r2q[r].age = r2q_age++;
		r2q[r].next = r2q_a1i_head;
		r2q[r].prev = -1;
		if (!r2q_a1i_cnt) {
			// first item, set also tail
			r2q_a1i_tail = r;
		} else {
			r2q[r2q_a1i_head].prev = r;
		}
		r2q_a1i_cnt++;
		r2q_a1i_head = r;
//printf("Add %d, cnt = %d\n", r, r2q_a1i_cnt);
//if (r == 0 && r2q_a1i_cnt > 1) raise(SIGSTOP);

		r2q_idx[x] = r | R2Q_A1I_FLG;
	}
}
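
/*
 * Simple FIFO: a ring of block numbers, the oldest cached page is
 * evicted first.  fifo_idx[] maps a block to its slot (-1 = not cached).
 */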

int	fifo_idx[NR];
int	fifo[BUFS];
int	fifo_head = 0, fifo_tail = 0;

void fifo_page(int x)
{
	if (fifo_idx[x] >= 0) {
		hit_fifo++;
		return;
	}

	fifo_idx[x] = fifo_head;
	fifo[fifo_head++] = x;
	if (fifo_head == BUFS) fifo_head = 0;
	if (fifo_head == fifo_tail) {
		fifo_idx[fifo[fifo_tail++]] = -1;
		if (fifo_tail == BUFS) fifo_tail = 0;
	}
}

int main()
{
	memset(lru_idx, -1, sizeof(lru_idx));		// -1 = not cached
	memset(fifo_idx, -1, sizeof(fifo_idx));
	for (int i = 0; i < NR; ++i) r2q_idx[i] = R2Q_MASK;	// no slot, no queue flags

	for (int i = 0; i < BUFS - 1; ++i) {
		lru[i].next = i + 1;
		r2q[i].next = i + 1;
	}
	lru[BUFS-1].next = -1;
	r2q[BUFS-1].next = -1;

	time_t tm = time(NULL);
	for (;;) {
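		// skewed access pattern: lower block numbers are accessed
		// more often; LEVEL=1 degenerates to uniform random access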
		int x = random() % (NR * LEVEL);
		for (int i = LEVEL - 1; i > 0; --i)
			if (x >= NR * i) x = (x - NR * i) / (i + 1);

		lru_page(x);
		r2q_page(x);
		fifo_page(x);
		ops++;
		time_t cur_tm = time(NULL);
		if (cur_tm != tm) {

			// weighted cost: 1 unit per buffer hit plus 100 units
			// per miss (= ops + 99 * misses), scaled down by 1000
			int lru_ptime = (ops + 99 * (ops - hit_lru)) / 1000;
			int r2q_ptime = (ops + 99 * (ops - hit_2q)) / 1000;

			printf("Ops: %d, LRU: %d %.2f%%, 2Q: %d %.2f%%, "
				"FIFO: %d %.2f%%, 2Q/LRU %.2f\n"
				"LRU ptime: %d, 2Q ptime: %d, "
				"ratio 2Q/LRU: %.3f%% +%.3f%%\n",
				ops / 1000, hit_lru / 1000,
				hit_lru * 100.0 / ops,
				hit_2q / 1000, hit_2q * 100.0 / ops,
				hit_fifo / 1000, hit_fifo * 100.0 / ops,
				hit_2q * 1.0 / hit_lru,
				lru_ptime, r2q_ptime,
				r2q_ptime * 100.0 / lru_ptime,
				lru_ptime * 100.0 / r2q_ptime - 100.0);
			tm = cur_tm;
		}
	}
	return 0;
}


Thread overview: 7+ messages
2001-06-06 11:39 Buffer management - interesting idea Ivan Schreter
2001-06-15  9:07 ` Helge Hafting
2001-06-15  9:41 ` Ivan Schreter [this message]
2001-06-15 17:05   ` Ivan Schreter
2001-06-15 18:50     ` Rik van Riel
2001-06-15 23:51     ` Ivan Schreter
2001-06-16 18:35 ` Jeremy Fitzhardinge
