#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>

#include <linux/fs.h>
#include <linux/hdreg.h>

// Size of the buffer that holds one file name read from stdin.
#define FNAME_BUF_SIZE 1024

// Number of file-size buckets (-b) and the span of file sizes, in bytes,
// covered by each one (-s).
static int NUM_BUCKETS = 2097152;
static int BUCKET_SIZE = 512;

// Number of fragment-count buckets kept per file-size bucket (-f) and the
// divisor applied to a file's fragment count to pick one (-g).
static int NUM_FRAGS = 100;
static int FRAG_SIZE = 1;

// file_buckets[b]: number of files counted in size bucket b.
// frag_buckets:    NUM_BUCKETS x NUM_FRAGS counters stored as one flat array.
// files:           total number of files counted so far.
static unsigned int *file_buckets;
static unsigned int *frag_buckets;
static unsigned int files = 0;

// Counter for fragment bucket f of size bucket b inside the flat array fb.
#define FRAG_OFF(fb, b, f) ((fb)[((b) * NUM_FRAGS) + (f)])

/*
 * Count the on-disk fragments of an open file with the FIBMAP ioctl.
 *
 * fd:     open descriptor of the file to examine
 * sb:     stat data for fd, or NULL to have it fetched here with fstat()
 * frags:  out - number of physically contiguous runs of mapped blocks;
 *         never less than 1, even for an empty or fully sparse file
 * blocks: out - number of filesystem blocks needed to hold st_size bytes
 *
 * Returns 0 on success. On failure returns nonzero with errno set and
 * leaves *frags and *blocks untouched.
 */
static int num_fragments(int fd, struct stat *sb, unsigned int *frags, unsigned int *blocks)
{
	struct stat st;
	unsigned int blocksize = 0;
	unsigned int fragments, bcount, i, lastblock, block;
	int res;

	res = ioctl(fd, FIGETBSZ, &blocksize);
	if (res)
		return res;

	// a zero block size would make the division below trap
	if (blocksize == 0) {
		errno = EINVAL;
		return -1;
	}

	if (!sb) {
		res = fstat(fd, &st);
		if (res)
			return res;
		sb = &st;
	}

	// round the file size up to whole blocks
	bcount = (sb->st_size + blocksize - 1) / blocksize;

	fragments = 1;
	lastblock = 0;	// 0 means "no mapped block seen yet"
	for (i = 0; i < bcount; i++) {
		// FIBMAP takes the logical block number and returns the
		// physical one in the same variable
		block = i;
		res = ioctl(fd, FIBMAP, &block);
		if (res)
			return res;

		// physical block 0 denotes a hole; holes occupy no disk space
		// and must not be counted as fragments of their own
		if (block == 0)
			continue;

		if (lastblock && block != lastblock + 1)
			fragments++;
		lastblock = block;
	}

	*frags = fragments;
	*blocks = bcount;
	return 0;
}

static int process_file(const char *fname)
{
	struct stat sb;
	int res, fd;
	unsigned int bucket, b, frag;
	unsigned int f = 0;

	// grab file info
	res = stat(fname, &sb);
	if (res != 0)
		goto out;
		
	// discard non-files
	if (!(sb.st_mode & S_IFREG)) {
		res = -ENOENT;
		goto out;
	}
		
	// put data into bucket
	bucket = sb.st_size / BUCKET_SIZE;
	if (bucket >= NUM_BUCKETS) {
		bucket = NUM_BUCKETS - 1;
	}

	// find number of fragments
	fd = open(fname, O_RDONLY);
	if (fd < 0)
		goto out;

	res = num_fragments(fd, &sb, &f, &b);
	close(fd);

	if (res)
		goto out;

	// find frag bucket
	frag = (f / FRAG_SIZE) - 1;
	if (frag >= NUM_FRAGS)
		frag = NUM_FRAGS - 1;
	
	FRAG_OFF(frag_buckets, bucket, frag)++;
	file_buckets[bucket]++;
	files++;

	return 0;

out:
	perror(fname);
	return res;
}

/*
 * Write the collected histogram to stdout.
 *
 * Two header lines give NUM_FRAGS and BUCKET_SIZE. Every size bucket that
 * holds at least one file then produces one comma-separated line:
 *   midpoint of the bucket's size range, files in the bucket, total files,
 *   followed by the NUM_FRAGS fragment-bucket counters of that bucket.
 */
static void print_buckets(void)
{
	unsigned int bucket, frag;

	printf("NUM_FRAGS=%d\n", NUM_FRAGS);
	printf("BUCKET_SIZE=%d\n", BUCKET_SIZE);

	for (bucket = 0; bucket < (unsigned int)NUM_BUCKETS; bucket++) {
		if (!file_buckets[bucket])
			continue;

		// 64-bit arithmetic: bucket * BUCKET_SIZE can exceed 32 bits
		unsigned long long lo = (unsigned long long)bucket * BUCKET_SIZE;
		unsigned long long hi = lo + BUCKET_SIZE;

		// the counters are unsigned, so print them with %u
		printf("%llu, %u, %u", (lo + hi) / 2, file_buckets[bucket], files);
		for (frag = 0; frag < (unsigned int)NUM_FRAGS; frag++)
			printf(", %u", FRAG_OFF(frag_buckets, bucket, frag));
		printf("\n");
	}
}

/*
 * Print the usage summary and a description of every option to stdout.
 *
 * progname: program name shown in the "Usage:" line
 */
static void print_help(const char *progname)
{
	// fixed text that follows the usage line, one entry per output line
	static const char *const help_lines[] = {
		"          [-f num_frag_buckets] [-g frag_bucket_size]\n",
		"-b: Allocate a given number of file size buckets (2^21).\n",
		"-s: Each file size bucket should span this amount of file sizes (512).\n",
		"-f: Allocate this many fragment size buckets for each file size bucket (100).\n",
		"-g: Each fragment size bucket should span this amount of fragment sizes (1).\n",
		"The names of files to examine should be provided as input.\n",
	};
	size_t idx;

	printf("Usage: %s [-b num_file_size_buckets] [-s file_size_bucket_size]\n", progname);

	for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++)
		fputs(help_lines[idx], stdout);
}

/*
 * Parse a strictly positive decimal integer.
 *
 * Returns 0 and stores the value in *out on success; returns -1 and leaves
 * *out untouched if text is not a number, has trailing characters, or lies
 * outside 1..INT_MAX.
 */
static int parse_positive_int(const char *text, int *out)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(text, &end, 10);
	if (end == text || *end != '\0' || errno == ERANGE)
		return -1;
	if (value <= 0 || value > INT_MAX)
		return -1;

	*out = (int)value;
	return 0;
}

/*
 * Entry point: parse the bucket options, read file names from stdin (one per
 * line), feed each to process_file() and finally print the histogram.
 *
 * Returns 0 on success, 1 on a usage error or allocation failure. Errors on
 * individual files are reported by process_file() and do not affect the exit
 * status.
 */
int main(int argc, char *argv[])
{
	char fname[FNAME_BUF_SIZE];
	int *target;
	int opt;

	while ((opt = getopt(argc, argv, "b:s:f:g:h")) != -1) {
		switch (opt) {
		case 'b':
			target = &NUM_BUCKETS;
			break;
		case 's':
			target = &BUCKET_SIZE;
			break;
		case 'f':
			target = &NUM_FRAGS;
			break;
		case 'g':
			target = &FRAG_SIZE;
			break;
		default:
			print_help(argv[0]);
			return 1;
		}

		// all four settings are used as sizes or divisors, so zero,
		// negative and non-numeric values are rejected up front
		if (parse_positive_int(optarg, target)) {
			fprintf(stderr, "%s: invalid value '%s' for -%c\n",
				argv[0], optarg, opt);
			print_help(argv[0]);
			return 1;
		}
	}

	// FRAG_OFF computes its index in unsigned int arithmetic, so the
	// total number of fragment counters has to fit in that type
	if ((unsigned long long)NUM_BUCKETS * NUM_FRAGS > UINT_MAX) {
		fprintf(stderr, "%s: too many buckets requested\n", argv[0]);
		return 1;
	}

	file_buckets = calloc((size_t)NUM_BUCKETS, sizeof(*file_buckets));
	if (!file_buckets) {
		perror("calloc");
		return 1;
	}

	frag_buckets = calloc((size_t)NUM_BUCKETS * (size_t)NUM_FRAGS,
			      sizeof(*frag_buckets));
	if (!frag_buckets) {
		perror("calloc");
		free(file_buckets);
		return 1;
	}

	// read file names off stdin, one per line
	while (fgets(fname, sizeof(fname), stdin) != NULL) {
		size_t len = strlen(fname);

		if (len > 0 && fname[len - 1] == '\n') {
			// strip off the trailing newline
			fname[--len] = '\0';
		} else if (len == sizeof(fname) - 1) {
			// buffer is full without a newline: unless the line
			// ends right here, the name was truncated
			int c = getchar();

			if (c != EOF && c != '\n') {
				// discard the rest of the line so that it is
				// not mistaken for another file name
				while (c != EOF && c != '\n')
					c = getchar();
				fprintf(stderr, "%s: file name too long, skipped\n",
					argv[0]);
				continue;
			}
		}

		// nothing to examine on a blank line
		if (len == 0)
			continue;

		// failures are reported by process_file() itself
		(void)process_file(fname);
	}

	print_buckets();

	free(frag_buckets);
	free(file_buckets);

	return 0;
}
