#include "imageptr.h"
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <stdarg.h>
#include <strings.h>
#include <sys/mman.h>
#include <errno.h>
#include <unistd.h>
#include <malloc.h>
#include "system.h"

/// minimum block size to discard, currently 16 MiB
#define CONFIG_MEMORY_DISCARD_LIMIT ( ((size_t)16) << 20 )

/// use a traditional memory allocation
#define CONFIG_MEMORY_NORMAL

/// use the virtual memory mapped into a continuous zeroed area
// #define CONFIG_MEMORY_SPARSE

/// use the virtual memory mapped into a window instead of a continuous zeroed area
// #define CONFIG_MEMORY_WINDOW

/// use Huge 2 MiB pages instead of 4 KiB pages
// #define CONFIG_MEMORY_HUGEPAGES

/// the size of the physical memory window
// FIXME: a window of 3*cb_size1.y lines is too small and 4*cb_size1.y is sufficient but too large; 3*(cb_size1)+1 seems to be enough (the macro below currently uses 3*(cb_size1)+3)
#define CONFIG_MEMORY_WINDOW_SIZE(cb_size1) ( 3*(cb_size1) + 3 )

static
size_t page_size()
{
	const long pagesize = sysconf(_SC_PAGESIZE);

	assert( pagesize > 0 );

	return pagesize;
}

/**
 * Size of a huge page.
 *
 * @return a fixed 2 MiB; the value is not queried from the system
 */
static
size_t hugepage_size()
{
	// TODO: grep Hugepagesize /proc/meminfo
	return (size_t)2 * 1024 * 1024;
}

/**
 * Round a byte count down to a multiple of the page size.
 *
 * The masking assumes the page size is a power of two.
 *
 * @param size byte count
 * @return the largest page multiple not exceeding @p size
 */
static
size_t page_floor(
	size_t size
)
{
	const size_t low_bits = page_size() - 1ULL;

	return size & ~low_bits;
}

/**
 * Round a byte count down to a multiple of the huge page size.
 *
 * @param size byte count
 * @return the largest huge-page multiple not exceeding @p size
 */
static
size_t hugepage_floor(
	size_t size
)
{
	const size_t low_bits = hugepage_size() - 1ULL;

	return size & ~low_bits;
}

/**
 * Round a byte count up to a multiple of the page size.
 *
 * The masking assumes the page size is a power of two.
 *
 * @param size byte count
 * @return the smallest page multiple not below @p size (modulo size_t wrap-around)
 */
static
size_t page_ceil(
	size_t size
)
{
	const size_t low_bits = page_size() - 1ULL;

	return ( size + low_bits ) & ~low_bits;
}

/**
 * Round a byte count up to a multiple of the huge page size.
 *
 * @param size byte count
 * @return the smallest huge-page multiple not below @p size (modulo size_t wrap-around)
 */
static
size_t hugepage_ceil(
	size_t size
)
{
	const size_t low_bits = hugepage_size() - 1ULL;

	return ( size + low_bits ) & ~low_bits;
}

/**
 * Pad the byte length of one image row to the SIMD vector width.
 *
 * The row is rounded up to 32 bytes when both __SSE__ and __AVX__ are defined,
 * to 16 bytes when only __SSE__ is defined, and left untouched otherwise.
 *
 * @param size unpadded row length in bytes
 * @return padded row length in bytes
 */
static
size_t calc_stride(size_t size)
{
#if defined(__SSE__) && defined(__AVX__)
	const size_t granule = 32;
#elif defined(__SSE__)
	const size_t granule = 16;
#else
	// no padding at all: the mask below degenerates to the identity
	const size_t granule = 1;
#endif

	return ( size + (granule - 1) ) & ~(granule - 1);
}

/**
 * Allocate the pixel storage of an image from the heap.
 *
 * Sets imageptr->stride (x = sizeof(float), y = padded row length) and
 * imageptr->data. In builds without NDEBUG the buffer is zeroed.
 *
 * @note stride.x == sizeof(float), thus it is possible to use _mm_loadu_ps/_mm_storeu_ps
 * @note data is aligned to the same boundary calc_stride() pads rows to
 *       (32 bytes with AVX, 16 bytes otherwise), thus aligned loads/stores
 *       can be used when accessing the data with no offset
 *
 * @param imageptr descriptor to fill in, must not be NULL
 * @param size image dimensions in pixels
 * @return @p imageptr on success, NULL when the allocation fails (the reason is printed with perror)
 */
struct imageptr_t *imageptr_alloc_data_normal(
	struct imageptr_t *imageptr,
	const struct vec2_t size
)
{
	assert( imageptr );

	// keep the base alignment in sync with the row padding of calc_stride();
	// a 16-byte base would break 32-byte aligned AVX accesses
#if defined(__SSE__) && defined(__AVX__)
	const size_t alignment = 32;
#else
	const size_t alignment = 16;
#endif

	imageptr->stride.x = sizeof(float);
	imageptr->stride.y = calc_stride(imageptr->stride.x * size.x);

	// multiply in size_t so that large images cannot overflow a narrower field type
	const size_t total_size = (size_t)imageptr->stride.y * (size_t)size.y;

	imageptr->data = memalign(alignment, total_size);

	if( NULL == imageptr->data )
	{
		perror(NULL);
		return NULL;
	}

#ifndef NDEBUG
	bzero(imageptr->data, total_size);
#endif

	return imageptr;
}

/**
 * Allocate the pixel storage of an image as an anonymous private mapping
 * requested with MAP_NORESERVE.
 *
 * Sets imageptr->stride, imageptr->data and imageptr->valid (equal to data).
 * In builds without NDEBUG the whole mapping is zeroed.
 *
 * @param imageptr descriptor to fill in, must not be NULL
 * @param size image dimensions in pixels
 * @return @p imageptr on success, NULL when mmap fails (the reason is printed with perror)
 */
struct imageptr_t *imageptr_alloc_data_sparse(
	struct imageptr_t *imageptr,
	const struct vec2_t size
)
{
	assert( imageptr );

	imageptr->stride.x = sizeof(float);
	imageptr->stride.y = calc_stride(imageptr->stride.x * size.x);

	// the mapping length is rounded up to whole pages
	const size_t mapped_bytes = page_ceil( imageptr->stride.y * size.y );

	const int protection = PROT_READ|PROT_WRITE;
	const int flags = MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE;

	imageptr->data = mmap(NULL, mapped_bytes, protection, flags, -1, 0);

	if( imageptr->data == MAP_FAILED )
	{
		perror(NULL);
		return NULL;
	}

#ifndef NDEBUG
	bzero(imageptr->data, mapped_bytes);
#endif

	// nothing has been discarded yet, the valid part starts at the base
	imageptr->valid = imageptr->data;

	return imageptr;
}

/**
 * Allocate the pixel storage of an image as an anonymous private mapping
 * requested with MAP_NORESERVE and MAP_HUGETLB.
 *
 * Sets imageptr->stride, imageptr->data and imageptr->valid (equal to data).
 * In builds without NDEBUG the whole mapping is zeroed.
 *
 * @param imageptr descriptor to fill in, must not be NULL
 * @param size image dimensions in pixels
 * @return @p imageptr on success, NULL when mmap fails (the reason is printed with perror)
 */
struct imageptr_t *imageptr_alloc_data_sparse_huge(
	struct imageptr_t *imageptr,
	const struct vec2_t size
)
{
	assert( imageptr );

	imageptr->stride.x = sizeof(float);
	imageptr->stride.y = calc_stride(imageptr->stride.x * size.x);

	// the mapping length is rounded up to whole huge pages
	const size_t mapped_bytes = hugepage_ceil( imageptr->stride.y * size.y );

	const int protection = PROT_READ|PROT_WRITE;
	const int flags = MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE|MAP_HUGETLB;

	imageptr->data = mmap(NULL, mapped_bytes, protection, flags, -1, 0);

	if( imageptr->data == MAP_FAILED )
	{
		perror(NULL);
		return NULL;
	}

#ifndef NDEBUG
	bzero(imageptr->data, mapped_bytes);
#endif

	// nothing has been discarded yet, the valid part starts at the base
	imageptr->valid = imageptr->data;

	return imageptr;
}

/**
 * Unmap the part of a sparse mapping that starts at imageptr->valid.
 *
 * The length is the page-rounded size of the whole image minus the distance
 * between imageptr->data and imageptr->valid. A munmap failure is reported
 * with perror.
 *
 * @param imageptr descriptor with non-NULL data and valid pointers
 * @param size image dimensions in pixels
 */
void imageptr_free_data_sparse(
	struct imageptr_t *imageptr,
	const struct vec2_t size
)
{
	assert( imageptr );

	assert( imageptr->data );
	assert( imageptr->valid );

	// length of the whole mapping, rounded up to whole pages
	const size_t mapped_bytes = page_ceil( imageptr->stride.y * size.y );

	// bytes between the base and the valid pointer
	// NOTE: starting at "valid" instead of "data" fixed the sporadic crashes
	const ptrdiff_t skipped = (uintptr_t)imageptr->valid - (uintptr_t)imageptr->data;

	const int status = munmap(imageptr->valid, mapped_bytes - skipped);

	if( status == -1 )
	{
		perror(NULL);
	}
}

/**
 * Unmap the part of a sparse huge-page mapping that starts at imageptr->valid.
 *
 * The length is the huge-page-rounded size of the whole image minus the
 * distance between imageptr->data and imageptr->valid. A munmap failure is
 * reported with perror.
 *
 * @param imageptr descriptor with non-NULL data and valid pointers
 * @param size image dimensions in pixels
 */
void imageptr_free_data_sparse_huge(
	struct imageptr_t *imageptr,
	const struct vec2_t size
)
{
	assert( imageptr );

	assert( imageptr->data );
	assert( imageptr->valid );

	// length of the whole mapping, rounded up to whole huge pages
	const size_t mapped_bytes = hugepage_ceil( imageptr->stride.y * size.y );

	// bytes between the base and the valid pointer
	// NOTE: starting at "valid" instead of "data" fixed the sporadic crashes
	const ptrdiff_t skipped = (uintptr_t)imageptr->valid - (uintptr_t)imageptr->data;

	const int status = munmap(imageptr->valid, mapped_bytes - skipped);

	if( status == -1 )
	{
		perror(NULL);
	}
}

/**
 * Unmap the leading part of a sparse mapping that is no longer needed.
 *
 * Everything from imageptr->valid up to the page-floored start of row
 * (size.y - 1) is unmapped, and imageptr->valid is advanced to that point.
 * Nothing happens when the range is empty, already discarded, or smaller than
 * CONFIG_MEMORY_DISCARD_LIMIT.
 *
 * @param imageptr descriptor with non-NULL data and valid pointers
 * @param size number of rows processed so far is taken from size.y
 */
void imageptr_discard_sparse(
	struct imageptr_t *imageptr,
	const struct vec2_t size
)
{
	assert( imageptr );

	assert( imageptr->data );
	assert( imageptr->valid );

	// with fewer than two rows there is nothing in front of the last row
	if( size.y < 2 )
		return;

	// floor to PAGESIZE boundary; multiply in size_t to avoid overflow of a narrower type
	const size_t total_size = page_floor( (size_t)imageptr->stride.y * (size_t)(size.y - 1) );

	// Invalid argument
	if( 0 == total_size )
		return;

	const size_t offset = (uintptr_t)imageptr->valid - (uintptr_t)imageptr->data;

	// empty range, or the area was already discarded beyond this point;
	// the unsigned difference below would otherwise wrap to a huge length
	if( total_size <= offset )
		return;

	const size_t length = total_size - offset;

	if( length < CONFIG_MEMORY_DISCARD_LIMIT )
		return;

	if( -1 == munmap(imageptr->valid, length) )
	{
		perror(NULL);
		// the range is still mapped, keep "valid" so a later free covers it
		return;
	}

	// update the pointer to the beginning of data
	imageptr->valid = (void *)( (uintptr_t)imageptr->data + total_size );
}

/**
 * Unmap the leading part of a sparse huge-page mapping that is no longer needed.
 *
 * Everything from imageptr->valid up to the huge-page-floored start of row
 * (size.y - 1) is unmapped, and imageptr->valid is advanced to that point.
 * Nothing happens when the range is empty, already discarded, or smaller than
 * CONFIG_MEMORY_DISCARD_LIMIT.
 *
 * @param imageptr descriptor with non-NULL data and valid pointers
 * @param size number of rows processed so far is taken from size.y
 */
void imageptr_discard_sparse_huge(
	struct imageptr_t *imageptr,
	const struct vec2_t size
)
{
	assert( imageptr );

	assert( imageptr->data );
	assert( imageptr->valid );

	// with fewer than two rows there is nothing in front of the last row
	if( size.y < 2 )
		return;

	// floor to HUGEPAGESIZE boundary; multiply in size_t to avoid overflow of a narrower type
	const size_t total_size = hugepage_floor( (size_t)imageptr->stride.y * (size_t)(size.y - 1) );

	// Invalid argument
	if( 0 == total_size )
		return;

	const size_t offset = (uintptr_t)imageptr->valid - (uintptr_t)imageptr->data;

	// empty range, or the area was already discarded beyond this point;
	// the unsigned difference below would otherwise wrap to a huge length
	if( total_size <= offset )
		return;

	const size_t length = total_size - offset;

	if( length < CONFIG_MEMORY_DISCARD_LIMIT )
		return;

	if( -1 == munmap(imageptr->valid, length) )
	{
		perror(NULL);
		// the range is still mapped, keep "valid" so a later free covers it
		return;
	}

	// update the pointer to the beginning of data
	imageptr->valid = (void *)( (uintptr_t)imageptr->data + total_size );
}

/**
 * Discard the already processed part of an image according to the configured
 * memory model.
 *
 * With CONFIG_MEMORY_NORMAL this is a no-op. With CONFIG_MEMORY_SPARSE or
 * CONFIG_MEMORY_WINDOW the sparse discard routine is called, in its huge-page
 * flavour when CONFIG_MEMORY_HUGEPAGES is defined.
 *
 * @param imageptr image descriptor
 * @param size forwarded unchanged to the selected discard routine
 */
void imageptr_discard(
	struct imageptr_t *imageptr,
	const struct vec2_t size
)
{
#if   defined(CONFIG_MEMORY_NORMAL)
	// heap memory is never discarded piecewise
	UNUSED(imageptr);
	UNUSED(size);
#elif defined(CONFIG_MEMORY_SPARSE) || defined(CONFIG_MEMORY_WINDOW)
	// both mapped models share the sparse discard routines
#	ifdef CONFIG_MEMORY_HUGEPAGES
	imageptr_discard_sparse_huge(imageptr, size);
#	else
	imageptr_discard_sparse(imageptr, size);
#	endif
#else
#	error Either CONFIG_MEMORY_NORMAL, CONFIG_MEMORY_SPARSE or CONFIG_MEMORY_WINDOW must be defined.
#endif
}

/**
 * Create an image descriptor together with heap-allocated pixel storage.
 *
 * @param size image dimensions in pixels
 * @return the new descriptor, or NULL when either allocation fails
 */
struct imageptr_t *imageptr_create_normal(
	const struct vec2_t size
)
{
	struct imageptr_t *image = malloc( sizeof *image );

	if( NULL == image )
		return NULL;

	if( NULL != imageptr_alloc_data_normal( image, size ) )
		return image;

	// the pixel storage could not be obtained, drop the descriptor again
	free( image );

	return NULL;
}

/**
 * Create an image; an alias for imageptr_create_normal().
 *
 * @param size image dimensions in pixels
 * @return the new descriptor, or NULL on failure
 */
struct imageptr_t *imageptr_create(const struct vec2_t size)
{
	struct imageptr_t *image = imageptr_create_normal(size);

	return image;
}

/**
 * Create an image descriptor together with sparse (mmap-based) pixel storage.
 *
 * @param size image dimensions in pixels
 * @return the new descriptor, or NULL when the descriptor or the mapping cannot be obtained
 */
struct imageptr_t *imageptr_create_sparse(
	struct vec2_t size
)
{
	struct imageptr_t *image = malloc( sizeof *image );

	if( NULL == image )
		return NULL;

	if( NULL != imageptr_alloc_data_sparse( image, size ) )
		return image;

	// the mapping could not be obtained, drop the descriptor again
	free( image );

	return NULL;
}

/**
 * Create an image descriptor together with sparse huge-page pixel storage.
 *
 * @param size image dimensions in pixels
 * @return the new descriptor, or NULL when the descriptor or the mapping cannot be obtained
 */
struct imageptr_t *imageptr_create_sparse_huge(
	struct vec2_t size
)
{
	struct imageptr_t *image = malloc( sizeof *image );

	if( NULL == image )
		return NULL;

	if( NULL != imageptr_alloc_data_sparse_huge( image, size ) )
		return image;

	// the mapping could not be obtained, drop the descriptor again
	free( image );

	return NULL;
}

/**
 * Allocate the pixel storage of an image as a large virtual area in which
 * every "window_size" segment maps the same temporary file in /dev/shm.
 *
 * Sets imageptr->stride always; imageptr->data and imageptr->valid are set
 * only on success. On failure every resource acquired here (file descriptor,
 * mapping) is released again and the reason is printed with perror.
 *
 * @param imageptr descriptor to fill in, must not be NULL
 * @param size image dimensions in pixels
 * @param lines number of image rows the physical window has to hold
 * @return @p imageptr on success, NULL on failure
 */
struct imageptr_t *imageptr_alloc_data_window(
	struct imageptr_t *imageptr,
	const struct vec2_t size,
	int lines
)
{
	assert( imageptr );

	imageptr->stride.x = sizeof(float);
	imageptr->stride.y = calc_stride(imageptr->stride.x * size.x);

	// ceil to PAGESIZE boundary; multiply in size_t to avoid overflow of a narrower type
	const size_t window_size = page_ceil( (size_t)imageptr->stride.y * (size_t)lines );

	// an empty window cannot tile the area and would divide by zero below
	if( 0 == window_size )
	{
		errno = EINVAL;
		perror(NULL);
		return NULL;
	}

	// ceil to "window_size" boundary
	const size_t total_size = ( (size_t)imageptr->stride.y * (size_t)size.y + (window_size-1) ) / window_size * window_size;

	dprintf("%zu MiB area mapped into %zu MiB window\n", total_size>>20, window_size>>20);

	char path[] = "/dev/shm/window-XXXXXX";
	void *base = MAP_FAILED;

	// create temp file
	int fd = mkstemp(path);
	if( -1 == fd )
	{
		perror(NULL);
		return NULL;
	}

	// the file lives on only through "fd" and the mappings created from it
	unlink(path);

	if( -1 == ftruncate(fd, (off_t)window_size) )
	{
		perror(NULL);
		goto fail_close;
	}

	// allocate sparse "total_size" region
	base = mmap(NULL, total_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0);

	if( MAP_FAILED == base )
	{
		perror(NULL);
		goto fail_close;
	}

	// while(...) mmap segment into "fd"
	for(size_t offset = 0; offset < total_size; offset += window_size)
	{
		// mmap(2): If the memory region specified by addr and len overlaps pages of any existing mapping(s), then the overlapped part of the existing mapping(s) will be discarded.
		if( MAP_FAILED == mmap( (void *)( (uintptr_t)base + offset ), window_size, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED|MAP_NORESERVE, fd, 0) )
		{
			perror(NULL);
			goto fail_unmap;
		}
	}

	// the mappings keep the file alive, the descriptor is no longer needed
	close(fd);

	imageptr->data = base;
	imageptr->valid = base;

	return imageptr;

fail_unmap:
	// covers both the anonymous remainder and the segments already overlaid
	munmap(base, total_size);
fail_close:
	close(fd);

	return NULL;
}

// #include <fcntl.h>

/**
 * Allocate the pixel storage of an image as a large virtual huge-page area in
 * which every "window_size" segment maps the same temporary file in
 * /mnt/hugetlbfs.
 *
 * tmpfs is not compatible with Huge pages, hugetlbfs is fine.
 *
 * Sets imageptr->stride always; imageptr->data and imageptr->valid are set
 * only on success. On failure every resource acquired here (file descriptor,
 * mapping) is released again and the reason is printed with perror.
 *
 * @param imageptr descriptor to fill in, must not be NULL
 * @param size image dimensions in pixels
 * @param lines number of image rows the physical window has to hold
 * @return @p imageptr on success, NULL on failure
 */
struct imageptr_t *imageptr_alloc_data_window_huge(
	struct imageptr_t *imageptr,
	const struct vec2_t size,
	int lines
)
{
	assert( imageptr );

	imageptr->stride.x = sizeof(float);
	imageptr->stride.y = calc_stride(imageptr->stride.x * size.x);

	// ceil to HUGEPAGESIZE boundary; multiply in size_t to avoid overflow of a narrower type
	const size_t window_size = hugepage_ceil( (size_t)imageptr->stride.y * (size_t)lines );

	// an empty window cannot tile the area and would divide by zero below
	if( 0 == window_size )
	{
		errno = EINVAL;
		perror(NULL);
		return NULL;
	}

	// ceil to "window_size" boundary
	const size_t total_size = ( (size_t)imageptr->stride.y * (size_t)size.y + (window_size-1) ) / window_size * window_size;

	dprintf("%zu MiB area mapped into %zu MiB window\n", total_size>>20, window_size>>20);

	char path[] = "/mnt/hugetlbfs/window-XXXXXX";
	void *base = MAP_FAILED;

	// create temp file
	int fd = mkstemp(path);
	if( -1 == fd )
	{
		perror("mkstemp");
		return NULL;
	}

	// the file lives on only through "fd" and the mappings created from it
	unlink(path);

	if( -1 == ftruncate(fd, (off_t)window_size) )
	{
		perror("ftruncate");
		goto fail_close;
	}

	// allocate sparse "total_size" region
	base = mmap(NULL, total_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE|MAP_HUGETLB, -1, 0);

	if( MAP_FAILED == base )
	{
		perror("mmap(MAP_PRIVATE|MAP_ANONYMOUS)");
		goto fail_close;
	}

	// while(...) mmap segment into "fd"
	for(size_t offset = 0; offset < total_size; offset += window_size)
	{
		// mmap(2): If the memory region specified by addr and len overlaps pages of any existing mapping(s), then the overlapped part of the existing mapping(s) will be discarded.
		if( MAP_FAILED == mmap( (void *)( (uintptr_t)base + offset ), window_size, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED|MAP_NORESERVE/*|MAP_HUGETLB*/, fd, 0) )
		{
			perror("mmap(MAP_FIXED|MAP_SHARED)");
			goto fail_unmap;
		}
	}

	// the mappings keep the file alive, the descriptor is no longer needed
	close(fd);

	imageptr->data = base;
	imageptr->valid = base;

	return imageptr;

fail_unmap:
	// covers both the anonymous remainder and the segments already overlaid
	munmap(base, total_size);
fail_close:
	close(fd);

	return NULL;
}

/**
 * Unmap the part of a windowed mapping that starts at imageptr->valid.
 *
 * The sizes are recomputed the same way imageptr_alloc_data_window() computes
 * them, so @p size and @p lines must match the values used at allocation.
 * A munmap failure is reported with perror.
 *
 * @param imageptr descriptor with a non-NULL data pointer
 * @param size image dimensions in pixels
 * @param lines number of image rows held by the physical window
 */
void imageptr_free_data_window(
	struct imageptr_t *imageptr,
	const struct vec2_t size,
	int lines
)
{
	assert( imageptr );

	assert( imageptr->data );

	// one window, rounded up to whole pages
	const size_t window_bytes = page_ceil( imageptr->stride.y * lines );

	// whole area, rounded up to whole windows
	const size_t area_bytes = ( imageptr->stride.y * size.y + (window_bytes-1) ) / window_bytes * window_bytes;

	// bytes between the base and the valid pointer
	const ptrdiff_t skipped = (uintptr_t)imageptr->valid - (uintptr_t)imageptr->data;

	const int status = munmap(imageptr->valid, area_bytes - skipped);

	if( status == -1 )
	{
		perror(NULL);
	}
}

/**
 * Unmap the part of a windowed huge-page mapping that starts at imageptr->valid.
 *
 * The sizes are recomputed the same way imageptr_alloc_data_window_huge()
 * computes them, so @p size and @p lines must match the values used at
 * allocation. A munmap failure is reported with perror.
 *
 * @param imageptr descriptor with a non-NULL data pointer
 * @param size image dimensions in pixels
 * @param lines number of image rows held by the physical window
 */
void imageptr_free_data_window_huge(
	struct imageptr_t *imageptr,
	const struct vec2_t size,
	int lines
)
{
	assert( imageptr );

	assert( imageptr->data );

	// one window, rounded up to whole huge pages
	const size_t window_bytes = hugepage_ceil( imageptr->stride.y * lines );

	// whole area, rounded up to whole windows
	const size_t area_bytes = ( imageptr->stride.y * size.y + (window_bytes-1) ) / window_bytes * window_bytes;

	// bytes between the base and the valid pointer
	const ptrdiff_t skipped = (uintptr_t)imageptr->valid - (uintptr_t)imageptr->data;

	const int status = munmap(imageptr->valid, area_bytes - skipped);

	if( status == -1 )
	{
		perror(NULL);
	}
}

/**
 * Create an image descriptor together with windowed pixel storage.
 *
 * @param size image dimensions in pixels
 * @param lines number of image rows the physical window has to hold
 * @return the new descriptor, or NULL when the descriptor or the mapping cannot be obtained
 */
struct imageptr_t *imageptr_create_window(
	const struct vec2_t size,
	int lines
)
{
	struct imageptr_t *image = malloc( sizeof *image );

	if( NULL == image )
		return NULL;

	if( NULL != imageptr_alloc_data_window( image, size, lines ) )
		return image;

	// the mapping could not be obtained, drop the descriptor again
	free( image );

	return NULL;
}

/**
 * Create an image descriptor together with windowed huge-page pixel storage.
 *
 * @param size image dimensions in pixels
 * @param lines number of image rows the physical window has to hold
 * @return the new descriptor, or NULL when the descriptor or the mapping cannot be obtained
 */
struct imageptr_t *imageptr_create_window_huge(
	const struct vec2_t size,
	int lines
)
{
	struct imageptr_t *image = malloc( sizeof *image );

	if( NULL == image )
		return NULL;

	if( NULL != imageptr_alloc_data_window_huge( image, size, lines ) )
		return image;

	// the mapping could not be obtained, drop the descriptor again
	free( image );

	return NULL;
}

/**
 * Release a windowed image: unmap its pixel storage, then free the descriptor.
 *
 * @param imageptr descriptor created by imageptr_create_window()
 * @param size image dimensions in pixels
 * @param lines number of image rows held by the physical window
 */
void imageptr_destroy_window(struct imageptr_t *imageptr, const struct vec2_t size, int lines)
{
	// the descriptor is still read while unmapping, so it goes last
	imageptr_free_data_window(imageptr, size, lines);
	free(imageptr);
}

/**
 * Release a windowed huge-page image: unmap its pixel storage, then free the descriptor.
 *
 * @param imageptr descriptor created by imageptr_create_window_huge()
 * @param size image dimensions in pixels
 * @param lines number of image rows held by the physical window
 */
void imageptr_destroy_window_huge(struct imageptr_t *imageptr, const struct vec2_t size, int lines)
{
	// the descriptor is still read while unmapping, so it goes last
	imageptr_free_data_window_huge(imageptr, size, lines);
	free(imageptr);
}

/**
 * Release a heap-allocated image: free its pixel storage, then the descriptor.
 *
 * @param imageptr descriptor created by imageptr_create_normal()
 * @param size unused, present for symmetry with the other destroy routines
 */
void imageptr_destroy_normal(struct imageptr_t *imageptr, const struct vec2_t size)
{
	UNUSED(size);

	// pixel storage first, the descriptor holding the pointer last
	void *pixels = imageptr->data;

	free(pixels);
	free(imageptr);
}

/**
 * Release an image; an alias for imageptr_destroy_normal().
 *
 * @param imageptr descriptor created by imageptr_create()
 * @param size forwarded unchanged
 */
void imageptr_destroy(struct imageptr_t *imageptr, const struct vec2_t size)
{
	imageptr_destroy_normal(imageptr, size);
}

/**
 * Release a sparse image: unmap its pixel storage, then free the descriptor.
 *
 * @param imageptr descriptor created by imageptr_create_sparse()
 * @param size image dimensions in pixels
 */
void imageptr_destroy_sparse(struct imageptr_t *imageptr, const struct vec2_t size)
{
	// the descriptor is still read while unmapping, so it goes last
	imageptr_free_data_sparse(imageptr, size);
	free(imageptr);
}

/**
 * Release a sparse huge-page image: unmap its pixel storage, then free the descriptor.
 *
 * @param imageptr descriptor created by imageptr_create_sparse_huge()
 * @param size image dimensions in pixels
 */
void imageptr_destroy_sparse_huge(struct imageptr_t *imageptr, const struct vec2_t size)
{
	// the descriptor is still read while unmapping, so it goes last
	imageptr_free_data_sparse_huge(imageptr, size);
	free(imageptr);
}

/**
 * Fill an image with a deterministic pattern derived from the pixel coordinates.
 *
 * Each pixel receives the float whose bit pattern is 0x3F000000 combined with
 * 23 coordinate-dependent low bits, i.e. a normal value in [0.5, 1).
 *
 * The bit pattern is built in unsigned arithmetic (no signed overflow) and
 * stored through a union instead of an int32_t pointer cast, which violated
 * the strict aliasing rule. The stored bits are the same as before whenever
 * the former signed computation was well defined.
 *
 * @param imageptr image to fill, must not be NULL
 * @param size image dimensions in pixels
 */
void imageptr_fill(
	struct imageptr_t *imageptr,
	const struct vec2_t size
)
{
	assert( imageptr );

	for(int y = 0; y < size.y; y++)
	{
		for(int x = 0; x < size.x; x++)
		{
			float *pixel = imageptr_pixel(imageptr, vec2_create(x, y));

			const uint32_t ux = (uint32_t)x;
			const uint32_t uy = (uint32_t)y;

			// coordinate hash; wraps modulo 2^32 instead of overflowing
			const uint32_t noise = ( 1u + ux*(ux<<1) + uy*uy ) ^ ( ux*(uy<<1) );

			// ((1<<6)-1)<<24 == 0x3F000000, (1<<23)-1 == 0x007FFFFF
			const union { uint32_t bits; float value; } pattern = {
				.bits = 0x3F000000u | ( (noise<<8) & 0x007FFFFFu )
			};

			// a normal, non-NaN value, so the float store keeps the bits intact
			*pixel = pattern.value;
		}
	}
}

/**
 * Fill a strip with the deterministic pattern of the full image.
 *
 * The pattern value is computed from the coordinates in the full image
 * (data_offset + local position), while the pixel is addressed relative to
 * the strip, i.e. starting at (0, 0) in @p imageptr.
 *
 * The bit pattern is built in unsigned arithmetic (no signed overflow) and
 * stored through a union instead of an int32_t pointer cast, which violated
 * the strict aliasing rule. The stored bits are the same as before whenever
 * the former signed computation was well defined.
 *
 * @param imageptr strip to fill, must not be NULL
 * @param data_offset position of the strip inside the full image
 * @param data_size dimensions of the strip in pixels
 */
void imageptr_fill_strip(
	struct imageptr_t *imageptr,
	const struct vec2_t data_offset,
	const struct vec2_t data_size
)
{
	assert( imageptr );

	for(int y = data_offset.y; y < data_offset.y+data_size.y; y++)
	{
		for(int x = data_offset.x; x < data_offset.x+data_size.x; x++)
		{
			float *pixel = imageptr_pixel(imageptr, vec2_create(x-data_offset.x, y-data_offset.y));

			const uint32_t ux = (uint32_t)x;
			const uint32_t uy = (uint32_t)y;

			// coordinate hash; wraps modulo 2^32 instead of overflowing
			const uint32_t noise = ( 1u + ux*(ux<<1) + uy*uy ) ^ ( ux*(uy<<1) );

			// ((1<<6)-1)<<24 == 0x3F000000, (1<<23)-1 == 0x007FFFFF
			const union { uint32_t bits; float value; } pattern = {
				.bits = 0x3F000000u | ( (noise<<8) & 0x007FFFFFu )
			};

			// a normal, non-NaN value, so the float store keeps the bits intact
			*pixel = pattern.value;
		}
	}
}

/**
 * Write an image to a text file in the "P2" (plain PGM) format.
 *
 * Every pixel is multiplied by 255 and rounded; in builds with assertions the
 * pixel must not be NaN and the result must lie in [0, 255].
 * When the file cannot be opened a message is printed and nothing is written.
 *
 * @param imageptr image to dump, must not be NULL
 * @param size image dimensions in pixels
 * @param path name of the output file, must not be NULL
 */
void imageptr_dump(
	struct imageptr_t *imageptr,
	const struct vec2_t size,
	const char *path
)
{
	assert( path );

	FILE *out = fopen(path, "w");

	if( NULL == out )
	{
		printf("unable to open the file\n");
		return;
	}

	assert( imageptr );

	// header: magic number, width, height, maximum gray value
	fprintf(out, "P2\n%i %i\n%i\n", size.x, size.y, 255);

	for(int row = 0; row < size.y; row++)
	{
		for(int col = 0; col < size.x; col++)
		{
			const float *sample = imageptr_pixel(imageptr, vec2_create(col, row));

			// NaN is the only value that compares unequal to itself
			assert( *sample == *sample );

			const int gray = roundf(*sample * 255);

			assert( gray >= 0 && gray <= 255 );

			fprintf(out, "%i\n", gray);
		}
	}

	fclose(out);
}

/**
 * Write an image to a text file in the "P2" (plain PGM) format using a
 * logarithmic magnitude scale.
 *
 * Every pixel p is mapped to round( log(1 + |p|*100) / 10 * 255 ); in builds
 * with assertions the pixel must not be NaN and the result must lie in [0, 255].
 * When the file cannot be opened a message is printed and nothing is written,
 * matching imageptr_dump() (formerly this was only guarded by an assertion).
 *
 * @param imageptr image to dump, must not be NULL
 * @param size image dimensions in pixels
 * @param path name of the output file, must not be NULL
 */
void imageptr_log_dump(
	struct imageptr_t *imageptr,
	const struct vec2_t size,
	const char *path
)
{
	assert( path );

	FILE *file = fopen(path, "w");

	// must also hold in NDEBUG builds, otherwise fprintf() gets a NULL stream
	if( !file )
	{
		printf("unable to open the file\n");
		return;
	}

	assert( imageptr );

	// header: magic number, width, height, maximum gray value
	fprintf(file, "P2\n%i %i\n%i\n", size.x, size.y, 255);

	// gain applied to the magnitude and divisor applied to the logarithm
	const float a = 100.f;
	const float b = 10.f;

	for(int y = 0; y < size.y; y++)
	{
		for(int x = 0; x < size.x; x++)
		{
			float *pixel = imageptr_pixel(imageptr, vec2_create(x, y));

			// NaN is the only value that compares unequal to itself
			assert( *pixel == *pixel );

			int int_pixel = roundf( logf(1.f+fabsf(*pixel)*a)/b * 255 );

			assert( int_pixel >= 0 && int_pixel <= 255 );

			fprintf(file, "%i\n", int_pixel);
		}
	}

	fclose(file);
}

/**
 * Like imageptr_log_dump(), with the file name given as a printf-style format.
 *
 * The name is formatted into a 4096-byte buffer with a bounded write; when it
 * does not fit (or formatting fails) a message is printed to stderr and
 * nothing is dumped.
 *
 * @param imageptr image to dump
 * @param size image dimensions in pixels
 * @param path printf-style format of the output file name, must not be NULL
 * @param ... arguments consumed by @p path
 */
void imageptr_log_dump_fmt(
	struct imageptr_t *imageptr,
	const struct vec2_t size,
	const char *path,
	...
)
{
	assert( path );

	char buff[4096];

	va_list ap;
	va_start(ap, path);
	// bounded: an over-long name must not overflow the stack buffer
	const int length = vsnprintf(buff, sizeof buff, path, ap);
	va_end(ap);

	if( length < 0 || (size_t)length >= sizeof buff )
	{
		fprintf(stderr, "unable to format the file name\n");
		return;
	}

	imageptr_log_dump(imageptr, size, buff);
}

/**
 * Like imageptr_log_dump(), with the file name given as a printf-style format
 * and an already started argument list.
 *
 * The name is formatted into a 4096-byte buffer with a bounded write; when it
 * does not fit (or formatting fails) a message is printed to stderr and
 * nothing is dumped. The caller remains responsible for va_end().
 *
 * @param imageptr image to dump
 * @param size image dimensions in pixels
 * @param path printf-style format of the output file name, must not be NULL
 * @param ap arguments consumed by @p path
 */
void imageptr_log_dump_fmt_va(
	struct imageptr_t *imageptr,
	const struct vec2_t size,
	const char *path,
	va_list ap
)
{
	assert( path );

	char buff[4096];

	// bounded: an over-long name must not overflow the stack buffer
	const int length = vsnprintf(buff, sizeof buff, path, ap);

	if( length < 0 || (size_t)length >= sizeof buff )
	{
		fprintf(stderr, "unable to format the file name\n");
		return;
	}

	imageptr_log_dump(imageptr, size, buff);
}

/**
 * Create a descriptor that refers to the same pixel storage as @p imageptr,
 * starting at the pixel returned by imageptr_pixel() for @p offset.
 *
 * Only the descriptor is allocated; the caller releases it with free().
 * Members other than data and stride are zero-initialised.
 *
 * @param imageptr source image, must not be NULL
 * @param offset position passed to imageptr_pixel() to obtain the new origin
 * @return the new descriptor, or NULL when the allocation fails
 */
struct imageptr_t *imageptr_viewport(
	struct imageptr_t *imageptr,
	const struct vec2_t offset
)
{
	assert( imageptr );

	struct imageptr_t *view = malloc( sizeof *view );

	if( NULL == view )
		return NULL;

	// members not named in the literal are set to zero
	*view = (struct imageptr_t){
		.stride = imageptr->stride,
		.data = imageptr_pixel(imageptr, offset),
	};

	return view;
}

/**
 * Dump a rectangular part of an image on a logarithmic scale, with the file
 * name given as a printf-style format.
 *
 * A temporary viewport descriptor is created at @p offset, dumped with
 * imageptr_log_dump_fmt_va() and freed again. When the viewport cannot be
 * allocated the reason is printed with perror and nothing is dumped.
 *
 * @param imageptr source image
 * @param offset position of the dumped part inside the image
 * @param size dimensions of the dumped part in pixels
 * @param path printf-style format of the output file name
 * @param ... arguments consumed by @p path
 */
void imageptr_log_dump_viewport_fmt(
	struct imageptr_t *imageptr,
	const struct vec2_t offset,
	const struct vec2_t size,
	const char *path,
	...
)
{
	struct imageptr_t *viewport = imageptr_viewport(
		imageptr,
		offset
	);

	// the dump routine dereferences the descriptor, so bail out on allocation failure
	if( NULL == viewport )
	{
		perror(NULL);
		return;
	}

	va_list ap;

	va_start(ap, path);

	imageptr_log_dump_fmt_va(viewport, size, path, ap);

	va_end(ap);

	// only the descriptor is owned here, the pixels belong to "imageptr"
	free(viewport);
}

/**
 * Create the image of an LL band using the memory model selected at compile time.
 *
 * CONFIG_MEMORY_NORMAL selects heap storage, CONFIG_MEMORY_SPARSE a sparse
 * mapping and CONFIG_MEMORY_WINDOW a windowed mapping whose window holds
 * CONFIG_MEMORY_WINDOW_SIZE(cb_size1.y) rows; CONFIG_MEMORY_HUGEPAGES switches
 * the mapped models to their huge-page flavour.
 *
 * @param size image dimensions in pixels
 * @param cb_size1 only its y component is used, and only by the windowed model
 * @return the new descriptor, or NULL on failure
 */
struct imageptr_t *imageptr_create_llband(
	const struct vec2_t size,
	const struct vec2_t cb_size1
)
{
#if   defined(CONFIG_MEMORY_NORMAL)
	UNUSED(cb_size1);
	return imageptr_create_normal(size);
#elif defined(CONFIG_MEMORY_SPARSE) && defined(CONFIG_MEMORY_HUGEPAGES)
	UNUSED(cb_size1);
	return imageptr_create_sparse_huge(size);
#elif defined(CONFIG_MEMORY_SPARSE)
	UNUSED(cb_size1);
	return imageptr_create_sparse(size);
#elif defined(CONFIG_MEMORY_WINDOW) && defined(CONFIG_MEMORY_HUGEPAGES)
	return imageptr_create_window_huge(size, CONFIG_MEMORY_WINDOW_SIZE(cb_size1.y));
#elif defined(CONFIG_MEMORY_WINDOW)
	return imageptr_create_window(size, CONFIG_MEMORY_WINDOW_SIZE(cb_size1.y));
#else
#	error Either CONFIG_MEMORY_NORMAL, CONFIG_MEMORY_SPARSE or CONFIG_MEMORY_WINDOW must be defined.
#endif
}

/**
 * Release the image of an LL band using the memory model selected at compile time.
 *
 * The selection mirrors imageptr_create_llband(); @p size and @p cb_size1
 * are forwarded to the destroy routine of the selected model.
 *
 * @param imageptr descriptor created by imageptr_create_llband()
 * @param size image dimensions in pixels
 * @param cb_size1 only its y component is used, and only by the windowed model
 */
void imageptr_destroy_llband(
	struct imageptr_t *imageptr,
	const struct vec2_t size,
	const struct vec2_t cb_size1
)
{
#if   defined(CONFIG_MEMORY_NORMAL)
	UNUSED(cb_size1);
	imageptr_destroy_normal(imageptr, size);
#elif defined(CONFIG_MEMORY_SPARSE) && defined(CONFIG_MEMORY_HUGEPAGES)
	UNUSED(cb_size1);
	imageptr_destroy_sparse_huge(imageptr, size);
#elif defined(CONFIG_MEMORY_SPARSE)
	UNUSED(cb_size1);
	imageptr_destroy_sparse(imageptr, size);
#elif defined(CONFIG_MEMORY_WINDOW) && defined(CONFIG_MEMORY_HUGEPAGES)
	imageptr_destroy_window_huge(imageptr, size, CONFIG_MEMORY_WINDOW_SIZE(cb_size1.y));
#elif defined(CONFIG_MEMORY_WINDOW)
	imageptr_destroy_window(imageptr, size, CONFIG_MEMORY_WINDOW_SIZE(cb_size1.y));
#else
#	error Either CONFIG_MEMORY_NORMAL, CONFIG_MEMORY_SPARSE or CONFIG_MEMORY_WINDOW must be defined.
#endif
}
