// Build: gcc -o compute_pll compare_pll.c -lm

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <math.h>

/*
 * Unsigned 20.12 fixed-point number: the upper 20 bits of 'full' hold the
 * integer part, the lower 12 bits hold the fraction (1.0 == 4096).
 */
typedef union dfixed {
        uint32_t full;
} fixed20_12;

/*
 * Constructors.  The argument is converted to uint32_t *before* shifting so
 * that a signed argument can never trigger a signed-overflow shift, and each
 * expansion is fully parenthesized so it is safe inside any expression.
 *
 *   dfixed_const(A)       A.0
 *   dfixed_const_half(A)  A + 2048/4096 (0.5)
 *   dfixed_const_666(A)   A + 2731/4096 (~0.667)
 *   dfixed_const_8(A)     A + 3277/4096 (~0.8)
 */
#define dfixed_const(A) ((uint32_t)((uint32_t)(A) << 12))
#define dfixed_const_half(A) ((uint32_t)(((uint32_t)(A) << 12) + 2048))
#define dfixed_const_666(A) ((uint32_t)(((uint32_t)(A) << 12) + 2731))
#define dfixed_const_8(A) ((uint32_t)(((uint32_t)(A) << 12) + 3277))
/*
 * Product of two fixed20_12 values, computed in 64 bits and rounded to the
 * nearest representable value (2048 is half of the 12-bit fraction that is
 * shifted out).  The result has type uint64_t.
 */
#define dfixed_mul(A, B) ((uint64_t)((uint64_t)(A).full * (B).full + 2048) >> 12)
/* Brace initializers for a fixed20_12 object holding A.0 and A.5. */
#define dfixed_init(A) { .full = dfixed_const((A)) }
#define dfixed_init_half(A) { .full = dfixed_const_half((A)) }
/* Integer part of a fixed20_12 value (the fraction is discarded). */
#define dfixed_trunc(A) ((A).full >> 12)

/*
 * Round a 20.12 fixed-point value down to a whole number.
 * Returns the raw 20.12 representation (fraction bits all zero).
 */
static inline uint32_t dfixed_floor(fixed20_12 A)
{
        /* Drop the fraction, then re-encode the integer part. */
        return dfixed_const(dfixed_trunc(A));
}

/*
 * Round a 20.12 fixed-point value up to a whole number.
 * Returns the raw 20.12 representation (fraction bits all zero).
 */
static inline uint32_t dfixed_ceil(fixed20_12 A)
{
        const uint32_t whole = dfixed_trunc(A);
        /* 1 when A carries any fraction bits, 0 when it is already whole. */
        const uint32_t bump = A.full > dfixed_const(whole);

        return dfixed_const(whole + bump);
}

/*
 * Divide two 20.12 fixed-point values, rounding to the nearest result.
 * Returns the raw 20.12 representation of A / B, truncated to 32 bits.
 * B.full must be non-zero.
 */
static inline uint32_t dfixed_div(fixed20_12 A, fixed20_12 B)
{
        /* Shift by 13 instead of 12: the extra bit is the rounding bit. */
        const uint64_t dividend = (uint64_t)A.full << 13;
        const uint64_t quotient_x2 = dividend / B.full;

        /* Add half an LSB, then drop the rounding bit. */
        return (uint32_t)((quotient_x2 + 1) >> 1);
}

/*
 * pll flags
 *
 * Bit values for radeon_pll.flags.  Only some of them are consulted by the
 * code in this file:
 *
 *   RADEON_PLL_NO_ODD_POST_DIV  - the post divider search skips odd values.
 *   RADEON_PLL_USE_REF_DIV      - use radeon_pll.reference_div instead of
 *                                 searching for a reference divider.
 *   RADEON_PLL_LEGACY           - the post divider search skips the values
 *                                 5, 7, 9, 10 and 11.
 *   RADEON_PLL_USE_FRAC_FB_DIV  - the feedback divider is rounded to one
 *                                 decimal digit and that digit is returned
 *                                 separately; otherwise it is rounded to a
 *                                 whole number and the fraction is 0.
 *   RADEON_PLL_USE_POST_DIV     - use radeon_pll.post_div instead of
 *                                 searching for a post divider.
 *   RADEON_PLL_IS_LCD           - the fixed-point routines take their VCO
 *                                 limits from lcd_pll_out_min/max instead of
 *                                 pll_out_min/max.
 *
 * The remaining flags (USE_BIOS_DIVS and the PREFER_* family) are defined
 * here but not referenced anywhere in this file.
 */
#define RADEON_PLL_USE_BIOS_DIVS        (1 << 0)
#define RADEON_PLL_NO_ODD_POST_DIV      (1 << 1)
#define RADEON_PLL_USE_REF_DIV          (1 << 2)
#define RADEON_PLL_LEGACY               (1 << 3)
#define RADEON_PLL_PREFER_LOW_REF_DIV   (1 << 4)
#define RADEON_PLL_PREFER_HIGH_REF_DIV  (1 << 5)
#define RADEON_PLL_PREFER_LOW_FB_DIV    (1 << 6)
#define RADEON_PLL_PREFER_HIGH_FB_DIV   (1 << 7)
#define RADEON_PLL_PREFER_LOW_POST_DIV  (1 << 8)
#define RADEON_PLL_PREFER_HIGH_POST_DIV (1 << 9)
#define RADEON_PLL_USE_FRAC_FB_DIV      (1 << 10)
#define RADEON_PLL_PREFER_CLOSEST_LOWER (1 << 11)
#define RADEON_PLL_PREFER_CLOSEST_HIGHER (1 << 12)
#define RADEON_PLL_USE_POST_DIV         (1 << 13)
#define RADEON_PLL_IS_LCD               (1 << 14)


/*
 * Description of one PLL: its reference clock, optional fixed dividers, the
 * limits on the VCO output and on every divider, and the flags that steer
 * the divider search.  main() fills it in with per-chip values before
 * calling the radeon_compute_pll_*() routines.
 */
struct radeon_pll {
        /*
         * reference frequency
         * NOTE(review): presumably in the same units as the dot clock after
         * it has been divided by 10 -- confirm against the caller.
         */
        uint32_t reference_freq;

        /*
         * fixed dividers; reference_div is only read when
         * RADEON_PLL_USE_REF_DIV is set, post_div only when
         * RADEON_PLL_USE_POST_DIV is set
         */
        uint32_t reference_div;
        uint32_t post_div;

        /*
         * pll in/out limits
         * pll_out_min/max bound the VCO frequency (scaled dot clock times
         * post divider).  The lcd_* pair replaces them in the fixed-point
         * routines when RADEON_PLL_IS_LCD is set.  pll_in_min/max and
         * best_vco are set by main() but not read by the calculation
         * routines in this file.
         */
        uint32_t pll_in_min;
        uint32_t pll_in_max;
        uint32_t pll_out_min;
        uint32_t pll_out_max;
        uint32_t lcd_pll_out_min;
        uint32_t lcd_pll_out_max;
        uint32_t best_vco;

        /*
         * divider limits
         * A fixed reference_div/post_div is accepted when it lies within
         * [min, max] inclusive.  The reference divider search loop runs from
         * min_ref_div up to, but not including, max_ref_div.  The
         * frac_feedback_div limits are set by main() but not read by the
         * calculation routines in this file.
         */
        uint32_t min_ref_div;
        uint32_t max_ref_div;
        uint32_t min_post_div;
        uint32_t max_post_div;
        uint32_t min_feedback_div;
        uint32_t max_feedback_div;
        uint32_t min_frac_feedback_div;
        uint32_t max_frac_feedback_div;

        /* flags for the current clock (bitwise OR of RADEON_PLL_* values) */
        uint32_t flags;

        /* pll id (not used by the code in this file) */
        uint32_t id;
};

/*
 * Fixed-point computation of the feedback divider for a given post divider
 * and reference divider:
 *
 *     feedback_divider = (freq * post_div / reference_freq) * ref_div
 *
 * With RADEON_PLL_USE_FRAC_FB_DIV the divider is rounded to one decimal
 * digit; the whole part goes to *fb_div and the tenths digit to
 * *fb_div_frac.  Without it the divider is rounded to the nearest whole
 * number and *fb_div_frac is set to 0.
 *
 * Returns 1 when *fb_div lies within [min_feedback_div, max_feedback_div],
 * 0 otherwise.  The outputs are written in both cases.
 */
static int
calc_fb_div_fixed(struct radeon_pll *pll,
		  uint32_t freq,
		  uint32_t post_div,
		  uint32_t ref_div,
		  uint32_t *fb_div,
		  uint32_t *fb_div_frac)
{
	const uint32_t target_vco = freq * post_div;
	fixed20_12 divider, operand, times_ten;

	/* divider = target_vco / reference_freq * ref_div */
	divider.full = dfixed_const(target_vco);
	operand.full = dfixed_const(pll->reference_freq);
	divider.full = dfixed_div(divider, operand);
	operand.full = dfixed_const(ref_div);
	divider.full = dfixed_mul(divider, operand);

	if (!(pll->flags & RADEON_PLL_USE_FRAC_FB_DIV)) {
		/* Whole dividers only: floor(divider + 0.5). */
		divider.full += dfixed_const_half(0);
		divider.full = dfixed_floor(divider);

		*fb_div = dfixed_trunc(divider);
		*fb_div_frac = 0;
	} else {
		/* Keep one decimal digit: floor(divider * 10 + 0.5) / 10. */
		operand.full = dfixed_const(10);
		divider.full = dfixed_mul(divider, operand);
		divider.full += dfixed_const_half(0);
		divider.full = dfixed_floor(divider);
		divider.full = dfixed_div(divider, operand);

		/* Whole part of the rounded divider. */
		*fb_div = dfixed_trunc(divider);

		/* Tenths digit: divider * 10 - floor(divider) * 10. */
		times_ten.full = dfixed_mul(divider, operand);
		divider.full = dfixed_floor(divider);
		divider.full = dfixed_mul(divider, operand);
		divider.full = times_ten.full - divider.full;
		*fb_div_frac = dfixed_trunc(divider);
	}

	return ((*fb_div) >= pll->min_feedback_div) &&
	       ((*fb_div) <= pll->max_feedback_div);
}

/*
 * Fixed-point search for a reference divider (and matching feedback
 * divider) for the given post divider.
 *
 * Reference dividers are tried in ascending order from min_ref_div up to,
 * but not including, max_ref_div.  A candidate is accepted when
 *   - calc_fb_div_fixed() yields a feedback divider within its limits,
 *   - the resulting VCO frequency lies within the PLL output limits (the
 *     lcd_* limits when RADEON_PLL_IS_LCD is set), and
 *   - the resulting output clock is not below freq and exceeds it by at
 *     most freq / 400 (0.25 %).
 *
 * Returns 1 with *ref_div, *fb_div and *fb_div_frac describing the accepted
 * candidate, or 0 when none qualifies (the outputs then hold whatever the
 * last attempt left behind).
 */
static int
calc_fb_ref_div_fixed(struct radeon_pll *pll,
		      uint32_t freq,
		      uint32_t post_div,
		      uint32_t *fb_div,
		      uint32_t *fb_div_frac,
		      uint32_t *ref_div)
{
	const int is_lcd = (pll->flags & RADEON_PLL_IS_LCD) != 0;
	const uint32_t vco_min = is_lcd ? pll->lcd_pll_out_min : pll->pll_out_min;
	const uint32_t vco_max = is_lcd ? pll->lcd_pll_out_max : pll->pll_out_max;
	fixed20_12 target, tolerance, actual, operand;
	uint32_t vco;

	/* tolerance = target * 0.0025, expressed as target / 400 */
	target.full = dfixed_const(freq);
	operand.full = dfixed_const(400);
	tolerance.full = dfixed_div(target, operand);

	for (*ref_div = pll->min_ref_div; *ref_div < pll->max_ref_div; ++(*ref_div)) {
		if (!calc_fb_div_fixed(pll, freq, post_div, *ref_div, fb_div, fb_div_frac))
			continue;

		/* VCO frequency actually produced by these dividers. */
		vco = pll->reference_freq * (((*fb_div) * 10) + (*fb_div_frac));
		vco /= (*ref_div) * 10;
		if ((vco < vco_min) || (vco > vco_max))
			continue;

		/* actual = vco / post_div */
		operand.full = dfixed_const(post_div);
		actual.full = dfixed_const(vco);
		actual.full = dfixed_div(actual, operand);

		/* Only accept clocks at or above the target... */
		if (actual.full < target.full)
			continue;
		/* ...and no further above it than the tolerance. */
		if (actual.full - target.full <= tolerance.full)
			return 1;
	}
	return 0;
}

/*
 * Fixed-point PLL divider selection.
 *
 * freq is divided by 10 before any calculation.  Depending on pll->flags the
 * post divider is either taken from pll->post_div (RADEON_PLL_USE_POST_DIV)
 * or searched from max_post_div downwards, and the reference divider is
 * either taken from pll->reference_div (RADEON_PLL_USE_REF_DIV) or searched
 * by calc_fb_ref_div_fixed().  The first combination that passes all limit
 * checks wins.
 *
 * On success *dot_clock_p receives the clock produced by the chosen
 * dividers, in the same scale as the freq argument.  On failure it receives
 * 0 and a message is printed; the divider outputs then hold whatever the
 * search last touched and must not be used.
 *
 * Failure includes the cases that previously fell through to the final
 * division: a search that ran out of post dividers (stale dividers were
 * reported as a valid result) and combinations that leave ref_div or
 * post_div at 0 (division by zero).
 */
static void radeon_compute_pll_fixed(struct radeon_pll *pll,
				     uint32_t freq,
				     uint32_t *dot_clock_p,
				     uint32_t *fb_div_p,
				     uint32_t *frac_fb_div_p,
				     uint32_t *ref_div_p,
				     uint32_t *post_div_p)
{
	uint32_t fb_div = 0, fb_div_frac = 0, post_div = 0, ref_div = 0;
	uint32_t best_freq = 0, vco_frequency;
	uint32_t pll_out_min, pll_out_max;
	int found = 0;

	if (pll->flags & RADEON_PLL_IS_LCD) {
		pll_out_min = pll->lcd_pll_out_min;
		pll_out_max = pll->lcd_pll_out_max;
	} else {
		pll_out_min = pll->pll_out_min;
		pll_out_max = pll->pll_out_max;
	}

	/* freq = freq / 10; */
	freq /= 10;

	if (pll->flags & RADEON_PLL_USE_POST_DIV) {
		post_div = pll->post_div;
		if ((post_div < pll->min_post_div) || (post_div > pll->max_post_div))
			goto done;

		vco_frequency = freq * post_div;
		if ((vco_frequency < pll_out_min) || (vco_frequency > pll_out_max))
			goto done;

		/*
		 * Only the fully fixed case computes a feedback divider here;
		 * without RADEON_PLL_USE_REF_DIV nothing is found.
		 */
		if (pll->flags & RADEON_PLL_USE_REF_DIV) {
			ref_div = pll->reference_div;
			if ((ref_div < pll->min_ref_div) || (ref_div > pll->max_ref_div))
				goto done;
			if (!calc_fb_div_fixed(pll, freq, post_div, ref_div, &fb_div, &fb_div_frac))
				goto done;
			found = 1;
		}
	} else {
		for (post_div = pll->max_post_div; post_div >= pll->min_post_div; --post_div) {
			if (pll->flags & RADEON_PLL_LEGACY) {
				if ((post_div == 5) ||
				    (post_div == 7) ||
				    (post_div == 9) ||
				    (post_div == 10) ||
				    (post_div == 11))
					continue;
			}

			if ((pll->flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1))
				continue;

			vco_frequency = freq * post_div;
			if ((vco_frequency < pll_out_min) || (vco_frequency > pll_out_max))
				continue;
			if (pll->flags & RADEON_PLL_USE_REF_DIV) {
				ref_div = pll->reference_div;
				if ((ref_div < pll->min_ref_div) || (ref_div > pll->max_ref_div))
					goto done;
				if (calc_fb_div_fixed(pll, freq, post_div, ref_div, &fb_div, &fb_div_frac)) {
					found = 1;
					break;
				}
			} else {
				if (calc_fb_ref_div_fixed(pll, freq, post_div, &fb_div, &fb_div_frac, &ref_div)) {
					found = 1;
					break;
				}
			}
		}
	}

	/*
	 * reference_freq * (fb_div + fb_div_frac / 10) / (ref_div * post_div),
	 * multiplied by 10 to undo the scaling of freq above.  Skipped when
	 * nothing was found or a divider is 0, so best_freq stays 0.
	 */
	if (found && (ref_div != 0) && (post_div != 0)) {
		best_freq = pll->reference_freq * 10 * fb_div;
		best_freq += pll->reference_freq * fb_div_frac;
		best_freq = best_freq / (ref_div * post_div);
	}

done:
	if (best_freq == 0)
		printf("Couldn't find valid PLL dividers\n");

	*dot_clock_p = best_freq;
	*fb_div_p = fb_div;
	*frac_fb_div_p = fb_div_frac;
	*ref_div_p = ref_div;
	*post_div_p = post_div;
}

/*
 * Floating-point computation of the feedback divider for a given post
 * divider and reference divider:
 *
 *     feedback_divider = (freq / 10) * post_div * ref_div / reference_freq
 *
 * freq / 10 is an integer division.  With RADEON_PLL_USE_FRAC_FB_DIV the
 * divider is rounded to one decimal digit; the whole part goes to *fb_div
 * and the tenths digit to *fb_div_frac.  Without it the divider is rounded
 * to the nearest whole number and *fb_div_frac is set to 0.
 *
 * Returns 1 when *fb_div lies within [min_feedback_div, max_feedback_div],
 * 0 otherwise.  The outputs are written in both cases.
 */
static int
calc_fb_div_float(struct radeon_pll *pll,
		  uint32_t freq,
		  int post_div,
		  int ref_div,
		  int *fb_div,
		  int *fb_div_frac)
{
	float scaled_freq = freq / 10;
	float target_vco = scaled_freq * post_div;
	float divider = target_vco * ref_div / pll->reference_freq;

	if (!(pll->flags & RADEON_PLL_USE_FRAC_FB_DIV)) {
		/* Whole dividers only: round to nearest. */
		*fb_div = floor(divider + 0.5);
		*fb_div_frac = 0;
	} else {
		/* Keep one decimal digit, then split whole part and tenths. */
		divider = floor((divider * 10.0) + 0.5) * 0.1;

		*fb_div = floor(divider);
		*fb_div_frac = fmod(divider, 1.0) * 10.0;
	}

	return (*fb_div >= pll->min_feedback_div) &&
	       (*fb_div <= pll->max_feedback_div);
}

static int
calc_fb_ref_div_float(struct radeon_pll *pll,
		      uint32_t freq,
		      int post_div,
		      int *fb_div,
		      int *fb_div_frac,
		      int *ref_div)
{
	float ffreq = freq / 10;
	float max_error = ffreq * 0.0025;
	float vco, error, pll_out;

	for ((*ref_div) = pll->min_ref_div; (*ref_div) < pll->max_ref_div; ++(*ref_div)) {
		if (calc_fb_div_float(pll, freq, post_div, (*ref_div), fb_div, fb_div_frac)) {
			vco = pll->reference_freq * ((*fb_div) + ((*fb_div_frac) * 0.1)) / (*ref_div);

			if ((vco < pll->pll_out_min) || (vco > pll->pll_out_max))
				continue;

			pll_out = vco / post_div;

			error = pll_out - ffreq;
			if ((fabs(error) <= max_error) && (error >= 0))
				return 1;
		}
	}
	return 0;
}

static void
radeon_compute_pll_float(struct radeon_pll *pll,
			 uint32_t freq,
			 uint32_t *chosen_dot_clock_freq,
			 uint32_t *chosen_feedback_div,
			 uint32_t *chosen_frac_feedback_div,
			 uint32_t *chosen_reference_div,
			 uint32_t *chosen_post_div)
{
	float ffreq = freq / 10;
	float vco_frequency;
	int fb_div = 0, fb_div_frac = 0, post_div = 0, ref_div = 0;
	uint32_t best_freq = 0;

	if (pll->flags & RADEON_PLL_USE_POST_DIV) {
		post_div = pll->post_div;
		if ((post_div < pll->min_post_div) || (post_div > pll->max_post_div))
			goto done;
		vco_frequency = ffreq * post_div;
		if ((vco_frequency < pll->pll_out_min) || (vco_frequency > pll->pll_out_max))
			goto done;

		if (pll->flags & RADEON_PLL_USE_REF_DIV) {
			ref_div = pll->reference_div;
			if ((ref_div < pll->min_ref_div) || (ref_div > pll->max_ref_div))
				goto done;
			if (!calc_fb_div_float(pll, freq, post_div, ref_div, &fb_div, &fb_div_frac))
				goto done;
		}
	} else {
		for (post_div = pll->max_post_div; post_div >= pll->min_post_div; --post_div) {
			if (pll->flags & RADEON_PLL_LEGACY) {
				if ((post_div == 5) ||
				    (post_div == 7) ||
				    (post_div == 9) ||
				    (post_div == 10) ||
				    (post_div == 11))
					continue;
			}
			if ((pll->flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1))
				continue;

			vco_frequency = ffreq * post_div;
			if ((vco_frequency < pll->pll_out_min) || (vco_frequency > pll->pll_out_max))
				continue;
			if (pll->flags & RADEON_PLL_USE_REF_DIV) {
				ref_div = pll->reference_div;
				if ((ref_div < pll->min_ref_div) || (ref_div > pll->max_ref_div))
					goto done;
				if (calc_fb_div_float(pll, freq, post_div, ref_div, &fb_div, &fb_div_frac))
					break;
			} else {
				if (calc_fb_ref_div_float(pll, freq, post_div, &fb_div, &fb_div_frac, &ref_div))
					break;
			}
		}
	}

	best_freq = pll->reference_freq * 10 * fb_div;
	best_freq += pll->reference_freq * fb_div_frac;
	best_freq = best_freq / (ref_div * post_div);

done:
	if (best_freq == 0)
		printf("Couldn't find valid PLL dividers\n");

	*chosen_dot_clock_freq = best_freq;
	*chosen_feedback_div = fb_div;
	*chosen_frac_feedback_div = fb_div_frac;
	*chosen_reference_div = ref_div;
	*chosen_post_div = post_div;

	//printf("float: %u\t%d.%d,\t%d,\t%d\n", *chosen_dot_clock_freq, *chosen_feedback_div,
	//*chosen_frac_feedback_div, *chosen_reference_div, *chosen_post_div);
}

int main(int argc,char *argv[])
{
    uint32_t dotclock = 0;
    struct radeon_pll pll;
    uint32_t chosen_clock_fixed, fb_div_fixed, frac_fb_div_fixed, ref_div_fixed, post_div_fixed;
    uint32_t chosen_clock_float, fb_div_float, frac_fb_div_float, ref_div_float, post_div_float;

    if (argc > 1) {
	dotclock = strtoul(argv[1], NULL, 0);
	printf("dotclock: %d\n", dotclock);
    }

    pll.flags = 0;
#if 1
    //rs690
    pll.reference_freq = 1432;
    pll.reference_div = 13;
    //pll.pll_out_min = 80000;
    pll.pll_out_min = 64800;
    pll.pll_out_max = 120000;
    pll.pll_in_min = 100;
    pll.pll_in_max = 1350;
    pll.min_post_div = 2;
    pll.max_post_div = 0x7f;
#else
    //rv250
    pll.reference_freq = 2700;
    pll.reference_div = 12;
    pll.pll_out_min = 20000;
    pll.pll_out_max = 35000;
    pll.pll_in_min = 40;
    pll.pll_in_max = 3000;
    pll.min_post_div = 1;
    pll.max_post_div = 12;//16;
#endif

    pll.min_frac_feedback_div = 0;
    pll.max_frac_feedback_div = 9;
    pll.min_ref_div = 2;
    pll.max_ref_div = 0x3ff;
    pll.min_feedback_div = 4;
    pll.max_feedback_div = 0x7ff;
    pll.best_vco = 0;

    if (dotclock != 0) {
	    radeon_compute_pll_fixed(&pll,
				     dotclock,
				     &chosen_clock_fixed,
				     &fb_div_fixed,
				     &frac_fb_div_fixed,
				     &ref_div_fixed,
				     &post_div_fixed);
	    radeon_compute_pll_float(&pll,
				     dotclock,
				     &chosen_clock_float,
				     &fb_div_float,
				     &frac_fb_div_float,
				     &ref_div_float,
				     &post_div_float);
	    printf("fixed: %u\t%u\t%d.%d,\t%d,\t%d\n", dotclock, chosen_clock_fixed,
		   fb_div_fixed, frac_fb_div_fixed, ref_div_fixed, post_div_fixed);
	    printf("float: %u\t%u\t%d.%d,\t%d,\t%d\n", dotclock, chosen_clock_float,
		   fb_div_float, frac_fb_div_float, ref_div_float, post_div_float);
    } else {
	    for (dotclock = 12000; dotclock <= 400000; dotclock += 10) {
		    radeon_compute_pll_fixed(&pll,
					     dotclock,
					     &chosen_clock_fixed,
					     &fb_div_fixed,
					     &frac_fb_div_fixed,
					     &ref_div_fixed,
					     &post_div_fixed);
		    radeon_compute_pll_float(&pll,
					     dotclock,
					     &chosen_clock_float,
					     &fb_div_float,
					     &frac_fb_div_float,
					     &ref_div_float,
					     &post_div_float);
		    //if (chosen_clock_fixed != chosen_clock_float) {
		    if ((fb_div_fixed != fb_div_float) ||
			(frac_fb_div_fixed != frac_fb_div_float) ||
			(ref_div_fixed != ref_div_float) ||
			(post_div_fixed != post_div_float)) {
			    printf("fixed: %u\t%u\t%d.%d,\t%d,\t%d\n", dotclock, chosen_clock_fixed,
				   fb_div_fixed, frac_fb_div_fixed, ref_div_fixed, post_div_fixed);
			    printf("float: %u\t%u\t%d.%d,\t%d,\t%d\n", dotclock, chosen_clock_float,
				   fb_div_float, frac_fb_div_float, ref_div_float, post_div_float);
		    }
	    }
    }

    return 0;
}
