* [PATCH 1/3] drm/nouveau: Update the CRTC arbitration parameters on FB depth switch.
@ 2009-11-23 13:45 Francisco Jerez
[not found] ` <1258983953-15349-1-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
0 siblings, 1 reply; 4+ messages in thread
From: Francisco Jerez @ 2009-11-23 13:45 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Signed-off-by: Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
---
drivers/gpu/drm/nouveau/nv04_crtc.c | 37 +++++++++++++++++++++-------------
1 files changed, 23 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
index 2ab9f30..0a5cfc1 100644
--- a/drivers/gpu/drm/nouveau/nv04_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -106,10 +106,8 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
struct nv04_mode_state *state = &dev_priv->mode_reg;
struct nv04_crtc_reg *regp = &state->crtc_reg[nv_crtc->index];
- struct drm_framebuffer *fb = crtc->fb;
struct nouveau_pll_vals *pv = &regp->pllvals;
struct pll_lims pll_lim;
- int vclk, arb_burst, arb_fifo_lwm;
if (get_pll_limits(dev, nv_crtc->index ? VPLL2 : VPLL1, &pll_lim))
return;
@@ -130,8 +128,7 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
if (dev_priv->chipset > 0x40 && dot_clock <= (pll_lim.vco1.maxfreq / 2))
memset(&pll_lim.vco2, 0, sizeof(pll_lim.vco2));
- vclk = nouveau_calc_pll_mnp(dev, &pll_lim, dot_clock, pv);
- if (!vclk)
+ if (!nouveau_calc_pll_mnp(dev, &pll_lim, dot_clock, pv))
return;
state->pllsel &= PLLSEL_VPLL1_MASK | PLLSEL_VPLL2_MASK | PLLSEL_TV_MASK;
@@ -152,13 +149,6 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
NV_TRACE(dev, "vpll: n %d m %d log2p %d\n",
pv->N1, pv->M1, pv->log2P);
- nouveau_calc_arb(dev, vclk, fb->bits_per_pixel, &arb_burst, &arb_fifo_lwm);
-
- regp->CRTC[NV_CIO_CRE_FF_INDEX] = arb_burst;
- regp->CRTC[NV_CIO_CRE_FFLWM__INDEX] = arb_fifo_lwm & 0xff;
- if (nv_arch(dev) >= NV_30)
- regp->CRTC[NV_CIO_CRE_47] = arb_fifo_lwm >> 8;
-
nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.offset);
}
@@ -775,10 +765,12 @@ nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
struct drm_framebuffer *old_fb)
{
struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
- struct drm_nouveau_private *dev_priv = crtc->dev->dev_private;
+ struct drm_device *dev = crtc->dev;
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nv04_crtc_reg *regp = &dev_priv->mode_reg.crtc_reg[nv_crtc->index];
struct drm_framebuffer *drm_fb = nv_crtc->base.fb;
struct nouveau_framebuffer *fb = nouveau_framebuffer(drm_fb);
+ int arb_burst, arb_lwm;
int ret;
ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM);
@@ -797,13 +789,14 @@ nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
nv_crtc_gamma_load(crtc);
}
+ /* Update the framebuffer format. */
regp->CRTC[NV_CIO_CRE_PIXEL_INDEX] &= ~3;
regp->CRTC[NV_CIO_CRE_PIXEL_INDEX] |= (crtc->fb->depth + 1) / 8;
regp->ramdac_gen_ctrl &= ~NV_PRAMDAC_GENERAL_CONTROL_ALT_MODE_SEL;
if (crtc->fb->depth == 16)
regp->ramdac_gen_ctrl |= NV_PRAMDAC_GENERAL_CONTROL_ALT_MODE_SEL;
crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_PIXEL_INDEX);
- NVWriteRAMDAC(crtc->dev, nv_crtc->index, NV_PRAMDAC_GENERAL_CONTROL,
+ NVWriteRAMDAC(dev, nv_crtc->index, NV_PRAMDAC_GENERAL_CONTROL,
regp->ramdac_gen_ctrl);
regp->CRTC[NV_CIO_CR_OFFSET_INDEX] = drm_fb->pitch >> 3;
@@ -812,9 +805,25 @@ nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_RPC0_INDEX);
crtc_wr_cio_state(crtc, regp, NV_CIO_CR_OFFSET_INDEX);
+ /* Update the framebuffer location. */
regp->fb_start = nv_crtc->fb.offset & ~3;
regp->fb_start += (y * drm_fb->pitch) + (x * drm_fb->bits_per_pixel / 8);
- NVWriteCRTC(crtc->dev, nv_crtc->index, NV_PCRTC_START, regp->fb_start);
+ NVWriteCRTC(dev, nv_crtc->index, NV_PCRTC_START, regp->fb_start);
+
+ /* Update the arbitration parameters. */
+ nouveau_calc_arb(dev, crtc->mode.clock, drm_fb->bits_per_pixel,
+ &arb_burst, &arb_lwm);
+
+ regp->CRTC[NV_CIO_CRE_FF_INDEX] = arb_burst;
+ regp->CRTC[NV_CIO_CRE_FFLWM__INDEX] = arb_lwm & 0xff;
+ crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FF_INDEX);
+ crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FFLWM__INDEX);
+
+ if (nv_arch(dev) >= NV_30) {
+ regp->CRTC[NV_CIO_CRE_47] = arb_lwm >> 8;
+ crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_47);
+ }
+
return 0;
}
--
1.6.4.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] drm/nouveau: Clean up the arbitration parameters calculation code.
[not found] ` <1258983953-15349-1-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
@ 2009-11-23 13:45 ` Francisco Jerez
2009-11-23 13:45 ` [PATCH 3/3] drm/nv10-nv20: CRTC arbitration code rewrite Francisco Jerez
1 sibling, 0 replies; 4+ messages in thread
From: Francisco Jerez @ 2009-11-23 13:45 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Signed-off-by: Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
---
drivers/gpu/drm/nouveau/nouveau_calc.c | 248 +++++++++-----------------------
1 files changed, 69 insertions(+), 179 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_calc.c b/drivers/gpu/drm/nouveau/nouveau_calc.c
index 3f80db8..5d48274 100644
--- a/drivers/gpu/drm/nouveau/nouveau_calc.c
+++ b/drivers/gpu/drm/nouveau/nouveau_calc.c
@@ -34,35 +34,31 @@
\****************************************************************************/
struct nv_fifo_info {
- int graphics_lwm;
- int video_lwm;
- int graphics_burst_size;
- int video_burst_size;
- bool valid;
+ int lwm;
+ int burst;
};
struct nv_sim_state {
int pclk_khz;
int mclk_khz;
int nvclk_khz;
- int pix_bpp;
- bool enable_mp;
- bool enable_video;
+ int bpp;
int mem_page_miss;
int mem_latency;
int memory_type;
int memory_width;
+ int two_heads;
};
static void
-nv4CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
+nv04_calc_arb(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
{
- int pagemiss, cas, width, video_enable, bpp;
- int nvclks, mclks, pclks, vpagemiss, crtpagemiss, vbs;
+ int pagemiss, cas, width, bpp;
+ int nvclks, mclks, pclks, crtpagemiss;
int found, mclk_extra, mclk_loop, cbs, m1, p1;
- int mclk_freq, pclk_freq, nvclk_freq, mp_enable;
- int us_m, us_n, us_p, video_drain_rate, crtc_drain_rate;
- int vpm_us, us_video, vlwm, video_fill_us, cpm_us, us_crt, clwm;
+ int mclk_freq, pclk_freq, nvclk_freq;
+ int us_m, us_n, us_p, crtc_drain_rate;
+ int cpm_us, us_crt, clwm;
pclk_freq = arb->pclk_khz;
mclk_freq = arb->mclk_khz;
@@ -70,107 +66,53 @@ nv4CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
pagemiss = arb->mem_page_miss;
cas = arb->mem_latency;
width = arb->memory_width >> 6;
- video_enable = arb->enable_video;
- bpp = arb->pix_bpp;
- mp_enable = arb->enable_mp;
- clwm = 0;
- vlwm = 0;
+ bpp = arb->bpp;
cbs = 128;
+
pclks = 2;
- nvclks = 2;
- nvclks += 2;
- nvclks += 1;
- mclks = 5;
- mclks += 3;
- mclks += 1;
- mclks += cas;
- mclks += 1;
- mclks += 1;
- mclks += 1;
- mclks += 1;
+ nvclks = 10;
+ mclks = 13 + cas;
mclk_extra = 3;
- nvclks += 2;
- nvclks += 1;
- nvclks += 1;
- nvclks += 1;
- if (mp_enable)
- mclks += 4;
- nvclks += 0;
- pclks += 0;
found = 0;
- vbs = 0;
- while (found != 1) {
- fifo->valid = true;
+
+ while (!found) {
found = 1;
+
mclk_loop = mclks + mclk_extra;
us_m = mclk_loop * 1000 * 1000 / mclk_freq;
us_n = nvclks * 1000 * 1000 / nvclk_freq;
us_p = nvclks * 1000 * 1000 / pclk_freq;
- if (video_enable) {
- video_drain_rate = pclk_freq * 2;
- crtc_drain_rate = pclk_freq * bpp / 8;
- vpagemiss = 2;
- vpagemiss += 1;
- crtpagemiss = 2;
- vpm_us = vpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
- if (nvclk_freq * 2 > mclk_freq * width)
- video_fill_us = cbs * 1000 * 1000 / 16 / nvclk_freq;
- else
- video_fill_us = cbs * 1000 * 1000 / (8 * width) / mclk_freq;
- us_video = vpm_us + us_m + us_n + us_p + video_fill_us;
- vlwm = us_video * video_drain_rate / (1000 * 1000);
- vlwm++;
- vbs = 128;
- if (vlwm > 128)
- vbs = 64;
- if (vlwm > (256 - 64))
- vbs = 32;
- if (nvclk_freq * 2 > mclk_freq * width)
- video_fill_us = vbs * 1000 * 1000 / 16 / nvclk_freq;
- else
- video_fill_us = vbs * 1000 * 1000 / (8 * width) / mclk_freq;
- cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
- us_crt = us_video + video_fill_us + cpm_us + us_m + us_n + us_p;
- clwm = us_crt * crtc_drain_rate / (1000 * 1000);
- clwm++;
- } else {
- crtc_drain_rate = pclk_freq * bpp / 8;
- crtpagemiss = 2;
- crtpagemiss += 1;
- cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
- us_crt = cpm_us + us_m + us_n + us_p;
- clwm = us_crt * crtc_drain_rate / (1000 * 1000);
- clwm++;
- }
+
+ crtc_drain_rate = pclk_freq * bpp / 8;
+ crtpagemiss = 2;
+ crtpagemiss += 1;
+ cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
+ us_crt = cpm_us + us_m + us_n + us_p;
+ clwm = us_crt * crtc_drain_rate / (1000 * 1000);
+ clwm++;
+
m1 = clwm + cbs - 512;
p1 = m1 * pclk_freq / mclk_freq;
p1 = p1 * bpp / 8;
- if ((p1 < m1 && m1 > 0) ||
- (video_enable && (clwm > 511 || vlwm > 255)) ||
- (!video_enable && clwm > 519)) {
- fifo->valid = false;
+ if ((p1 < m1 && m1 > 0) || clwm > 519) {
found = !mclk_extra;
mclk_extra--;
}
if (clwm < 384)
clwm = 384;
- if (vlwm < 128)
- vlwm = 128;
- fifo->graphics_lwm = clwm;
- fifo->graphics_burst_size = 128;
- fifo->video_lwm = vlwm + 15;
- fifo->video_burst_size = vbs;
+
+ fifo->lwm = clwm;
+ fifo->burst = cbs;
}
}
static void
-nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
+nv10_calc_arb(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
{
- int pagemiss, width, video_enable, bpp;
+ int pagemiss, width, bpp;
int nvclks, mclks, pclks, vpagemiss, crtpagemiss;
- int nvclk_fill;
int found, mclk_extra, mclk_loop, cbs, m1;
- int mclk_freq, pclk_freq, nvclk_freq, mp_enable;
+ int mclk_freq, pclk_freq, nvclk_freq;
int us_m, us_m_min, us_n, us_p, crtc_drain_rate;
int vus_m;
int vpm_us, us_video, cpm_us, us_crt, clwm;
@@ -184,9 +126,7 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
nvclk_freq = arb->nvclk_khz;
pagemiss = arb->mem_page_miss;
width = arb->memory_width / 64;
- video_enable = arb->enable_video;
- bpp = arb->pix_bpp;
- mp_enable = arb->enable_mp;
+ bpp = arb->bpp;
clwm = 0;
cbs = 512;
pclks = 4; /* lwm detect. */
@@ -210,29 +150,20 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
else
mclks += 1;
- if (!video_enable && arb->memory_width == 128) {
- mclk_extra = (bpp == 32) ? 31 : 42; /* Margin of error */
- min_mclk_extra = 17;
- } else {
- mclk_extra = (bpp == 32) ? 8 : 4; /* Margin of error */
- /* mclk_extra = 4; *//* Margin of error */
- min_mclk_extra = 18;
- }
+ mclk_extra = (bpp == 32) ? 8 : 4; /* Margin of error */
+ min_mclk_extra = 18;
nvclks += 1; /* 2 edge sync. may be very close to edge so just put one. */
nvclks += 1; /* fbi_d_rdv_n */
nvclks += 1; /* Fbi_d_rdata */
nvclks += 1; /* crtfifo load */
- if (mp_enable)
- mclks += 4; /* Mp can get in with a burst of 8. */
/* Extra clocks determined by heuristics */
nvclks += 0;
pclks += 0;
found = 0;
while (found != 1) {
- fifo->valid = true;
found = 1;
mclk_loop = mclks + mclk_extra;
us_m = mclk_loop * 1000 * 1000 / mclk_freq; /* Mclk latency in us */
@@ -244,50 +175,24 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
vus_m = mclk_loop * 1000 * 1000 / mclk_freq; /* Mclk latency in us */
- if (video_enable) {
- crtc_drain_rate = pclk_freq * bpp / 8; /* MB/s */
-
- vpagemiss = 1; /* self generating page miss */
- vpagemiss += 1; /* One higher priority before */
-
- crtpagemiss = 2; /* self generating page miss */
- if (mp_enable)
- crtpagemiss += 1; /* if MA0 conflict */
-
- vpm_us = vpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
-
- us_video = vpm_us + vus_m; /* Video has separate read return path */
-
- cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
- us_crt = us_video /* Wait for video */
- + cpm_us /* CRT Page miss */
- + us_m + us_n + us_p; /* other latency */
-
- clwm = us_crt * crtc_drain_rate / (1000 * 1000);
- clwm++; /* fixed point <= float_point - 1. Fixes that */
- } else {
- crtc_drain_rate = pclk_freq * bpp / 8; /* bpp * pclk/8 */
-
- crtpagemiss = 1; /* self generating page miss */
- crtpagemiss += 1; /* MA0 page miss */
- if (mp_enable)
- crtpagemiss += 1; /* if MA0 conflict */
- cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
- us_crt = cpm_us + us_m + us_n + us_p;
- clwm = us_crt * crtc_drain_rate / (1000 * 1000);
- clwm++; /* fixed point <= float_point - 1. Fixes that */
-
- /* Finally, a heuristic check when width == 64 bits */
- if (width == 1) {
- nvclk_fill = nvclk_freq * 8;
- if (crtc_drain_rate * 100 >= nvclk_fill * 102)
- clwm = 0xfff; /* Large number to fail */
- else if (crtc_drain_rate * 100 >= nvclk_fill * 98) {
- clwm = 1024;
- cbs = 512;
- }
- }
- }
+ crtc_drain_rate = pclk_freq * bpp / 8; /* MB/s */
+
+ vpagemiss = 1; /* self generating page miss */
+ vpagemiss += 1; /* One higher priority before */
+
+ crtpagemiss = 2; /* self generating page miss */
+
+ vpm_us = vpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
+
+ us_video = vpm_us + vus_m; /* Video has separate read return path */
+
+ cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
+ us_crt = us_video /* Wait for video */
+ + cpm_us /* CRT Page miss */
+ + us_m + us_n + us_p; /* other latency */
+
+ clwm = us_crt * crtc_drain_rate / (1000 * 1000);
+ clwm++; /* fixed point <= float_point - 1. Fixes that */
/*
* Overfill check:
@@ -305,7 +210,6 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
p2 = p1clk * bpp / 8; /* bytes drained. */
if (p2 < m1 && m1 > 0) {
- fifo->valid = false;
found = 0;
if (min_mclk_extra == 0) {
if (cbs <= 32)
@@ -315,7 +219,6 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
} else
min_mclk_extra--;
} else if (clwm > 1023) { /* Have some margin */
- fifo->valid = false;
found = 0;
if (min_mclk_extra == 0)
found = 1; /* Can't adjust anymore! */
@@ -330,17 +233,14 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
clwm = min_clwm;
/* printf("CRT LWM: prog: 0x%x, bs: 256\n", clwm); */
- fifo->graphics_lwm = clwm;
- fifo->graphics_burst_size = cbs;
-
- fifo->video_lwm = 1024;
- fifo->video_burst_size = 512;
+ fifo->lwm = clwm;
+ fifo->burst = cbs;
}
}
static void
-nv4_10UpdateArbitrationSettings(struct drm_device *dev, int VClk, int bpp,
- int *burst, int *lwm)
+nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
+ int *burst, int *lwm)
{
struct nv_fifo_info fifo_data;
struct nv_sim_state sim_data;
@@ -351,21 +251,19 @@ nv4_10UpdateArbitrationSettings(struct drm_device *dev, int VClk, int bpp,
sim_data.pclk_khz = VClk;
sim_data.mclk_khz = MClk;
sim_data.nvclk_khz = NVClk;
- sim_data.pix_bpp = bpp;
- sim_data.enable_mp = false;
+ sim_data.bpp = bpp;
+ sim_data.two_heads = nv_two_heads(dev);
if ((dev->pci_device & 0xffff) == 0x01a0 /*CHIPSET_NFORCE*/ ||
(dev->pci_device & 0xffff) == 0x01f0 /*CHIPSET_NFORCE2*/) {
uint32_t type;
pci_read_config_dword(pci_get_bus_and_slot(0, 1), 0x7c, &type);
- sim_data.enable_video = false;
sim_data.memory_type = (type >> 12) & 1;
sim_data.memory_width = 64;
sim_data.mem_latency = 3;
sim_data.mem_page_miss = 10;
} else {
- sim_data.enable_video = (nv_arch(dev) != NV_04);
sim_data.memory_type = nvReadFB(dev, NV_PFB_CFG0) & 0x1;
sim_data.memory_width = (nvReadEXTDEV(dev, NV_PEXTDEV_BOOT_0) & 0x10) ? 128 : 64;
sim_data.mem_latency = cfg1 & 0xf;
@@ -373,21 +271,16 @@ nv4_10UpdateArbitrationSettings(struct drm_device *dev, int VClk, int bpp,
}
if (nv_arch(dev) == NV_04)
- nv4CalcArbitration(&fifo_data, &sim_data);
+ nv04_calc_arb(&fifo_data, &sim_data);
else
- nv10CalcArbitration(&fifo_data, &sim_data);
-
- if (fifo_data.valid) {
- int b = fifo_data.graphics_burst_size >> 4;
- *burst = 0;
- while (b >>= 1)
- (*burst)++;
- *lwm = fifo_data.graphics_lwm >> 3;
- }
+ nv10_calc_arb(&fifo_data, &sim_data);
+
+ *burst = ilog2(fifo_data.burst >> 4);
+ *lwm = fifo_data.lwm >> 3;
}
static void
-nv30UpdateArbitrationSettings(int *burst, int *lwm)
+nv30_update_arb(int *burst, int *lwm)
{
unsigned int fifo_size, burst_size, graphics_lwm;
@@ -395,10 +288,7 @@ nv30UpdateArbitrationSettings(int *burst, int *lwm)
burst_size = 512;
graphics_lwm = fifo_size - burst_size;
- *burst = 0;
- burst_size >>= 5;
- while (burst_size >>= 1)
- (*burst)++;
+ *burst = ilog2(burst_size >> 5);
*lwm = graphics_lwm >> 3;
}
@@ -406,13 +296,13 @@ void
nouveau_calc_arb(struct drm_device *dev, int vclk, int bpp, int *burst, int *lwm)
{
if (nv_arch(dev) < NV_30)
- nv4_10UpdateArbitrationSettings(dev, vclk, bpp, burst, lwm);
+ nv04_update_arb(dev, vclk, bpp, burst, lwm);
else if ((dev->pci_device & 0xfff0) == 0x0240 /*CHIPSET_C51*/ ||
(dev->pci_device & 0xfff0) == 0x03d0 /*CHIPSET_C512*/) {
*burst = 128;
*lwm = 0x0480;
} else
- nv30UpdateArbitrationSettings(burst, lwm);
+ nv30_update_arb(burst, lwm);
}
static int
--
1.6.4.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] drm/nv10-nv20: CRTC arbitration code rewrite.
[not found] ` <1258983953-15349-1-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
2009-11-23 13:45 ` [PATCH 2/3] drm/nouveau: Clean up the arbitration parameters calculation code Francisco Jerez
@ 2009-11-23 13:45 ` Francisco Jerez
[not found] ` <1258983953-15349-3-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
1 sibling, 1 reply; 4+ messages in thread
From: Francisco Jerez @ 2009-11-23 13:45 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
The previous bandwidth calculation code was nv legacy and it had some
issues besides being obfuscated:
* It assumed a single-head setup (I worked around this in 5603fe7f;
however, that triggered bug 24820).
* It could lead to unnecessarily conservative settings, because it
assumed a 1kB FIFO size limit (like nv10/nv15, but not nv11/nv17).
* It sometimes set unacceptably large FIFO burst values, screwing
latency and causing some overlay corruption (bug 11993).
Signed-off-by: Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
---
drivers/gpu/drm/nouveau/nouveau_calc.c | 182 ++++++++++++--------------------
1 files changed, 67 insertions(+), 115 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_calc.c b/drivers/gpu/drm/nouveau/nouveau_calc.c
index 5d48274..1f85715 100644
--- a/drivers/gpu/drm/nouveau/nouveau_calc.c
+++ b/drivers/gpu/drm/nouveau/nouveau_calc.c
@@ -109,133 +109,85 @@ nv04_calc_arb(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
static void
nv10_calc_arb(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
{
- int pagemiss, width, bpp;
- int nvclks, mclks, pclks, vpagemiss, crtpagemiss;
- int found, mclk_extra, mclk_loop, cbs, m1;
- int mclk_freq, pclk_freq, nvclk_freq;
- int us_m, us_m_min, us_n, us_p, crtc_drain_rate;
- int vus_m;
- int vpm_us, us_video, cpm_us, us_crt, clwm;
- int clwm_rnd_down, min_clwm;
- int m2us, us_pipe_min, p1clk, p2;
- int min_mclk_extra;
- int us_min_mclk_extra;
-
- pclk_freq = arb->pclk_khz; /* freq in KHz */
- mclk_freq = arb->mclk_khz;
+ int fill_rate, drain_rate;
+ int pclks, nvclks, mclks, xclks;
+ int pclk_freq, nvclk_freq, mclk_freq;
+ int fill_lat, extra_lat;
+ int max_burst_o, max_burst_l, burst;
+ int fifo_len, min_lwm, max_lwm;
+ const int burst_lat = 80; /* Maximum allowable latency due
+ * to the CRTC FIFO burst. (ns) */
+
+ pclk_freq = arb->pclk_khz;
nvclk_freq = arb->nvclk_khz;
- pagemiss = arb->mem_page_miss;
- width = arb->memory_width / 64;
- bpp = arb->bpp;
- clwm = 0;
- cbs = 512;
+ mclk_freq = arb->mclk_khz;
+
+ fill_rate = mclk_freq * arb->memory_width / 8; /* kB/s */
+ drain_rate = pclk_freq * arb->bpp / 8; /* kB/s */
+
+ fifo_len = arb->two_heads ? 1536 : 1024; /* B */
+
+ /* Fixed FIFO refill latency. */
+
pclks = 4; /* lwm detect. */
- nvclks = 3; /* lwm -> sync. */
- nvclks += 2; /* fbi bus cycles (1 req + 1 busy) */
- mclks = 1; /* 2 edge sync. may be very close to edge so just put one. */
- mclks += 1; /* arb_hp_req */
- mclks += 5; /* ap_hp_req tiling pipeline */
- mclks += 2; /* tc_req latency fifo */
- mclks += 2; /* fb_cas_n_ memory request to fbio block */
- mclks += 7; /* sm_d_rdv data returned from fbio block */
-
- /* fb.rd.d.Put_gc need to accumulate 256 bits for read */
- if (arb->memory_type == 0) {
- if (arb->memory_width == 64) /* 64 bit bus */
- mclks += 4;
- else
- mclks += 2;
- } else if (arb->memory_width == 64) /* 64 bit bus */
- mclks += 2;
- else
- mclks += 1;
- mclk_extra = (bpp == 32) ? 8 : 4; /* Margin of error */
- min_mclk_extra = 18;
+ nvclks = 3 /* lwm -> sync. */
+ + 2 /* fbi bus cycles (1 req + 1 busy) */
+ + 1 /* 2 edge sync. may be very close to edge so
+ * just put one. */
+ + 1 /* fbi_d_rdv_n */
+ + 1 /* Fbi_d_rdata */
+ + 1; /* crtfifo load */
- nvclks += 1; /* 2 edge sync. may be very close to edge so just put one. */
- nvclks += 1; /* fbi_d_rdv_n */
- nvclks += 1; /* Fbi_d_rdata */
- nvclks += 1; /* crtfifo load */
+ mclks = 1 /* 2 edge sync. may be very close to edge so
+ * just put one. */
+ + 1 /* arb_hp_req */
+ + 5 /* tiling pipeline */
+ + 2 /* latency fifo */
+ + 2 /* memory request to fbio block */
+ + 7; /* data returned from fbio block */
- /* Extra clocks determined by heuristics */
+ /* Need to accumulate 256 bits for read */
+ mclks += (arb->memory_type == 0 ? 2 : 1)
+ * arb->memory_width / 32;
- nvclks += 0;
- pclks += 0;
- found = 0;
- while (found != 1) {
- found = 1;
- mclk_loop = mclks + mclk_extra;
- us_m = mclk_loop * 1000 * 1000 / mclk_freq; /* Mclk latency in us */
- us_m_min = mclks * 1000 * 1000 / mclk_freq; /* Minimum Mclk latency in us */
- us_min_mclk_extra = min_mclk_extra * 1000 * 1000 / mclk_freq;
- us_n = nvclks * 1000 * 1000 / nvclk_freq; /* nvclk latency in us */
- us_p = pclks * 1000 * 1000 / pclk_freq; /* nvclk latency in us */
- us_pipe_min = us_m_min + us_n + us_p;
+ fill_lat = mclks * 1000 * 1000 / mclk_freq /* minimum mclk latency */
+ + nvclks * 1000 * 1000 / nvclk_freq /* nvclk latency */
+ + pclks * 1000 * 1000 / pclk_freq; /* pclk latency */
- vus_m = mclk_loop * 1000 * 1000 / mclk_freq; /* Mclk latency in us */
+ /* Conditional FIFO refill latency. */
- crtc_drain_rate = pclk_freq * bpp / 8; /* MB/s */
+ xclks = 2 * arb->mem_page_miss + mclks /* Extra latency due to
+ * the overlay. */
+ + 2 * arb->mem_page_miss /* Extra pagemiss latency. */
+ + (arb->bpp == 32 ? 8 : 4); /* Margin of error. */
- vpagemiss = 1; /* self generating page miss */
- vpagemiss += 1; /* One higher priority before */
+ extra_lat = xclks * 1000 * 1000 / mclk_freq;
- crtpagemiss = 2; /* self generating page miss */
+ if (arb->two_heads)
+ /* Account for another CRTC. */
+ extra_lat += fill_lat + extra_lat + burst_lat;
- vpm_us = vpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
+ /* FIFO burst */
- us_video = vpm_us + vus_m; /* Video has separate read return path */
+ /* Max burst not leading to overflows. */
+ max_burst_o = (1 + fifo_len - extra_lat * drain_rate / (1000 * 1000))
+ * (fill_rate / 1000) / ((fill_rate - drain_rate) / 1000);
- cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
- us_crt = us_video /* Wait for video */
- + cpm_us /* CRT Page miss */
- + us_m + us_n + us_p; /* other latency */
+ /* Max burst value with an acceptable latency. */
+ max_burst_l = burst_lat * fill_rate / (1000 * 1000);
- clwm = us_crt * crtc_drain_rate / (1000 * 1000);
- clwm++; /* fixed point <= float_point - 1. Fixes that */
-
- /*
- * Overfill check:
- */
-
- clwm_rnd_down = (clwm / 8) * 8;
- if (clwm_rnd_down < clwm)
- clwm += 8;
-
- m1 = clwm + cbs - 1024; /* Amount of overfill */
- m2us = us_pipe_min + us_min_mclk_extra;
-
- /* pclk cycles to drain */
- p1clk = m2us * pclk_freq / (1000 * 1000);
- p2 = p1clk * bpp / 8; /* bytes drained. */
-
- if (p2 < m1 && m1 > 0) {
- found = 0;
- if (min_mclk_extra == 0) {
- if (cbs <= 32)
- found = 1; /* Can't adjust anymore! */
- else
- cbs = cbs / 2; /* reduce the burst size */
- } else
- min_mclk_extra--;
- } else if (clwm > 1023) { /* Have some margin */
- found = 0;
- if (min_mclk_extra == 0)
- found = 1; /* Can't adjust anymore! */
- else
- min_mclk_extra--;
- }
+ fifo->burst = burst = rounddown_pow_of_two(
+ min(max_burst_l, min(max_burst_o, 1024)));
- /* This correction works around a slight snow effect
- * when the TV and VGA outputs are enabled simultaneously. */
- min_clwm = 1024 - cbs + 128 * pclk_freq / 100000;
- if (clwm < min_clwm)
- clwm = min_clwm;
+ /* FIFO low watermark */
- /* printf("CRT LWM: prog: 0x%x, bs: 256\n", clwm); */
- fifo->lwm = clwm;
- fifo->burst = cbs;
- }
+ min_lwm = (fill_lat + extra_lat) * drain_rate / (1000 * 1000) + 1;
+ max_lwm = fifo_len - burst
+ + fill_lat * drain_rate / (1000 * 1000)
+ + burst * drain_rate / fill_rate;
+
+ fifo->lwm = min_lwm + 5 * (max_lwm - min_lwm) / 100; /* Empirical. */
}
static void
--
1.6.4.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCHv2 3/3] drm/nv10-nv20: CRTC arbitration code rewrite.
[not found] ` <1258983953-15349-3-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
@ 2009-11-24 15:00 ` Francisco Jerez
0 siblings, 0 replies; 4+ messages in thread
From: Francisco Jerez @ 2009-11-24 15:00 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
The previous bandwidth calculation code was nv legacy and it had some
issues besides being obfuscated:
* It assumed a single-head setup (I worked around this in 5603fe7f;
however, that triggered bug 24820).
* It could lead to unnecessarily conservative settings, because it
assumed a 1kB FIFO size limit (like nv10/nv15, but not nv11/nv17).
* It sometimes set unacceptably large FIFO burst values, screwing
latency and causing some overlay corruption (bug 11993).
Signed-off-by: Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
---
v2: Fix some sparse warnings.
drivers/gpu/drm/nouveau/nouveau_calc.c | 183 ++++++++++++--------------------
1 files changed, 68 insertions(+), 115 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_calc.c b/drivers/gpu/drm/nouveau/nouveau_calc.c
index 5d48274..4258da9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_calc.c
+++ b/drivers/gpu/drm/nouveau/nouveau_calc.c
@@ -109,133 +109,86 @@ nv04_calc_arb(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
static void
nv10_calc_arb(struct nv_fifo_info *fifo, struct nv_sim_state *arb)
{
- int pagemiss, width, bpp;
- int nvclks, mclks, pclks, vpagemiss, crtpagemiss;
- int found, mclk_extra, mclk_loop, cbs, m1;
- int mclk_freq, pclk_freq, nvclk_freq;
- int us_m, us_m_min, us_n, us_p, crtc_drain_rate;
- int vus_m;
- int vpm_us, us_video, cpm_us, us_crt, clwm;
- int clwm_rnd_down, min_clwm;
- int m2us, us_pipe_min, p1clk, p2;
- int min_mclk_extra;
- int us_min_mclk_extra;
-
- pclk_freq = arb->pclk_khz; /* freq in KHz */
- mclk_freq = arb->mclk_khz;
+ int fill_rate, drain_rate;
+ int pclks, nvclks, mclks, xclks;
+ int pclk_freq, nvclk_freq, mclk_freq;
+ int fill_lat, extra_lat;
+ int max_burst_o, max_burst_l;
+ int fifo_len, min_lwm, max_lwm;
+ const int burst_lat = 80; /* Maximum allowable latency due
+ * to the CRTC FIFO burst. (ns) */
+
+ pclk_freq = arb->pclk_khz;
nvclk_freq = arb->nvclk_khz;
- pagemiss = arb->mem_page_miss;
- width = arb->memory_width / 64;
- bpp = arb->bpp;
- clwm = 0;
- cbs = 512;
+ mclk_freq = arb->mclk_khz;
+
+ fill_rate = mclk_freq * arb->memory_width / 8; /* kB/s */
+ drain_rate = pclk_freq * arb->bpp / 8; /* kB/s */
+
+ fifo_len = arb->two_heads ? 1536 : 1024; /* B */
+
+ /* Fixed FIFO refill latency. */
+
pclks = 4; /* lwm detect. */
- nvclks = 3; /* lwm -> sync. */
- nvclks += 2; /* fbi bus cycles (1 req + 1 busy) */
- mclks = 1; /* 2 edge sync. may be very close to edge so just put one. */
- mclks += 1; /* arb_hp_req */
- mclks += 5; /* ap_hp_req tiling pipeline */
- mclks += 2; /* tc_req latency fifo */
- mclks += 2; /* fb_cas_n_ memory request to fbio block */
- mclks += 7; /* sm_d_rdv data returned from fbio block */
-
- /* fb.rd.d.Put_gc need to accumulate 256 bits for read */
- if (arb->memory_type == 0) {
- if (arb->memory_width == 64) /* 64 bit bus */
- mclks += 4;
- else
- mclks += 2;
- } else if (arb->memory_width == 64) /* 64 bit bus */
- mclks += 2;
- else
- mclks += 1;
- mclk_extra = (bpp == 32) ? 8 : 4; /* Margin of error */
- min_mclk_extra = 18;
+ nvclks = 3 /* lwm -> sync. */
+ + 2 /* fbi bus cycles (1 req + 1 busy) */
+ + 1 /* 2 edge sync. may be very close to edge so
+ * just put one. */
+ + 1 /* fbi_d_rdv_n */
+ + 1 /* Fbi_d_rdata */
+ + 1; /* crtfifo load */
- nvclks += 1; /* 2 edge sync. may be very close to edge so just put one. */
- nvclks += 1; /* fbi_d_rdv_n */
- nvclks += 1; /* Fbi_d_rdata */
- nvclks += 1; /* crtfifo load */
+ mclks = 1 /* 2 edge sync. may be very close to edge so
+ * just put one. */
+ + 1 /* arb_hp_req */
+ + 5 /* tiling pipeline */
+ + 2 /* latency fifo */
+ + 2 /* memory request to fbio block */
+ + 7; /* data returned from fbio block */
- /* Extra clocks determined by heuristics */
+ /* Need to accumulate 256 bits for read */
+ mclks += (arb->memory_type == 0 ? 2 : 1)
+ * arb->memory_width / 32;
- nvclks += 0;
- pclks += 0;
- found = 0;
- while (found != 1) {
- found = 1;
- mclk_loop = mclks + mclk_extra;
- us_m = mclk_loop * 1000 * 1000 / mclk_freq; /* Mclk latency in us */
- us_m_min = mclks * 1000 * 1000 / mclk_freq; /* Minimum Mclk latency in us */
- us_min_mclk_extra = min_mclk_extra * 1000 * 1000 / mclk_freq;
- us_n = nvclks * 1000 * 1000 / nvclk_freq; /* nvclk latency in us */
- us_p = pclks * 1000 * 1000 / pclk_freq; /* nvclk latency in us */
- us_pipe_min = us_m_min + us_n + us_p;
+ fill_lat = mclks * 1000 * 1000 / mclk_freq /* minimum mclk latency */
+ + nvclks * 1000 * 1000 / nvclk_freq /* nvclk latency */
+ + pclks * 1000 * 1000 / pclk_freq; /* pclk latency */
- vus_m = mclk_loop * 1000 * 1000 / mclk_freq; /* Mclk latency in us */
+ /* Conditional FIFO refill latency. */
- crtc_drain_rate = pclk_freq * bpp / 8; /* MB/s */
+ xclks = 2 * arb->mem_page_miss + mclks /* Extra latency due to
+ * the overlay. */
+ + 2 * arb->mem_page_miss /* Extra pagemiss latency. */
+ + (arb->bpp == 32 ? 8 : 4); /* Margin of error. */
- vpagemiss = 1; /* self generating page miss */
- vpagemiss += 1; /* One higher priority before */
+ extra_lat = xclks * 1000 * 1000 / mclk_freq;
- crtpagemiss = 2; /* self generating page miss */
+ if (arb->two_heads)
+ /* Account for another CRTC. */
+ extra_lat += fill_lat + extra_lat + burst_lat;
- vpm_us = vpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
+ /* FIFO burst */
- us_video = vpm_us + vus_m; /* Video has separate read return path */
+ /* Max burst not leading to overflows. */
+ max_burst_o = (1 + fifo_len - extra_lat * drain_rate / (1000 * 1000))
+ * (fill_rate / 1000) / ((fill_rate - drain_rate) / 1000);
+ fifo->burst = min(max_burst_o, 1024);
- cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq;
- us_crt = us_video /* Wait for video */
- + cpm_us /* CRT Page miss */
- + us_m + us_n + us_p; /* other latency */
+ /* Max burst value with an acceptable latency. */
+ max_burst_l = burst_lat * fill_rate / (1000 * 1000);
+ fifo->burst = min(max_burst_l, fifo->burst);
- clwm = us_crt * crtc_drain_rate / (1000 * 1000);
- clwm++; /* fixed point <= float_point - 1. Fixes that */
-
- /*
- * Overfill check:
- */
-
- clwm_rnd_down = (clwm / 8) * 8;
- if (clwm_rnd_down < clwm)
- clwm += 8;
-
- m1 = clwm + cbs - 1024; /* Amount of overfill */
- m2us = us_pipe_min + us_min_mclk_extra;
-
- /* pclk cycles to drain */
- p1clk = m2us * pclk_freq / (1000 * 1000);
- p2 = p1clk * bpp / 8; /* bytes drained. */
-
- if (p2 < m1 && m1 > 0) {
- found = 0;
- if (min_mclk_extra == 0) {
- if (cbs <= 32)
- found = 1; /* Can't adjust anymore! */
- else
- cbs = cbs / 2; /* reduce the burst size */
- } else
- min_mclk_extra--;
- } else if (clwm > 1023) { /* Have some margin */
- found = 0;
- if (min_mclk_extra == 0)
- found = 1; /* Can't adjust anymore! */
- else
- min_mclk_extra--;
- }
+ fifo->burst = rounddown_pow_of_two(fifo->burst);
- /* This correction works around a slight snow effect
- * when the TV and VGA outputs are enabled simultaneously. */
- min_clwm = 1024 - cbs + 128 * pclk_freq / 100000;
- if (clwm < min_clwm)
- clwm = min_clwm;
+ /* FIFO low watermark */
- /* printf("CRT LWM: prog: 0x%x, bs: 256\n", clwm); */
- fifo->lwm = clwm;
- fifo->burst = cbs;
- }
+ min_lwm = (fill_lat + extra_lat) * drain_rate / (1000 * 1000) + 1;
+ max_lwm = fifo_len - fifo->burst
+ + fill_lat * drain_rate / (1000 * 1000)
+ + fifo->burst * drain_rate / fill_rate;
+
+ fifo->lwm = min_lwm + 5 * (max_lwm - min_lwm) / 100; /* Empirical. */
}
static void
--
1.6.4.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2009-11-24 15:00 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-23 13:45 [PATCH 1/3] drm/nouveau: Update the CRTC arbitration parameters on FB depth switch Francisco Jerez
[not found] ` <1258983953-15349-1-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
2009-11-23 13:45 ` [PATCH 2/3] drm/nouveau: Clean up the arbitration parameters calculation code Francisco Jerez
2009-11-23 13:45 ` [PATCH 3/3] drm/nv10-nv20: CRTC arbitration code rewrite Francisco Jerez
[not found] ` <1258983953-15349-3-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
2009-11-24 15:00 ` [PATCHv2 " Francisco Jerez
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.