From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH -V5 06/13] arch/powerpc: Increase the slice range to 64TB
Date: Wed, 01 Aug 2012 13:13:37 +0530 [thread overview]
Message-ID: <87wr1jhxau.fsf@skywalker.in.ibm.com> (raw)
In-Reply-To: <20120801051617.GC24014@drongo>
Paul Mackerras <paulus@samba.org> writes:
> On Mon, Jul 30, 2012 at 04:52:12PM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>
>> This patch turns the high psizes mask into an unsigned char array
>> so that we can have more than 16TB. Currently we support up to
>> 64TB.
>
> Comments below...
>
>> diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
>> index b9ee79ce..c355af6 100644
>> --- a/arch/powerpc/mm/slb_low.S
>> +++ b/arch/powerpc/mm/slb_low.S
>> @@ -108,17 +108,34 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
>> * between 4k and 64k standard page size
>> */
>> #ifdef CONFIG_PPC_MM_SLICES
>> + /* r10 have esid */
>> cmpldi r10,16
>> -
>> - /* Get the slice index * 4 in r11 and matching slice size mask in r9 */
>> - ld r9,PACALOWSLICESPSIZE(r13)
>> - sldi r11,r10,2
>> + /* below SLICE_LOW_TOP */
>> blt 5f
>> - ld r9,PACAHIGHSLICEPSIZE(r13)
>> - srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
>> - andi. r11,r11,0x3c
>> -
>> -5: /* Extract the psize and multiply to get an array offset */
>> + /*
>> + * Handle hpsizes,
>> + * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
>> + * We use r10 here, later we restore it to esid.
>> + * Can we use other register instead of r10 ?
>
> Only r9, r10 and r11 are available here, and you're using them all.
> Restoring r10 with one integer instruction is going to be quicker than
> saving and restoring another register to/from memory.
>
>> + */
>> + srdi r10,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT) /* index */
>> + srdi r11,r10,1 /* r11 is array index */
>> + addi r9,r11,PACAHIGHSLICEPSIZE
>> + lbzx r9,r9,r13 /* r9 is hpsizes[r11] */
>> + sldi r11,r11,1
>> + subf r11,r11,r10 /* mask_index = index - (array_index << 1) */
>> + srdi r10,r3,28 /* restore r10 with esid */
>> + b 6f
>
> How about (untested):
>
> srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
> addi r9,r11,PACAHIGHSLICEPSIZE
> lbzx r9,r13,r9 /* r9 is hpsizes[r11] */
> /* r11 = (r10 >> 12) & 1, i.e. grab lowest bit of 1T ESID */
> rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
> b 6f
>
Nice, I missed the assembly part when you asked me to update the C code
in the previous review. This change also brings it closer to the C code.
> Note that I swapped the RA and RB arguments for the lbzx. Our recent
> processors process indexed mode instructions more quickly if the value
> in RB is small.
>
>> static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
>> {
>> + unsigned char *hpsizes;
>> + int index, mask_index;
>> struct slice_mask ret = { 0, 0 };
>> unsigned long i;
>> - u64 psizes;
>> + u64 lpsizes;
>>
>> - psizes = mm->context.low_slices_psize;
>> + lpsizes = mm->context.low_slices_psize;
>> for (i = 0; i < SLICE_NUM_LOW; i++)
>> - if (((psizes >> (i * 4)) & 0xf) == psize)
>> + if (((lpsizes >> (i * 4)) & 0xf) == psize)
>> ret.low_slices |= 1u << i;
>>
>> - psizes = mm->context.high_slices_psize;
>> - for (i = 0; i < SLICE_NUM_HIGH; i++)
>> - if (((psizes >> (i * 4)) & 0xf) == psize)
>> + hpsizes = mm->context.high_slices_psize;
>> + for (i = 0; i < SLICE_NUM_HIGH; i++) {
>> + mask_index = i & 0x1;
>> + index = i >> 1;
>> + if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
>> ret.high_slices |= 1u << i;
>
> This needs to be 1ul not 1u, since we are creating a 64-bit mask.
>
>> static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
>> {
>> + int index, mask_index;
>> /* Write the new slice psize bits */
>> - u64 lpsizes, hpsizes;
>> + unsigned char *hpsizes;
>> + u64 lpsizes;
>> unsigned long i, flags;
>>
>> slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
>> @@ -201,14 +208,18 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
>> lpsizes = (lpsizes & ~(0xful << (i * 4))) |
>> (((unsigned long)psize) << (i * 4));
>>
>> + /* Assign the value back */
>> + mm->context.low_slices_psize = lpsizes;
>> +
>> hpsizes = mm->context.high_slices_psize;
>> - for (i = 0; i < SLICE_NUM_HIGH; i++)
>> + for (i = 0; i < SLICE_NUM_HIGH; i++) {
>> + mask_index = i & 0x1;
>> + index = i >> 1;
>> if (mask.high_slices & (1u << i))
>
> Again, 1ul now. Check all the other places where we manipulate a
> slice mask to see if there are any other instances of 1u that need to
> be changed.
I ended up with this.
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 0136040..b4e996a 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -54,7 +54,7 @@ static void slice_print_mask(const char *label, struct slice_mask mask)
*(p++) = '-';
*(p++) = ' ';
for (i = 0; i < SLICE_NUM_HIGH; i++)
- *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
+ *(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
*(p++) = 0;
printk(KERN_DEBUG "%s:%s\n", label, buf);
@@ -84,8 +84,8 @@ static struct slice_mask slice_range_to_mask(unsigned long start,
}
if ((start + len) > SLICE_LOW_TOP)
- ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
- - (1u << GET_HIGH_SLICE_INDEX(start));
+ ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
+ - (1ul << GET_HIGH_SLICE_INDEX(start));
return ret;
}
@@ -135,7 +135,7 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
for (i = 0; i < SLICE_NUM_HIGH; i++)
if (!slice_high_has_vma(mm, i))
- ret.high_slices |= 1u << i;
+ ret.high_slices |= 1ul << i;
return ret;
}
@@ -158,7 +158,7 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
mask_index = i & 0x1;
index = i >> 1;
if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
- ret.high_slices |= 1u << i;
+ ret.high_slices |= 1ul << i;
}
return ret;
@@ -215,7 +215,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
for (i = 0; i < SLICE_NUM_HIGH; i++) {
mask_index = i & 0x1;
index = i >> 1;
- if (mask.high_slices & (1u << i))
+ if (mask.high_slices & (1ul << i))
hpsizes[index] = (hpsizes[index] &
~(0xf << (mask_index * 4))) |
(((unsigned long)psize) << (mask_index * 4));
-aneesh
next prev parent reply other threads:[~2012-08-01 7:44 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-07-30 11:22 [PATCH -V5 0/13] arch/powerpc: Add 64TB support to ppc64 Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 01/13] arch/powerpc: Use hpt_va to compute virtual address Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 02/13] arch/powerpc: Simplify hpte_decode Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 03/13] arch/powerpc: Convert virtual address to vpn Aneesh Kumar K.V
2012-08-01 4:33 ` Paul Mackerras
2012-08-01 7:23 ` Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 04/13] arch/powerpc: Rename va " Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 05/13] arch/powerpc: Make KERN_VIRT_SIZE not dependend on PGTABLE_RANGE Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 06/13] arch/powerpc: Increase the slice range to 64TB Aneesh Kumar K.V
2012-08-01 5:16 ` Paul Mackerras
2012-08-01 7:43 ` Aneesh Kumar K.V [this message]
2012-07-30 11:22 ` [PATCH -V5 07/13] arch/powerpc: Make some of the PGTABLE_RANGE dependency explicit Aneesh Kumar K.V
2012-08-01 5:18 ` Paul Mackerras
2012-08-01 7:45 ` Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 08/13] arch/powerpc: Use the rquired number of VSID bits in slbmte Aneesh Kumar K.V
2012-08-01 5:19 ` Paul Mackerras
2012-07-30 11:22 ` [PATCH -V5 09/13] arch/powerpc: Use 32bit array for slb cache Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 10/13] arch/powerpc: Add 64TB support Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 11/13] arch/powerpc: properly isolate kernel and user proto-VSID Aneesh Kumar K.V
2012-08-01 4:31 ` Paul Mackerras
2012-08-01 7:58 ` Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 12/13] arch/powerpc: Replace open coded CONTEXT_BITS value Aneesh Kumar K.V
2012-08-01 5:29 ` Paul Mackerras
2012-08-01 9:56 ` Aneesh Kumar K.V
2012-07-30 11:22 ` [PATCH -V5 13/13] arch/powerpc: Update VSID allocation documentation Aneesh Kumar K.V
2012-08-01 5:35 ` Paul Mackerras
2012-08-01 10:01 ` Aneesh Kumar K.V
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87wr1jhxau.fsf@skywalker.in.ibm.com \
--to=aneesh.kumar@linux.vnet.ibm.com \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).