From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from eggs.gnu.org ([2001:4830:134:3::10]:34123)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <sebastian@macke.de>) id 1YFgkW-00081z-Vu
	for qemu-devel@nongnu.org; Mon, 26 Jan 2015 05:18:33 -0500
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from <sebastian@macke.de>) id 1YFgkS-0004hA-84
	for qemu-devel@nongnu.org; Mon, 26 Jan 2015 05:18:28 -0500
Received: from www11.your-server.de ([213.133.104.11]:56419)
	by eggs.gnu.org with esmtp (Exim 4.71)
	(envelope-from <sebastian@macke.de>) id 1YFgkR-0004gr-Uf
	for qemu-devel@nongnu.org; Mon, 26 Jan 2015 05:18:24 -0500
Message-ID: <54C61467.3050805@macke.de>
Date: Mon, 26 Jan 2015 11:18:15 +0100
From: Sebastian Macke <sebastian@macke.de>
MIME-Version: 1.0
References: <1422181532-10674-1-git-send-email-sebastian@macke.de>	<1422181532-10674-3-git-send-email-sebastian@macke.de>
	<CAJBMM-uhiOQrRwBK-FiVJK24DUyFWDh0eVbSujswU5C=Y13wqg@mail.gmail.com>
In-Reply-To: <CAJBMM-uhiOQrRwBK-FiVJK24DUyFWDh0eVbSujswU5C=Y13wqg@mail.gmail.com>
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Transfer-Encoding: 7bit
Subject: Re: [Qemu-devel] [PATCH 2/2] target-openrisc: Add l.lwa/l.swa
	support
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
To: Jia Liu <proljc@gmail.com>
Cc: "qemu-devel@nongnu.org" <qemu-devel@nongnu.org>, blue@cmd.nu

Hi Jia,

On 1/26/2015 10:50 AM, Jia Liu wrote:
> Hi Sebastian, Christian
>
> On Sun, Jan 25, 2015 at 6:25 PM, Sebastian Macke <sebastian@macke.de> wrote:
>> From: Christian Svensson <blue@cmd.nu>
>>
>> This patch adds support for atomic locks
>> and is an adaption from https://github.com/bluecmd/or1k-qemu/commits/or32-optimize
>>
>> Tested via the atomic lock implementation of musl
>>
>> Signed-off-by: Christian Svensson <blue@cmd.nu>
>> Signed-off-by: Sebastian Macke <sebastian@macke.de>
>> ---
>>   target-openrisc/cpu.h       |  3 ++
>>   target-openrisc/interrupt.c |  3 ++
>>   target-openrisc/translate.c | 77 ++++++++++++++++++++++++++++++++++++++++++---
>>   3 files changed, 79 insertions(+), 4 deletions(-)
>>
>> diff --git a/target-openrisc/cpu.h b/target-openrisc/cpu.h
>> index 69b96c6..abdba75 100644
>> --- a/target-openrisc/cpu.h
>> +++ b/target-openrisc/cpu.h
>> @@ -302,6 +302,9 @@ typedef struct CPUOpenRISCState {
>>                                    in solt so far.  */
>>       uint32_t btaken;          /* the SR_F bit */
>>
>> +    target_ulong lock_addr;   /* Atomicity lock address. */
>> +    target_ulong lock_value;  /* Atomicity lock value. */
>> +
>>       CPU_COMMON
>>
>>       /* Fields from here on are preserved across CPU reset. */
>> diff --git a/target-openrisc/interrupt.c b/target-openrisc/interrupt.c
>> index e480cfd..68d554c 100644
>> --- a/target-openrisc/interrupt.c
>> +++ b/target-openrisc/interrupt.c
>> @@ -54,6 +54,9 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
>>       env->tlb->cpu_openrisc_map_address_data = &cpu_openrisc_get_phys_nommu;
>>       env->tlb->cpu_openrisc_map_address_code = &cpu_openrisc_get_phys_nommu;
>>
>> +    /* invalidate lock */
>> +    env->cpu_lock_addr = -1;
>> +
>>       if (cs->exception_index > 0 && cs->exception_index < EXCP_NR) {
>>           env->pc = (cs->exception_index << 8);
>>       } else {
>> diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
>> index 543aa67..6401b4b 100644
>> --- a/target-openrisc/translate.c
>> +++ b/target-openrisc/translate.c
>> @@ -55,6 +55,8 @@ typedef struct DisasContext {
>>   static TCGv_ptr cpu_env;
>>   static TCGv cpu_sr;
>>   static TCGv cpu_R[32];
>> +static TCGv cpu_lock_addr;
>> +static TCGv cpu_lock_value;
>>   static TCGv cpu_pc;
>>   static TCGv jmp_pc;            /* l.jr/l.jalr temp pc */
>>   static TCGv cpu_npc;
>> @@ -82,6 +84,12 @@ void openrisc_translate_init(void)
>>       env_flags = tcg_global_mem_new_i32(TCG_AREG0,
>>                                          offsetof(CPUOpenRISCState, flags),
>>                                          "flags");
>> +    cpu_lock_addr = tcg_global_mem_new(TCG_AREG0,
>> +                                       offsetof(CPUOpenRISCState, lock_addr),
>> +                                       "lock_addr");
>> +    cpu_lock_value = tcg_global_mem_new(TCG_AREG0,
>> +                                        offsetof(CPUOpenRISCState, lock_value),
>> +                                        "lock_value");
>>       cpu_pc = tcg_global_mem_new(TCG_AREG0,
>>                                   offsetof(CPUOpenRISCState, pc), "pc");
>>       cpu_npc = tcg_global_mem_new(TCG_AREG0,
>> @@ -254,17 +262,67 @@ static void gen_jump(DisasContext *dc, uint32_t imm, uint32_t reg, uint32_t op0)
>>       gen_sync_flags(dc);
>>   }
>>
>> +/* According to the OpenRISC specification we should poison our atomic lock
>> + * if any other store is detected to the same address. For the sake of speed
>> + * and because we're single-threaded we guarantee that atomic stores
>> + * fail only if an atomic load or another atomic store
>> + * is executed.
>> + *
>> + * To prevent the potential case of an ordinary store, we save
>> + * the *value* of the address at the lock time. */
>> +
>> +static void gen_atomic_load(TCGv tD, TCGv t0, DisasContext *dc)
>> +{
>> +    tcg_gen_qemu_ld_tl(tD, t0, dc->mem_idx, MO_TEUL);
>> +    tcg_gen_mov_i32(cpu_lock_addr, t0);
>> +    tcg_gen_mov_i32(cpu_lock_value, tD);
>> +}
>> +
>> +static void gen_atomic_store(TCGv tB, TCGv t0, DisasContext *dc)
>> +{
>> +    int store_fail;
>> +    int store_done;
>> +
>> +    store_fail = gen_new_label();
>> +    store_done = gen_new_label();
>> +
>> +    /* check address */
>> +    tcg_gen_brcond_i32(TCG_COND_NE, t0, cpu_lock_addr, store_fail);
>> +
>> +    /* check value */
>> +    TCGv val = tcg_temp_new();
>> +    tcg_gen_qemu_ld_tl(val, t0, dc->mem_idx, MO_TEUL);
>> +    tcg_gen_brcond_i32(TCG_COND_NE, val, cpu_lock_value, store_fail);
>> +    tcg_temp_free(val);
>> +
>> +    /* success of atomic access */
>> +    tcg_gen_qemu_st_tl(tB, t0, dc->mem_idx, MO_TEUL);
>> +    tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_F);
>> +    tcg_gen_br(store_done);
>> +
>> +    gen_set_label(store_fail);
>> +    tcg_gen_andi_tl(cpu_sr, cpu_sr, ~SR_F);
>> +
>> +    gen_set_label(store_done);
>> +    /* the atomic store invalidates the lock address. */
>> +    tcg_gen_movi_i32(cpu_lock_addr, -1);
>> +}
>> +
>>   static void gen_loadstore(DisasContext *dc, uint32 op0,
>>                             uint32_t ra, uint32_t rb, uint32_t rd,
>>                             uint32_t offset)
>>   {
>>       TCGv t0 = cpu_R[ra];
>>       if (offset != 0) {
>> -        t0 = tcg_temp_new();
>> +        t0 = tcg_temp_local_new();
>>           tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(offset, 16));
>>       }
>>
>>       switch (op0) {
>> +    case 0x1b:    /* l.lwa */
>> +        gen_atomic_load(cpu_R[rd], t0, dc);
>> +        break;
>> +
>>       case 0x21:    /* l.lwz */
>>           tcg_gen_qemu_ld_tl(cpu_R[rd], t0, dc->mem_idx, MO_TEUL);
>>           break;
>> @@ -289,6 +347,10 @@ static void gen_loadstore(DisasContext *dc, uint32 op0,
>>           tcg_gen_qemu_ld_tl(cpu_R[rd], t0, dc->mem_idx, MO_TESW);
>>           break;
>>
>> +    case 0x33:    /* l.swa */
>> +        gen_atomic_store(cpu_R[rb], t0, dc);
>> +        break;
>> +
>>       case 0x35:    /* l.sw */
>>           tcg_gen_qemu_st_tl(cpu_R[rb], t0, dc->mem_idx, MO_TEUL);
>>           break;
>> @@ -759,9 +821,6 @@ static void dec_calc(DisasContext *dc, uint32_t insn)
>>       }
>>   }
>>
>> -
>> -
>> -
> It should be blank lines in here in [patch 1/2].

Yes, looks like I added three lines in patch 1/2 and then removed them 
in patch 2/2.
I guess if both patches are accepted it does not matter. Otherwise I 
will fix these in revision 2.

>
>>   static void dec_misc(DisasContext *dc, uint32_t insn)
>>   {
>>       uint32_t op0, op1;
>> @@ -905,6 +964,11 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
>>           gen_loadstore(dc, op0, ra, rb, rd, I16);
>>   #endif*/
>>
>> +    case 0x1b:    /* l.lwa */
>> +        LOG_DIS("l.lwa r%d, r%d, %d\n", rd, ra, I16);
>> +        gen_loadstore(dc, op0, ra, rb, rd, I16);
>> +        break;
>> +
> Is it a new instruction new added into OpenRISC?

Yes, they were added last year.
http://opencores.org/websvn,filedetails?repname=openrisc&path=%2Fopenrisc%2Ftrunk%2Fdocs%2Fopenrisc-arch-1.1-rev0.pdf
Previously the kernel handled those atomic calls for single core 
implementations.
But we also managed to run multi-core OpenRISC machine. And here the 
instructions are absolutely necessary.

Fact is, that almost none of our toolchains work anymore without these 
new instructions.

>>       case 0x21:    /* l.lwz */
>>           LOG_DIS("l.lwz r%d, r%d, %d\n", rd, ra, I16);
>>           gen_loadstore(dc, op0, ra, rb, rd, I16);
>> @@ -1072,6 +1136,11 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
>>           gen_loadstore(dc, op0, ra, rb, rd, tmp);
>>   #endif*/
>>
>> +    case 0x33:    /* l.swa */
>> +        LOG_DIS("l.swa %d, r%d, r%d, %d\n", I5, ra, rb, I11);
>> +        gen_loadstore(dc, op0, ra, rb, rd, tmp);
>> +        break;
>> +
>>       case 0x35:    /* l.sw */
>>           LOG_DIS("l.sw %d, r%d, r%d, %d\n", I5, ra, rb, I11);
>>           gen_loadstore(dc, op0, ra, rb, rd, tmp);
>> --
>> 2.2.2
>>
> Regards,
> Jia