Skip to content

Commit

Permalink
[AArch64] Assembly support for FEAT_LRCPC3
Browse files Browse the repository at this point in the history
This patch implements assembly support for the 2022 A-Profile Architecture
extension FEAT_LRCPC3. FEAT_LRCPC3 is AArch64 only and introduces new
variants of load/store instructions with release consistency ordering.

Specs for individual instructions can be found here:
https://developer.arm.com/documentation/ddi0602/2022-09/Base-Instructions/

This feature is optionally available from v8.2a and therefore not enabled by
default.

Contributors:
  Lucas Prates
  Sam Elliot
  Son Tuan Vu
  Tomas Matheson

Differential Revision: https://reviews.llvm.org/D138579
  • Loading branch information
tmatheson-arm committed Nov 25, 2022
1 parent afba867 commit a6aaa96
Show file tree
Hide file tree
Showing 11 changed files with 450 additions and 2 deletions.
26 changes: 26 additions & 0 deletions clang/test/Driver/aarch64-lrcpc3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Test that target feature FEAT_RCPC3 is implemented and available correctly

// FEAT_RCPC3 is optional (off by default) for v8.2a onwards, and can be enabled with +rcpc3
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.9-a %s 2>&1 | FileCheck %s --check-prefix=NOT_ENABLED
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.9-a+rcpc3 %s 2>&1 | FileCheck %s --check-prefix=ENABLED
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.9-a+norcpc3 %s 2>&1 | FileCheck %s --check-prefix=DISABLED
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv9.4-a %s 2>&1 | FileCheck %s --check-prefix=NOT_ENABLED
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv9.4-a+rcpc3 %s 2>&1 | FileCheck %s --check-prefix=ENABLED
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv9.4-a+norcpc3 %s 2>&1 | FileCheck %s --check-prefix=DISABLED

// FEAT_RCPC3 is optional (off by default) for v8.8a/9.3a and older, and can be enabled using +rcpc3
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.2-a %s 2>&1 | FileCheck %s --check-prefix=NOT_ENABLED
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.2-a+rcpc3 %s 2>&1 | FileCheck %s --check-prefix=ENABLED
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.2-a+norcpc3 %s 2>&1 | FileCheck %s --check-prefix=DISABLED
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv9-a %s 2>&1 | FileCheck %s --check-prefix=NOT_ENABLED
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv9-a+rcpc3 %s 2>&1 | FileCheck %s --check-prefix=ENABLED
// RUN: %clang -### -target aarch64-none-none-eabi -march=armv9-a+norcpc3 %s 2>&1 | FileCheck %s --check-prefix=DISABLED

// FEAT_RCPC3 is invalid before v8
// RUN: %clang -### -target arm-none-none-eabi -march=armv7-a+rcpc3 %s 2>&1 | FileCheck %s --check-prefix=INVALID

// INVALID: error: unsupported argument 'armv7-a+rcpc3' to option '-march='
// ENABLED: "-target-feature" "+rcpc3"
// NOT_ENABLED-NOT: "-target-feature" "+rcpc3"
// DISABLED: "-target-feature" "-rcpc3"

1 change: 1 addition & 0 deletions llvm/include/llvm/Support/AArch64TargetParser.def
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ AARCH64_ARCH_EXT_NAME("hbc", AArch64::AEK_HBC, "+hbc",
AARCH64_ARCH_EXT_NAME("mops", AArch64::AEK_MOPS, "+mops", "-mops")
AARCH64_ARCH_EXT_NAME("pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon")
AARCH64_ARCH_EXT_NAME("cssc", AArch64::AEK_CSSC, "+cssc", "-cssc")
AARCH64_ARCH_EXT_NAME("rcpc3", AArch64::AEK_RCPC3, "+rcpc3", "-rcpc3")
#undef AARCH64_ARCH_EXT_NAME

#ifndef AARCH64_CPU_NAME
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/AArch64TargetParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ enum ArchExtKind : uint64_t {
AEK_B16B16 = 1ULL << 46, // FEAT_B16B16
AEK_SMEF16F16 = 1ULL << 47, // FEAT_SMEF16F16
AEK_CSSC = 1ULL << 48, // FEAT_CSSC
AEK_RCPC3 = 1ULL << 49, // FEAT_LRCPC3
};

enum class ArchKind {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,10 @@ def FeatureITE : SubtargetFeature<"ite", "HasITE",
"true", "Enable Armv9.4-A Instrumentation Extension FEAT_ITE", [FeatureETE,
FeatureTRBE]>;

// Subtarget feature for FEAT_LRCPC3, exposed under the name "rcpc3" and
// recorded in the HasRCPC3 subtarget field. FeatureRCPC_IMMO is listed as
// its only implied feature.
def FeatureRCPC3
    : SubtargetFeature<
          "rcpc3", "HasRCPC3", "true",
          "Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set (FEAT_LRCPC3)",
          [FeatureRCPC_IMMO]>;

//===----------------------------------------------------------------------===//
// Architectures.
//
Expand Down
69 changes: 68 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -4552,7 +4552,7 @@ let mayLoad = 0, mayStore = 1 in
class StoreRelease<bits<2> sz, bit o2, bit L, bit o1, bit o0,
RegisterClass regtype, string asm>
: LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs),
(ins regtype:$Rt, GPR64sp0:$Rn),
(ins regtype:$Rt, GPR64sp:$Rn),
asm, "\t$Rt, [$Rn]">,
Sched<[WriteST]>;

Expand Down Expand Up @@ -11762,6 +11762,73 @@ multiclass ComparisonOp<bit isUnsigned, bit isMin, string asm,
// 2022 Armv8.9/Armv9.4 Extensions
//----------------------------------------------------------------------------

//---
// RCPC instructions (FEAT_LRCPC3)
//---

// Shared encoding skeleton for the FEAT_LRCPC3 load/store instructions.
//   size - written to Inst{31-30}
//   V    - written to Inst{26}
//   opc  - written to Inst{23-22}; its low bit (Inst{22}) also selects
//          between mayLoad and mayStore
// Inst{20-12} is intentionally not assigned here; every derived class or
// def fills it in.
class BaseLRCPC3<bits<2> size, bit V, bits<2> opc, dag oops, dag iops,
                 string asm, string operands, string cstr = "">
    : I<oops, iops, asm, operands, cstr, []>,
      Sched<[WriteAtomic]> {
  bits<5> Rt;
  bits<5> Rn;

  let Inst{31-30} = size;
  // Fixed pattern 0b011 : V : 0b01, spelled out one field at a time.
  let Inst{29-27} = 0b011;
  let Inst{26}    = V;
  let Inst{25-24} = 0b01;
  let Inst{23-22} = opc;
  let Inst{21}    = 0b0;
  // Inst{20-12} is supplied by the derived class.
  let Inst{11-10} = 0b10;
  let Inst{9-5}   = Rn;
  let Inst{4-0}   = Rt;

  // Inst{22} set marks a load; clear marks a store.
  let mayLoad        = Inst{22};
  let mayStore       = !not(Inst{22});
  let hasSideEffects = 0;
}

// Integer register-pair variant of BaseLRCPC3 (V = 0). The second data
// register Rt2 is encoded in Inst{20-16} and the opc2 selector in
// Inst{15-12}.
class BaseLRCPC3IntegerLoadStorePair<bits<2> size, bits<2> opc, bits<4> opc2,
                                     dag oops, dag iops, string asm,
                                     string operands, string cstr>
    : BaseLRCPC3<size, /*V*/0, opc, oops, iops, asm, operands, cstr> {
  bits<5> Rt2;

  let Inst{20-16} = Rt2;
  let Inst{15-12} = opc2;
}

// Single-register integer variant of BaseLRCPC3 (V = 0). The imm9 field in
// Inst{20-12} is hard-wired to zero.
class BaseLRCPC3IntegerLoadStore<bits<2> size, bits<2> opc,
                                 dag oops, dag iops,
                                 string asm, string operands, string cstr>
    : BaseLRCPC3<size, /*V*/0, opc, oops, iops, asm, operands, cstr> {
  let Inst{20-12} = 0b000000000; // imm9
}

// SIMD&FP variant of BaseLRCPC3 (V = 1) with a 9-bit signed offset.
// Each instantiation produces two records:
//   <NAME>i - the instruction; the offset is optional in the asm string
//   <NAME>a - an alias for the "[$Rn]" spelling that maps to <NAME>i with
//             an offset of 0
multiclass LRCPC3NEONLoadStoreUnscaledOffset<bits<2> size, bits<2> opc,
                                             RegisterClass regtype,
                                             dag oops, dag iops, string asm> {
  def i : BaseLRCPC3<size, /*V*/1, opc, oops, iops, asm,
                     "\t$Rt, [$Rn{, $simm}]", /*cstr*/""> {
    bits<9> simm; // signed immediate encoded in imm9=Rt2:imm4
    let Inst{20-12} = simm;
  }

  def a : InstAlias<asm # "\t$Rt, [$Rn]",
                    (!cast<Instruction>(NAME # "i")
                        regtype:$Rt, GPR64sp:$Rn, 0)>;
}

// Single-structure SIMD load/store for FEAT_LRCPC3, built on
// BaseSIMDLdStSingle with R = 0 and opcode = 0b100.
//   L - 1 marks the instruction as a load, 0 as a store
// The lane index operand Q is encoded in Inst{30}.
// NOTE(review): hasSideEffects is 1 here while BaseLRCPC3 uses 0, and the
// Sched list is empty - confirm both are intentional.
class LRCPC3NEONLdStSingle<bit L, dag oops, dag iops, string asm, string cst>
    : BaseSIMDLdStSingle<L, /*R*/0b0, /*opcode*/0b100, asm,
                         "\t$Vt$Q, [$Rn]", cst, oops, iops, []>,
      Sched<[]> {
  bit Q;

  let Inst{31}    = 0;
  let Inst{30}    = Q;
  let Inst{23}    = 0;
  let Inst{20-16} = 0b00001;
  let Inst{12}    = 0;    // S
  let Inst{11-10} = 0b01; // size

  let mayLoad        = L;
  let mayStore       = !not(L);
  let hasSideEffects = 1;
}

//---
// Instrumentation Extension (FEAT_ITE)
//---
Expand Down
70 changes: 70 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasITE : Predicate<"Subtarget->hasITE()">,
AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
// Predicate gating the FEAT_LRCPC3 instructions on the "rcpc3" feature.
// Uses AssemblerPredicateWithAll, like the neighbouring extension predicates
// (HasMOPS, HasITE), instead of plain AssemblerPredicate so that this
// extension is treated consistently with them.
// NOTE(review): presumably this also lets the "all" pseudo-feature enable
// these instructions - confirm against the AssemblerPredicateWithAll
// definition.
def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
               AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
Expand Down Expand Up @@ -3903,6 +3905,21 @@ def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">;

// Zero-offset spellings of the store-release instructions.
//
// LoadAcquire takes $Rn as GPR64sp0, whose parser accepts and discards an
// optional zero offset. StoreRelease deliberately uses plain GPR64sp and
// relies on the explicit "#0" aliases below instead: the LRCPC3 extension
// adds STLR forms with a non-zero immediate inside the brackets, so silently
// swallowing the immediate is no longer appropriate. Loads are unaffected
// because the new LRCPC3 LDAPR forms are post-indexed; their immediate sits
// outside the [] brackets and is therefore never accepted by the GPR64sp0
// parser.
def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]",
                       (STLRW GPR32:$Rt, GPR64sp:$Rn)>;
def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]",
                       (STLRX GPR64:$Rt, GPR64sp:$Rn)>;
def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]",
                       (STLRB GPR32:$Rt, GPR64sp:$Rn)>;
def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]",
                       (STLRH GPR32:$Rt, GPR64sp:$Rn)>;

def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
Expand Down Expand Up @@ -3937,6 +3954,12 @@ let Predicates = [HasLOR] in {
def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;

// Accept an explicit "#0" offset on the STLLR family by mapping it to the
// plain "[$Rn]" instruction of the same width.
def STLLRW0 : InstAlias<"stllr\t$Rt, [$Rn, #0]",
                        (STLLRW GPR32:$Rt, GPR64sp:$Rn)>;
def STLLRX0 : InstAlias<"stllr\t$Rt, [$Rn, #0]",
                        (STLLRX GPR64:$Rt, GPR64sp:$Rn)>;
def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]",
                        (STLLRB GPR32:$Rt, GPR64sp:$Rn)>;
def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]",
                        (STLLRH GPR32:$Rt, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -8528,6 +8551,53 @@ def RPRFM:
let DecoderNamespace = "Fallback";
}

//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

// Integer FEAT_LRCPC3 instructions; all are gated on HasRCPC3.
//
// The writeback forms carry their immediate as a fixed literal in the asm
// string (#-8, #-16, #8, #16, #-4), and tie the updated base to the input
// base through the "$Rn = $wback" constraint.
//
// NOTE(review): the LDIAPP*pre and LDAPR*pre records use post-index syntax
// ("[$Rn], #imm") despite the "pre" suffix in their names; only the
// STILP*pre and STLR*pre records are pre-indexed ("[$Rn, #imm]!").
let Predicates = [HasRCPC3] in {
// Leading template arguments, in order: size, opc, opc2.
// opc2 = 0b0000 is used for the writeback forms, 0b0001 for the plain
// "[$Rn]" forms.
def STILPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
def STILPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
def STILPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
def STILPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
def LDIAPPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
def LDIAPPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
def LDIAPPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
def LDIAPPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

// Aliases for when offset=0. Only STILP needs them: its base operand is
// GPR64sp, whereas the LDIAPP no-offset forms above take GPR64sp0.
def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32: $Rt, GPR32: $Rt2, GPR64sp:$Rn)>;
def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64: $Rt, GPR64: $Rt2, GPR64sp:$Rn)>;

// Leading template arguments, in order: size, opc.
// Only writeback forms are defined here; they share the "stlr"/"ldapr"
// mnemonics with writeback syntax.
def STLRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
def STLRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
def LDAPRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">;
def LDAPRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #8", "$Rn = $wback">;
}

// SIMD&FP FEAT_LRCPC3 instructions; these require both HasRCPC3 and HasNEON.
let Predicates = [HasRCPC3, HasNEON] in {
// Leading template arguments, in order: size, opc, regtype.
// Each defm expands to an "<name>i" instruction taking a simm9 offset and an
// "<name>a" alias for the offset-less "[$Rn]" spelling. The 128-bit (q)
// variants reuse size = 0b00 and are distinguished by the upper opc bit.
defm STLURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8 , (outs), (ins FPR8 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
defm STLURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
defm STLURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
defm STLURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
defm STLURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8 , (outs FPR8 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;

// Leading template argument: L (0 = store, 1 = load).
// LDAP1 takes the vector list as an input as well and ties it to the result
// ("$Vt = $dst").
def STL1: LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn) , "stl1", "">;
def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;

// Aliases for when offset=0. Only STL1 needs one: its base operand is
// GPR64sp, whereas LDAP1 takes GPR64sp0.
def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}

include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7547,6 +7547,15 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
case MCK__HASH_8:
ExpectedVal = 8;
break;
case MCK__HASH__MINUS_4:
ExpectedVal = -4;
break;
case MCK__HASH__MINUS_8:
ExpectedVal = -8;
break;
case MCK__HASH__MINUS_16:
ExpectedVal = -16;
break;
case MCK_MPR:
// If the Kind is a token for the MPR register class which has the "za"
// register (SME accumulator array), check if the asm is a literal "za"
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AArch64/arm64-memory.s
Original file line number Diff line number Diff line change
Expand Up @@ -498,11 +498,23 @@ foo:
stlrb w3, [x6]
stlrh w3, [x6]

stlr w3, [x6, #0]
stlr x3, [x6, 0]
stlrb w3, [sp]
stlrb w3, [sp, #0]
stlrb w3, [sp, 0]

; CHECK: stlr w3, [x6] ; encoding: [0xc3,0xfc,0x9f,0x88]
; CHECK: stlr x3, [x6] ; encoding: [0xc3,0xfc,0x9f,0xc8]
; CHECK: stlrb w3, [x6] ; encoding: [0xc3,0xfc,0x9f,0x08]
; CHECK: stlrh w3, [x6] ; encoding: [0xc3,0xfc,0x9f,0x48]

; CHECK: stlr w3, [x6] ; encoding: [0xc3,0xfc,0x9f,0x88]
; CHECK: stlr x3, [x6] ; encoding: [0xc3,0xfc,0x9f,0xc8]
; CHECK: stlrb w3, [sp] ; encoding: [0xe3,0xff,0x9f,0x08]
; CHECK: stlrb w3, [sp] ; encoding: [0xe3,0xff,0x9f,0x08]
; CHECK: stlrb w3, [sp] ; encoding: [0xe3,0xff,0x9f,0x08]

;-----------------------------------------------------------------------------
; Load-acquire/Store-release exclusive
;-----------------------------------------------------------------------------
Expand Down
Loading

0 comments on commit a6aaa96

Please sign in to comment.