Skip to content

Commit

Permalink
[AArch64] Refactoring Frequency and TSC counters
Browse files Browse the repository at this point in the history
  • Loading branch information
cyring committed Jan 31, 2024
1 parent e3f7a13 commit baff80f
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 149 deletions.
15 changes: 0 additions & 15 deletions aarch64/bitasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,22 +94,10 @@ __asm__ volatile \
: "cc", "memory" \
)

/*
 * RDTSCP64(_mem64): read the CNTVCT_EL0 counter register into _mem64 and
 * then execute an ISB instruction.
 * _mem64 must be an lvalue that can be bound to a general-register output.
 * The "memory" clobber additionally makes the statement a compiler barrier.
 * NOTE(review): the ISB follows the MRS, so it does not order the counter
 * read against earlier instructions -- confirm this placement is intended.
 */
#define RDTSCP64(_mem64) \
__asm__ volatile \
( \
"mrs %0 , cntvct_el0" "\n\t" \
"isb" \
: "=r" (_mem64) \
: \
: "cc", "memory" \
)

/*
 * ASM_RDTSC(_reg): assembler text that reads CNTVCT_EL0 into the register
 * whose name is given by _reg (the argument is stringified).
 * It expands to string literals only, so it has to be pasted into an
 * inline-asm template; no barrier instruction is emitted.
 */
#define ASM_RDTSC(_reg) \
"# Read variant TSC." "\n\t" \
"mrs " #_reg ", cntvct_el0" "\n\t"

/* ASM_RDTSCP(_reg): alias of ASM_RDTSC(_reg); expands to the same text. */
#define ASM_RDTSCP(_reg) ASM_RDTSC(_reg)

#define ASM_CODE_RDPMC(_ctr, _reg) \
"# Read PMC counter." "\n\t" \
/*TODO "movq $" #_ctr ", %%rcx" "\n\t" \
Expand Down Expand Up @@ -152,9 +140,6 @@ __asm__ volatile \
/*
 * RDTSC_PMCx1(mem_tsc, ...): forwards to ASM_RDTSC_PMCx1 with x14 and x15
 * as its first two arguments, ASM_RDTSC as the counter-read fragment,
 * mem_tsc, and the remaining variadic arguments unchanged.
 * NOTE(review): x14/x15 are presumably scratch registers -- confirm against
 * the definition of ASM_RDTSC_PMCx1.
 */
#define RDTSC_PMCx1(mem_tsc, ...) \
ASM_RDTSC_PMCx1(x14, x15, ASM_RDTSC, mem_tsc, __VA_ARGS__)

/*
 * RDTSCP_PMCx1(mem_tsc, ...): forwards to ASM_RDTSC_PMCx1 with x14 and x15
 * as its first two arguments, ASM_RDTSCP as the counter-read fragment,
 * mem_tsc, and the remaining variadic arguments unchanged.
 * NOTE(review): x14/x15 are presumably scratch registers -- confirm against
 * the definition of ASM_RDTSC_PMCx1.
 */
#define RDTSCP_PMCx1(mem_tsc, ...) \
ASM_RDTSC_PMCx1(x14, x15, ASM_RDTSCP, mem_tsc, __VA_ARGS__)

#if defined(LEGACY) && LEGACY > 0

#define _BITSET_GPR(_lock, _base, _offset) \
Expand Down
23 changes: 9 additions & 14 deletions aarch64/corefreqd.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,15 +291,15 @@ static void *Core_Cycle(void *arg)
CFlip->Delta.TSC = RO(Core)->Delta.TSC;
CFlip->Delta.C1 = RO(Core)->Delta.C1;

double FRQ = CFlip->Delta.TSC * PRECISION; /* TODO(SourceMe) */
/* Update all clock ratios. */
memcpy(Cpu->Boost, RO(Core)->Boost, (BOOST(SIZE))*sizeof(unsigned int));

CFlip->Absolute.Ratio.Perf = (double)RO(Core)->Ratio.COF.Q;
CFlip->Absolute.Ratio.Perf +=(double)RO(Core)->Ratio.COF.R /UNIT_KHz(1);

/* Compute IPS=Instructions per TSC */
CFlip->State.IPS = (double)CFlip->Delta.INST
/ (double)CFlip->Delta.TSC;
CFlip->State.IPS = (double)CFlip->Delta.INST / FRQ;

/* Compute IPC=Instructions per non-halted reference cycle.
( Protect against a division by zero ) */
Expand All @@ -318,12 +318,10 @@ static void *Core_Cycle(void *arg)
CFlip->State.CPI = 0.0f;
}
/* Compute the Turbo State. */
CFlip->State.Turbo = (double)CFlip->Delta.C0.UCC
/ (double)CFlip->Delta.TSC;
CFlip->State.Turbo = (double)CFlip->Delta.C0.UCC / FRQ;

/* Compute the C-States. */
CFlip->State.C0 = (double)CFlip->Delta.C0.URC
/ (double)CFlip->Delta.TSC;
CFlip->State.C0 = (double)CFlip->Delta.C0.URC / FRQ;

CFlip->State.C3 = (double)CFlip->Delta.C3
/ (double)CFlip->Delta.TSC;
Expand All @@ -340,7 +338,7 @@ static void *Core_Cycle(void *arg)
/* Relative Frequency = Relative Ratio x Bus Clock Frequency */
CFlip->Relative.Ratio = (double)(CFlip->Delta.C0.URC
* Cpu->Boost[BOOST(MAX)])
/ (double)CFlip->Delta.TSC;
/ FRQ;

CFlip->Relative.Freq = REL_FREQ_MHz( double,
CFlip->Relative.Ratio,
Expand Down Expand Up @@ -475,15 +473,12 @@ static void *Child_Thread(void *arg)
RW(SHM_STRUCT) *RW(Shm) = Arg->Ref->RW(Shm);
CPU_STRUCT *Cpu = &RO(Shm)->Cpu[cpu];

CALL_FUNC MatrixCallFunc[2][2] = {
{ CallWith_RDTSC_No_RDPMC, CallWith_RDTSC_RDPMC },
{ CallWith_RDTSCP_No_RDPMC, CallWith_RDTSCP_RDPMC }
CALL_FUNC MatrixCallFunc[2] = {
CallWith_RDTSC_No_RDPMC, CallWith_RDTSC_RDPMC
};
const int withTSCP = ((RO(Shm)->Proc.Features.Inv_TSC == 1)
|| (RO(Shm)->Proc.Features.RDTSCP == 1)),
withRDPMC = ((RO(Shm)->Proc.PM_version >= 1));
const int withRDPMC = ((RO(Shm)->Proc.PM_version >= 1));

CALL_FUNC CallSliceFunc = MatrixCallFunc[withTSCP][withRDPMC];
CALL_FUNC CallSliceFunc = MatrixCallFunc[withRDPMC];

pthread_t tid = pthread_self();
cpu_set_t cpuset;
Expand Down
65 changes: 10 additions & 55 deletions aarch64/corefreqk.c
Original file line number Diff line number Diff line change
Expand Up @@ -350,17 +350,7 @@ unsigned int FixMissingRatioAndFrequency(unsigned int r32, CLOCK *pClock)
return (unsigned int) r64;
}

/*
 * Clocksource read callback.
 * Samples the counter through RDTSCP64() and returns the raw value.
 * The clocksource pointer is not consulted.
 */
static unsigned long long
CoreFreqK_Read_CS_From_Invariant_TSC(struct clocksource *cs)
{
	unsigned long long cycles __attribute__ ((aligned (8)));

	UNUSED(cs);		/* the callback signature requires it */
	RDTSCP64(cycles);

	return cycles;
}

static unsigned long long
CoreFreqK_Read_CS_From_Variant_TSC(struct clocksource *cs)
static unsigned long long CoreFreqK_Read_CS_From_TSC(struct clocksource *cs)
{
unsigned long long TSC __attribute__ ((aligned (8)));
UNUSED(cs);
Expand Down Expand Up @@ -403,15 +393,7 @@ static long CoreFreqK_Register_ClockSource(unsigned int cpu)
unsigned long long Freq_Hz;
unsigned int Freq_KHz;

if ((PUBLIC(RO(Proc))->Features.Inv_TSC == 1)
|| (PUBLIC(RO(Proc))->Features.RDTSCP == 1))
{
CoreFreqK_CS.read = CoreFreqK_Read_CS_From_Invariant_TSC;
}
else
{
CoreFreqK_CS.read = CoreFreqK_Read_CS_From_Variant_TSC;
}
CoreFreqK_CS.read = CoreFreqK_Read_CS_From_TSC;

Freq_Hz = PUBLIC(RO(Core, AT(cpu)))->Boost[BOOST(MAX)]
* PUBLIC(RO(Core, AT(cpu)))->Clock.Hz;
Expand Down Expand Up @@ -1279,24 +1261,7 @@ void Compute_Interval(void)

#endif

/*
 * Fill pCompute->TSC with two series of OCCURRENCES samples taken with the
 * TSCP flavour of the clock macros:
 *   TSC[0][i].V is written by CLOCK_OVERHEAD(),
 *   TSC[1][i].V is written by CLOCK_DELAY() called with 1000LLU.
 * NOTE(review): the unit of the 1000LLU delay is defined by CLOCK_DELAY,
 * which is not visible here -- confirm before relying on it.
 */
static void ComputeWithSerializedTSC(COMPUTE_ARG *pCompute)
{
unsigned int loop;
/* Writeback and Invalidate Caches. */
WBINVD();
/* Warm-up & Overhead */
for (loop = 0; loop < OCCURRENCES; loop++)
{
CLOCK_OVERHEAD(TSCP, pCompute->TSC[0][loop].V);
}
/* Estimation */
for (loop = 0; loop < OCCURRENCES; loop++)
{
CLOCK_DELAY(1000LLU, TSCP, pCompute->TSC[1][loop].V);
}
}

static void ComputeWithUnSerializedTSC(COMPUTE_ARG *pCompute)
static void Measure_TSC(COMPUTE_ARG *pCompute)
{
unsigned int loop;
/* Writeback and Invalidate Caches. */
Expand All @@ -1323,14 +1288,8 @@ static void Compute_TSC(void *arg)
TSC[0] stores the overhead
TSC[1] stores the estimation
*/
/* Is the TSC invariant or can serialize ? */
if ((PUBLIC(RO(Proc))->Features.Inv_TSC == 1)
|| (PUBLIC(RO(Proc))->Features.RDTSCP == 1))
{
ComputeWithSerializedTSC(pCompute);
} else {
ComputeWithUnSerializedTSC(pCompute);
}
Measure_TSC(pCompute);

/* Select the best clock. */
for (loop = 0; loop < OCCURRENCES; loop++) {
for (what = 0; what < 2; what++) {
Expand Down Expand Up @@ -2421,24 +2380,20 @@ void Generic_Core_Counters_Clear(union SAVE_AREA_CORE *Save, CORE_RO *Core)

#define Counters_Generic(Core, T) \
({ \
volatile unsigned long long UCC, URC; \
RDTSC_COUNTERx3(Core->Counter[T].TSC, \
pmevcntr2_el0, UCC, \
pmccntr_el0, URC, \
pmevcntr2_el0, Core->Counter[T].C0.UCC, \
pmccntr_el0, Core->Counter[T].C0.URC, \
pmevcntr3_el0, Core->Counter[T].INST ); \
/* Normalize Frequencies */ \
Core->Counter[T].C0.UCC = KDIV(UCC, PRECISION); \
Core->Counter[T].C0.URC = KDIV(URC, PRECISION); \
/* Derive C1: */ \
/* Derive C1: \
Core->Counter[T].C1 = \
(Core->Counter[T].TSC > Core->Counter[T].C0.URC) ? \
Core->Counter[T].TSC - Core->Counter[T].C0.URC \
: 0; \
: 0; TODO(FixMe)*/ \
})

#define Mark_OVH(Core) \
({ \
RDTSCP64(Core->Overhead.TSC); \
RDTSC64(Core->Overhead.TSC); \
})

#define Core_OVH(Core) \
Expand Down
9 changes: 0 additions & 9 deletions aarch64/corefreqk.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,21 +99,12 @@ __asm__ volatile \
/*
 * Counter-read wrappers.
 * Each RDTSC[P]_COUNTERxN(mem_tsc, ...) forwards to ASM_COUNTERxN with a
 * fixed list of register names (x11, x12, then x13 and x14 as N grows),
 * the counter-read fragment (ASM_RDTSC or ASM_RDTSCP), mem_tsc, and the
 * variadic arguments unchanged.
 * NOTE(review): the variadic part is presumably N (register, destination)
 * pairs -- confirm against the ASM_COUNTERxN definitions.
 */
#define RDTSC_COUNTERx1(mem_tsc, ...) \
ASM_COUNTERx1(x11, x12, ASM_RDTSC, mem_tsc, __VA_ARGS__)

#define RDTSCP_COUNTERx1(mem_tsc, ...) \
ASM_COUNTERx1(x11, x12, ASM_RDTSCP, mem_tsc, __VA_ARGS__)

#define RDTSC_COUNTERx2(mem_tsc, ...) \
ASM_COUNTERx2(x11, x12, x13, ASM_RDTSC, mem_tsc, __VA_ARGS__)

#define RDTSCP_COUNTERx2(mem_tsc, ...) \
ASM_COUNTERx2(x11, x12, x13, ASM_RDTSCP, mem_tsc, __VA_ARGS__)

#define RDTSC_COUNTERx3(mem_tsc, ...) \
ASM_COUNTERx3(x11, x12, x13, x14, ASM_RDTSC, mem_tsc, __VA_ARGS__)

#define RDTSCP_COUNTERx3(mem_tsc, ...) \
ASM_COUNTERx3(x11, x12, x13, x14, ASM_RDTSCP, mem_tsc, __VA_ARGS__)

/* Manufacturers Identifier Strings. */
#define VENDOR_RESERVED "Reserved"
#define VENDOR_ARM "Arm"
Expand Down
44 changes: 0 additions & 44 deletions aarch64/corefreqm.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,28 +35,6 @@
pSlice->Delta.INST -= overhead; \
})

/*
 * Run SliceFunc for one CPU and sample counters around the call.
 * Counter[0] and Counter[1] of the CPU's slice are sampled back to back
 * before the call, Counter[2] right after it; each sample stores a TSC
 * value and an INST value through RDTSCP_PMCx1().
 * When the BURN bit of RW(Shm)->Proc.Sync is set, DeltaTSC() and
 * DeltaINST() are then applied to the slice.
 * NOTE(review): the two leading samples presumably measure the read
 * overhead and 0x40000000 presumably selects the PMC -- confirm against
 * RDTSCP_PMCx1, DeltaTSC and DeltaINST.
 */
void CallWith_RDTSCP_RDPMC( RO(SHM_STRUCT) *RO(Shm),
RW(SHM_STRUCT) *RW(Shm),
unsigned int cpu,
SLICE_FUNC SliceFunc,
unsigned long arg )
{
struct SLICE_STRUCT *pSlice = &RO(Shm)->Cpu[cpu].Slice;

/* Two consecutive samples taken before the workload. */
RDTSCP_PMCx1(pSlice->Counter[0].TSC,0x40000000,pSlice->Counter[0].INST);

RDTSCP_PMCx1(pSlice->Counter[1].TSC,0x40000000,pSlice->Counter[1].INST);

/* The workload itself. */
SliceFunc(RO(Shm), RW(Shm), cpu, arg);

/* Sample taken after the workload. */
RDTSCP_PMCx1(pSlice->Counter[2].TSC,0x40000000,pSlice->Counter[2].INST);

/* Deltas are only computed while the BURN bit is set. */
if (BITVAL(RW(Shm)->Proc.Sync, BURN)) {
DeltaTSC(pSlice);
DeltaINST(pSlice);
}
}

void CallWith_RDTSC_RDPMC( RO(SHM_STRUCT) *RO(Shm),
RW(SHM_STRUCT) *RW(Shm),
unsigned int cpu,
Expand All @@ -79,28 +57,6 @@ void CallWith_RDTSC_RDPMC( RO(SHM_STRUCT) *RO(Shm),
}
}

/*
 * Run SliceFunc for one CPU and sample only the timestamp counter around
 * the call.
 * Counter[0].TSC and Counter[1].TSC are sampled back to back before the
 * call, Counter[2].TSC right after it, each through RDTSCP64().
 * When the BURN bit of RW(Shm)->Proc.Sync is set, DeltaTSC() is applied to
 * the slice and Delta.INST is forced to zero, since no instruction counter
 * is read in this variant.
 * NOTE(review): the two leading samples presumably measure the read
 * overhead -- confirm against DeltaTSC.
 */
void CallWith_RDTSCP_No_RDPMC( RO(SHM_STRUCT) *RO(Shm),
RW(SHM_STRUCT) *RW(Shm),
unsigned int cpu,
SLICE_FUNC SliceFunc,
unsigned long arg )
{
struct SLICE_STRUCT *pSlice = &RO(Shm)->Cpu[cpu].Slice;

/* Two consecutive samples taken before the workload. */
RDTSCP64(pSlice->Counter[0].TSC);

RDTSCP64(pSlice->Counter[1].TSC);

/* The workload itself. */
SliceFunc(RO(Shm), RW(Shm), cpu, arg);

/* Sample taken after the workload. */
RDTSCP64(pSlice->Counter[2].TSC);

/* Deltas are only computed while the BURN bit is set. */
if (BITVAL(RW(Shm)->Proc.Sync, BURN)) {
DeltaTSC(pSlice);
pSlice->Delta.INST = 0;
}
}

void CallWith_RDTSC_No_RDPMC( RO(SHM_STRUCT) *RO(Shm),
RW(SHM_STRUCT) *RW(Shm),
unsigned int cpu,
Expand Down
12 changes: 0 additions & 12 deletions aarch64/corefreqm.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,12 @@ typedef void (*CALL_FUNC)( RO(SHM_STRUCT)*,
SLICE_FUNC,
unsigned long );

/*
 * Slice call wrappers.
 * Arguments, in order: read-only shared memory, read-write shared memory,
 * CPU index, the slice function to run, and the argument handed to it.
 */

/* Samples the counter with the RDTSCP flavour plus one PMC value. */
void CallWith_RDTSCP_RDPMC( RO(SHM_STRUCT)*,
RW(SHM_STRUCT)*,
unsigned int,
SLICE_FUNC,
unsigned long );

/* NOTE(review): presumably the RDTSC flavour plus one PMC value -- confirm. */
void CallWith_RDTSC_RDPMC( RO(SHM_STRUCT)*,
RW(SHM_STRUCT)*,
unsigned int,
SLICE_FUNC,
unsigned long );

/* Samples the counter with the RDTSCP flavour only; no PMC is read. */
void CallWith_RDTSCP_No_RDPMC( RO(SHM_STRUCT)*,
RW(SHM_STRUCT)*,
unsigned int,
SLICE_FUNC,
unsigned long );

void CallWith_RDTSC_No_RDPMC( RO(SHM_STRUCT)*,
RW(SHM_STRUCT)*,
unsigned int,
Expand Down

0 comments on commit baff80f

Please sign in to comment.