[edk2-devel] [PATCH] UefiCpuPkg/Feature: Support different thread count per core

Laszlo Ersek lersek at redhat.com
Thu Dec 3 10:12:19 UTC 2020


On 12/02/20 12:55, Ray Ni wrote:
> Today's code assumes every core contains the same number of threads.
> It's not always TRUE for certain model.
> Such assumption causes system hang when thread count per core
> is different and there is core or package dependency between CPU
> features (using CPU_FEATURE_CORE_BEFORE/AFTER,
> CPU_FEATURE_PACKAGE_BEFORE/AFTER).
> 
> The change removes such assumption by calculating the actual thread
> count per package and per core.
> 
> Signed-off-by: Ray Ni <ray.ni at intel.com>
> Cc: Eric Dong <eric.dong at intel.com>
> Cc: Yun Lou <yun.lou at intel.com>
> Cc: Laszlo Ersek <lersek at redhat.com>
> ---
>  UefiCpuPkg/Include/AcpiCpuData.h              |  16 ++-
>  .../CpuFeaturesInitialize.c                   | 113 ++++++++++--------
>  UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c             |  73 ++++++-----
>  3 files changed, 119 insertions(+), 83 deletions(-)

Acked-by: Laszlo Ersek <lersek at redhat.com>

Thanks
Laszlo

> diff --git a/UefiCpuPkg/Include/AcpiCpuData.h b/UefiCpuPkg/Include/AcpiCpuData.h
> index 77da5d4455..b5a69ad80c 100644
> --- a/UefiCpuPkg/Include/AcpiCpuData.h
> +++ b/UefiCpuPkg/Include/AcpiCpuData.h
> @@ -1,7 +1,7 @@
>  /** @file
>  Definitions for CPU S3 data.
>  
> -Copyright (c) 2013 - 2018, Intel Corporation. All rights reserved.<BR>
> +Copyright (c) 2013 - 2020, Intel Corporation. All rights reserved.<BR>
>  SPDX-License-Identifier: BSD-2-Clause-Patent
>  
>  **/
> @@ -60,14 +60,24 @@ typedef struct {
>    UINT32                      MaxThreadCount;
>    //
>    // This field points to an array.
> -  // This array saves valid core count (type UINT32) of each package.
> +  // This array saves thread count (type UINT32) of each package.
>    // The array has PackageCount elements.
>    //
>    // If the platform does not support MSR setting at S3 resume, and
>    // therefore it doesn't need the dependency semaphores, it should set
>    // this field to 0.
>    //
> -  EFI_PHYSICAL_ADDRESS        ValidCoreCountPerPackage;
> +  EFI_PHYSICAL_ADDRESS        ThreadCountPerPackage;
> +  //
> +  // This field points to an array.
> +  // This array saves thread count (type UINT8) of each core.
> +  // The array has PackageCount * MaxCoreCount elements.
> +  //
> +  // If the platform does not support MSR setting at S3 resume, and
> +  // therefore it doesn't need the dependency semaphores, it should set
> +  // this field to 0.
> +  //
> +  EFI_PHYSICAL_ADDRESS        ThreadCountPerCore;
>  } CPU_STATUS_INFORMATION;
>  
>  //
> diff --git a/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c b/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
> index 5c673fa8cf..0cce909cc0 100644
> --- a/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
> +++ b/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
> @@ -103,14 +103,13 @@ CpuInitDataInitialize (
>    UINT32                               Package;
>    UINT32                               Thread;
>    EFI_CPU_PHYSICAL_LOCATION            *Location;
> -  BOOLEAN                              *CoresVisited;
> -  UINTN                                Index;
>    UINT32                               PackageIndex;
>    UINT32                               CoreIndex;
>    UINT32                               First;
>    ACPI_CPU_DATA                        *AcpiCpuData;
>    CPU_STATUS_INFORMATION               *CpuStatus;
> -  UINT32                               *ValidCoreCountPerPackage;
> +  UINT32                               *ThreadCountPerPackage;
> +  UINT8                                *ThreadCountPerCore;
>    UINTN                                NumberOfCpus;
>    UINTN                                NumberOfEnabledProcessors;
>  
> @@ -202,35 +201,32 @@ CpuInitDataInitialize (
>    //
>    // Collect valid core count in each package because not all cores are valid.
>    //
> -  ValidCoreCountPerPackage= AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);
> -  ASSERT (ValidCoreCountPerPackage != 0);
> -  CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ValidCoreCountPerPackage;
> -  CoresVisited = AllocatePool (sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);
> -  ASSERT (CoresVisited != NULL);
> -
> -  for (Index = 0; Index < CpuStatus->PackageCount; Index ++ ) {
> -    ZeroMem (CoresVisited, sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);
> -    //
> -    // Collect valid cores in Current package.
> -    //
> -    for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {
> -      Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;
> -      if (Location->Package == Index && !CoresVisited[Location->Core] ) {
> -        //
> -        // The ValidCores position for Location->Core is valid.
> -        // The possible values in ValidCores[Index] are 0 or 1.
> -        // FALSE means no valid threads in this Core.
> -        // TRUE means have valid threads in this core, no matter the thead count is 1 or more.
> -        //
> -        CoresVisited[Location->Core] = TRUE;
> -        ValidCoreCountPerPackage[Index]++;
> -      }
> -    }
> +  ThreadCountPerPackage = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);
> +  ASSERT (ThreadCountPerPackage != NULL);
> +  CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerPackage;
> +
> +  ThreadCountPerCore = AllocateZeroPool (sizeof (UINT8) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount);
> +  ASSERT (ThreadCountPerCore != NULL);
> +  CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerCore;
> +
> +  for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {
> +    Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;
> +    ThreadCountPerPackage[Location->Package]++;
> +    ThreadCountPerCore[Location->Package * CpuStatus->MaxCoreCount + Location->Core]++;
>    }
> -  FreePool (CoresVisited);
>  
> -  for (Index = 0; Index <= Package; Index++) {
> -    DEBUG ((DEBUG_INFO, "Package: %d, Valid Core : %d\n", Index, ValidCoreCountPerPackage[Index]));
> +  for (PackageIndex = 0; PackageIndex < CpuStatus->PackageCount; PackageIndex++) {
> +    if (ThreadCountPerPackage[PackageIndex] != 0) {
> +      DEBUG ((DEBUG_INFO, "P%02d: Thread Count = %d\n", PackageIndex, ThreadCountPerPackage[PackageIndex]));
> +      for (CoreIndex = 0; CoreIndex < CpuStatus->MaxCoreCount; CoreIndex++) {
> +        if (ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex] != 0) {
> +          DEBUG ((
> +            DEBUG_INFO, "  P%02d C%04d, Thread Count = %d\n", PackageIndex, CoreIndex, 
> +            ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex]
> +            ));
> +        }
> +      }
> +    }
>    }
>  
>    CpuFeaturesData->CpuFlags.CoreSemaphoreCount = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount);
> @@ -894,11 +890,11 @@ ProgramProcessorRegister (
>    CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntryHead;
>    volatile UINT32           *SemaphorePtr;
>    UINT32                    FirstThread;
> -  UINT32                    PackageThreadsCount;
>    UINT32                    CurrentThread;
> +  UINT32                    CurrentCore;
>    UINTN                     ProcessorIndex;
> -  UINTN                     ValidThreadCount;
> -  UINT32                    *ValidCoreCountPerPackage;
> +  UINT32                    *ThreadCountPerPackage;
> +  UINT8                     *ThreadCountPerCore;
>    EFI_STATUS                Status;
>    UINT64                    CurrentValue;
>  
> @@ -1029,28 +1025,44 @@ ProgramProcessorRegister (
>        switch (RegisterTableEntry->Value) {
>        case CoreDepType:
>          SemaphorePtr = CpuFlags->CoreSemaphoreCount;
> +        ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;
> +
> +        CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;
>          //
>          // Get Offset info for the first thread in the core which current thread belongs to.
>          //
> -        FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;
> +        FirstThread   = CurrentCore * CpuStatus->MaxThreadCount;
>          CurrentThread = FirstThread + ApLocation->Thread;
> +
>          //
> -        // First Notify all threads in current Core that this thread has ready.
> +        // Different cores may have different valid threads in them. If driver maintail clearly
> +        // thread index in different cores, the logic will be much complicated.
> +        // Here driver just simply records the max thread number in all cores and use it as expect
> +        // thread number for all cores.
> +        // In below two steps logic, first current thread will Release semaphore for each thread
> +        // in current core. Maybe some threads are not valid in this core, but driver don't
> +        // care. Second, driver will let current thread wait semaphore for all valid threads in
> +        // current core. Because only the valid threads will do release semaphore for this
> +        // thread, driver here only need to wait the valid thread count.
> +        //
> +
> +        //
> +        // First Notify ALL THREADs in current Core that this thread is ready.
>          //
>          for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
> -          LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);
> +          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
>          }
>          //
> -        // Second, check whether all valid threads in current core have ready.
> +        // Second, check whether all VALID THREADs (not all threads) in current core are ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
> +        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {
>            LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);
>          }
>          break;
>  
>        case PackageDepType:
>          SemaphorePtr = CpuFlags->PackageSemaphoreCount;
> -        ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;
> +        ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;
>          //
>          // Get Offset info for the first thread in the package which current thread belongs to.
>          //
> @@ -1058,18 +1070,13 @@ ProgramProcessorRegister (
>          //
>          // Get the possible threads count for current package.
>          //
> -        PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;
>          CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;
> -        //
> -        // Get the valid thread count for current package.
> -        //
> -        ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];
>  
>          //
> -        // Different packages may have different valid cores in them. If driver maintail clearly
> -        // cores number in different packages, the logic will be much complicated.
> -        // Here driver just simply records the max core number in all packages and use it as expect
> -        // core number for all packages.
> +        // Different packages may have different valid threads in them. If driver maintail clearly
> +        // thread index in different packages, the logic will be much complicated.
> +        // Here driver just simply records the max thread number in all packages and use it as expect
> +        // thread number for all packages.
>          // In below two steps logic, first current thread will Release semaphore for each thread
>          // in current package. Maybe some threads are not valid in this package, but driver don't
>          // care. Second, driver will let current thread wait semaphore for all valid threads in
> @@ -1078,15 +1085,15 @@ ProgramProcessorRegister (
>          //
>  
>          //
> -        // First Notify ALL THREADS in current package that this thread has ready.
> +        // First Notify ALL THREADS in current package that this thread is ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {
> -          LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);
> +        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {
> +          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
>          }
>          //
> -        // Second, check whether VALID THREADS (not all threads) in current package have ready.
> +        // Second, check whether VALID THREADS (not all threads) in current package are ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {
> +        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {
>            LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);
>          }
>          break;
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c b/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
> index 29e9ba92b4..9592430636 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
> @@ -1,7 +1,7 @@
>  /** @file
>  Code for Processor S3 restoration
>  
> -Copyright (c) 2006 - 2019, Intel Corporation. All rights reserved.<BR>
> +Copyright (c) 2006 - 2020, Intel Corporation. All rights reserved.<BR>
>  SPDX-License-Identifier: BSD-2-Clause-Patent
>  
>  **/
> @@ -235,11 +235,11 @@ ProgramProcessorRegister (
>    CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntryHead;
>    volatile UINT32           *SemaphorePtr;
>    UINT32                    FirstThread;
> -  UINT32                    PackageThreadsCount;
>    UINT32                    CurrentThread;
> +  UINT32                    CurrentCore;
>    UINTN                     ProcessorIndex;
> -  UINTN                     ValidThreadCount;
> -  UINT32                    *ValidCoreCountPerPackage;
> +  UINT32                    *ThreadCountPerPackage;
> +  UINT8                     *ThreadCountPerCore;
>    EFI_STATUS                Status;
>    UINT64                    CurrentValue;
>  
> @@ -372,35 +372,52 @@ ProgramProcessorRegister (
>        //
>        ASSERT (
>          (ApLocation != NULL) &&
> -        (CpuStatus->ValidCoreCountPerPackage != 0) &&
> +        (CpuStatus->ThreadCountPerPackage != 0) &&
> +        (CpuStatus->ThreadCountPerCore != 0) &&
>          (CpuFlags->CoreSemaphoreCount != NULL) &&
>          (CpuFlags->PackageSemaphoreCount != NULL)
>          );
>        switch (RegisterTableEntry->Value) {
>        case CoreDepType:
>          SemaphorePtr = CpuFlags->CoreSemaphoreCount;
> +        ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;
> +
> +        CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;
>          //
>          // Get Offset info for the first thread in the core which current thread belongs to.
>          //
> -        FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;
> +        FirstThread   = CurrentCore * CpuStatus->MaxThreadCount;
>          CurrentThread = FirstThread + ApLocation->Thread;
> +
>          //
> -        // First Notify all threads in current Core that this thread has ready.
> +        // Different cores may have different valid threads in them. If driver maintail clearly
> +        // thread index in different cores, the logic will be much complicated.
> +        // Here driver just simply records the max thread number in all cores and use it as expect
> +        // thread number for all cores.
> +        // In below two steps logic, first current thread will Release semaphore for each thread
> +        // in current core. Maybe some threads are not valid in this core, but driver don't
> +        // care. Second, driver will let current thread wait semaphore for all valid threads in
> +        // current core. Because only the valid threads will do release semaphore for this
> +        // thread, driver here only need to wait the valid thread count.
> +        //
> +
> +        //
> +        // First Notify ALL THREADs in current Core that this thread is ready.
>          //
>          for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
>            S3ReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
>          }
>          //
> -        // Second, check whether all valid threads in current core have ready.
> +        // Second, check whether all VALID THREADs (not all threads) in current core are ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
> +        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {
>            S3WaitForSemaphore (&SemaphorePtr[CurrentThread]);
>          }
>          break;
>  
>        case PackageDepType:
>          SemaphorePtr = CpuFlags->PackageSemaphoreCount;
> -        ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;
> +        ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;
>          //
>          // Get Offset info for the first thread in the package which current thread belongs to.
>          //
> @@ -408,18 +425,13 @@ ProgramProcessorRegister (
>          //
>          // Get the possible threads count for current package.
>          //
> -        PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;
>          CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;
> -        //
> -        // Get the valid thread count for current package.
> -        //
> -        ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];
>  
>          //
> -        // Different packages may have different valid cores in them. If driver maintail clearly
> -        // cores number in different packages, the logic will be much complicated.
> -        // Here driver just simply records the max core number in all packages and use it as expect
> -        // core number for all packages.
> +        // Different packages may have different valid threads in them. If driver maintail clearly
> +        // thread index in different packages, the logic will be much complicated.
> +        // Here driver just simply records the max thread number in all packages and use it as expect
> +        // thread number for all packages.
>          // In below two steps logic, first current thread will Release semaphore for each thread
>          // in current package. Maybe some threads are not valid in this package, but driver don't
>          // care. Second, driver will let current thread wait semaphore for all valid threads in
> @@ -428,15 +440,15 @@ ProgramProcessorRegister (
>          //
>  
>          //
> -        // First Notify all threads in current package that this thread has ready.
> +        // First Notify ALL THREADS in current package that this thread is ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {
> +        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {
>            S3ReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
>          }
>          //
> -        // Second, check whether all valid threads in current package have ready.
> +        // Second, check whether VALID THREADS (not all threads) in current package are ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {
> +        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {
>            S3WaitForSemaphore (&SemaphorePtr[CurrentThread]);
>          }
>          break;
> @@ -1059,12 +1071,19 @@ GetAcpiCpuData (
>  
>    CpuStatus = &mAcpiCpuData.CpuStatus;
>    CopyMem (CpuStatus, &AcpiCpuData->CpuStatus, sizeof (CPU_STATUS_INFORMATION));
> -  if (AcpiCpuData->CpuStatus.ValidCoreCountPerPackage != 0) {
> -    CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
> +  if (AcpiCpuData->CpuStatus.ThreadCountPerPackage != 0) {
> +    CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
>                                              sizeof (UINT32) * CpuStatus->PackageCount,
> -                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ValidCoreCountPerPackage
> +                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ThreadCountPerPackage
> +                                            );
> +    ASSERT (CpuStatus->ThreadCountPerPackage != 0);
> +  }
> +  if (AcpiCpuData->CpuStatus.ThreadCountPerCore != 0) {
> +    CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
> +                                            sizeof (UINT8) * (CpuStatus->PackageCount * CpuStatus->MaxCoreCount),
> +                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ThreadCountPerCore
>                                              );
> -    ASSERT (CpuStatus->ValidCoreCountPerPackage != 0);
> +    ASSERT (CpuStatus->ThreadCountPerCore != 0);
>    }
>    if (AcpiCpuData->ApLocation != 0) {
>      mAcpiCpuData.ApLocation = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
> 



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#68234): https://edk2.groups.io/g/devel/message/68234
Mute This Topic: https://groups.io/mt/78657549/1813853
Group Owner: devel+owner at edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [edk2-devel-archive at redhat.com]
-=-=-=-=-=-=-=-=-=-=-=-





More information about the edk2-devel-archive mailing list