diff --git a/.github/workflows/win-package-test.yml b/.github/workflows/win-package-test.yml
index 2f44e57aac..6a7735ca05 100644
--- a/.github/workflows/win-package-test.yml
+++ b/.github/workflows/win-package-test.yml
@@ -287,6 +287,42 @@ jobs:
         if: ${{ always() && matrix.WITH_FLUENTD == 'true' }}
         run: Get-Content -Path "${env:SYSTEMDRIVE}\opt\td-agent\td-agent.log"
 
+  windows-script-upgrade-test:
+    strategy:
+      matrix:
+        OS: [ "windows-2022", "windows-2025" ]
+    runs-on: otel-windows # Tests with uninstall require a more powerful runner
+    needs: [msi-test, windows-install-script-test]
+    timeout-minutes: 45
+    steps:
+      - name: Check out the codebase.
+        uses: actions/checkout@v4
+
+      - name: Downloading msi build
+        uses: actions/download-artifact@v4
+        with:
+          name: msi-build-${{ matrix.OS }}
+          path: ./dist
+      - name: Ensure required ports in the dynamic range are available
+        run: |
+          $ErrorActionPreference = 'Continue'
+          & ${{ github.workspace }}\.github\workflows\scripts\win-required-ports.ps1
+
+      - name: Set the MSI_COLLECTOR_PATH and INSTALL_SCRIPT_PATH environment variables
+        run: |
+          $ErrorActionPreference = 'Stop'
+          $msi_path = Resolve-Path .\dist\splunk-otel-collector*.msi
+          Test-Path $msi_path
+          "MSI_COLLECTOR_PATH=$msi_path" | Out-File -FilePath $env:GITHUB_ENV -Append
+          $ps1_path = Resolve-Path .\packaging\installer\install.ps1
+          Test-Path $ps1_path
+          "INSTALL_SCRIPT_PATH=$ps1_path" | Out-File -FilePath $env:GITHUB_ENV -Append
+
+      - name: Run the script upgrade tests
+        shell: powershell # Use PowerShell to run the script since running the go test from pwsh.exe hides the Get-ExecutionPolicy cmdlet.
+        run: |
+          go test -v github.com/signalfx/splunk-otel-collector/tests/windows-install-script
+
   choco-build:
     runs-on: ${{ matrix.OS }}
     strategy:
diff --git a/packaging/installer/install.ps1 b/packaging/installer/install.ps1
index 8c15a86c15..8b26386d89 100644
--- a/packaging/installer/install.ps1
+++ b/packaging/installer/install.ps1
@@ -124,6 +124,10 @@
     If specified, the -mode parameter will be ignored.
     .EXAMPLE
     .\install.ps1 -config_path "C:\SOME_FOLDER\my_config.yaml"
+.PARAMETER preserve_prev_default_config
+    (OPTIONAL) Preserve the default configuration files of the previous version, located at `$Env:ProgramData\Splunk\OpenTelemetry Collector`, when upgrading the collector. Defaults to $false since version changes can include breaking configuration changes.
+    .EXAMPLE
+    .\install.ps1 -preserve_prev_default_config $true
 #>
 
 param (
@@ -145,6 +149,7 @@ param (
     [string]$msi_path = "",
     [string]$msi_public_properties = "",
     [string]$config_path = "",
+    [bool]$preserve_prev_default_config = $false,
     [string]$collector_msi_url = "",
     [string]$fluentd_msi_url = "",
     [string]$dotnet_psm1_path = "",
@@ -154,6 +159,8 @@ param (
     [bool]$UNIT_TEST = $false
 )
 
+New-Variable -Name UninstallWildcardRegPath -Option Constant -Value "HKLM:\Software\Microsoft\Windows\CurrentVersion\Uninstall\*"
+New-Variable -Name CollectorServiceDisplayName -Option Constant -Value "Splunk OpenTelemetry Collector"
 $arch = "amd64"
 $format = "msi"
 $service_name = "splunk-otel-collector"
@@ -370,8 +377,29 @@ function download_collector_package([string]$collector_version=$collector_versio
 }
 
 # check registry for the agent msi package
-function msi_installed([string]$name) {
-    return (Get-ItemProperty HKLM:\Software\Microsoft\Windows\CurrentVersion\Uninstall\* | Where { $_.DisplayName -eq $name }) -ne $null
+function is_msi_installed([string]$product_name) {
+    return $null -ne (Get-ItemProperty $UninstallWildcardRegPath | Where { $_.DisplayName -eq $product_name })
+}
+
+# returns the SIDs under which $product_name is registered in the Installer UserData registry keys (nothing if it has no uninstall entry)
+function get_msi_installation_sids([string]$product_name) {
+    $sids = [string[]]@()
+    $uninstallEntry = Get-ItemProperty $UninstallWildcardRegPath -ErrorAction SilentlyContinue |
+        Where-Object { $_.DisplayName -eq $product_name }
+    if ($uninstallEntry) {
+        $userInstalls = Get-ItemProperty 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Installer\UserData\*\Products\*\InstallProperties' -ErrorAction SilentlyContinue |
+            Where-Object { $_.DisplayName -eq $product_name }
+        foreach ($user in $userInstalls) {
+            # Not all entries are plain user SIDs, e.g.: some are SIDs with suffixes like "_Classes"; capture only the SID part.
+            if ($user.PSPath -match 'UserData\\(?<SID>S-1-[0-9\-]+)') {
+                # Each SID is wrapped in its own array: function output is enumerated one level, so a lone SID would otherwise reach the caller as a plain string.
+                $sid = $Matches['SID']
+                $sids += , @($sid)
+            }
+        }
+    }
+
+    return $sids
 }
 
 function update_registry([string]$path, [string]$name, [string]$value) {
@@ -413,6 +441,21 @@ function install_msi([string]$path) {
     Write-Host "- Done"
 }
 
+function uninstall_msi([string]$product_name) {
+    Write-Host "Uninstalling $product_name ..."
+    $uninstall_entry = Get-ItemProperty $UninstallWildcardRegPath -ErrorAction SilentlyContinue |
+        Where-Object { $_.DisplayName -eq $product_name } | Select-Object -First 1
+    if (-not $uninstall_entry) {
+        throw "Failed to find the uninstall registry entry for $product_name"
+    }
+    $proc = (Start-Process msiexec.exe -Wait -PassThru -ArgumentList "/X `"$($uninstall_entry.PSChildName)`" /qn /norestart")
+    if ($proc.ExitCode -ne 0) {
+        Write-Warning "The uninstall attempt failed with error code $($proc.ExitCode)."
+        Exit $proc.ExitCode
+    }
+    Write-Host "- Done"
+}
+
 $ErrorActionPreference = 'Stop'; # stop on all errors
 
 # check administrator status
@@ -427,12 +470,47 @@ if (!(check_if_admin)) {
 echo 'Checking execution policy'
 check_policy
 
-if (msi_installed -name "Splunk OpenTelemetry Collector") {
-    throw "The Splunk OpenTelemetry Collector is already installed. Remove or uninstall the Collector and rerun this script."
-}
-
 if (Get-Service -Name $service_name -ErrorAction SilentlyContinue) {
-    throw "The $service_name service is already installed. Remove or uninstall the Collector and rerun this script."
+    Write-Host "The $service_name service is already installed. Checking installation for automatic update."
+
+    $uninstall_collector = $true
+    $collector_sids = get_msi_installation_sids -product_name $CollectorServiceDisplayName
+    if ($collector_sids.Count -eq 0) {
+        $uninstall_collector = $false
+        Write-Warning "The $service_name service exists but it is not on the Windows installation database."
+    }
+    else {
+        if ($collector_sids.Count -gt 1) {
+            $sids_list = $collector_sids -join ", "
+            throw "The $CollectorServiceDisplayName is already installed for multiple users (SIDs: $sids_list). Uninstall the collector and remove remaining users installations from the registry."
+        }
+
+        # "S-1-5-18" is the SID for the Local System account, which is used for machine-wide installations.
+        if ("S-1-5-18" -ne $collector_sids[0]) {
+            # not a machine wide installation, check if it is the same user
+            $currentUser = [System.Security.Principal.WindowsIdentity]::GetCurrent()
+            $currentUserSID = $currentUser.User.Value
+            if ($currentUserSID -ne $collector_sids[0]) {
+                $sid = New-Object System.Security.Principal.SecurityIdentifier($collector_sids[0])
+                $user = $sid.Translate([System.Security.Principal.NTAccount])
+                throw "The $CollectorServiceDisplayName was last installed by '$($user.Value)' it must be updated or uninstalled by the same user."
+            }
+        }
+    }
+
+    Write-Host "Stopping $service_name service..."
+    stop_service -name "$service_name"
+    if ($uninstall_collector) {
+        uninstall_msi -product_name $CollectorServiceDisplayName
+    }
+    if (-not $preserve_prev_default_config) {
+        $default_config_files = @("agent_config.yaml", "gateway_config.yaml")
+        foreach ($file in $default_config_files) {
+            $target = Join-Path "${Env:ProgramData}\Splunk\OpenTelemetry Collector" "$file"
+            Write-Host "Deleting previous version default configuration file '$target'"
+            if (Test-Path -Path $target) { Remove-Item -Path $target }
+        }
+    }
 }
 
 if ($with_fluentd -And (Get-Service -name $fluentd_service_name -ErrorAction SilentlyContinue)) {
@@ -447,7 +525,7 @@ if ($with_fluentd -And (Test-Path -Path "$fluentd_base_dir\bin\fluentd")) {
 $tempdir = create_temp_dir -tempdir $tempdir
 
 if ($with_dotnet_instrumentation) {
-    if ((msi_installed -name "SignalFx .NET Tracing 64-bit") -Or (msi_installed -name "SignalFx .NET Tracing 32-bit")) {
+    if ((is_msi_installed -product_name "SignalFx .NET Tracing 64-bit") -Or (is_msi_installed -product_name "SignalFx .NET Tracing 32-bit")) {
         throw "SignalFx .NET Instrumentation is already installed. Stop all instrumented applications and uninstall SignalFx Instrumentation for .NET before running this script again."
     }
     echo "Downloading Splunk Distribution of OpenTelemetry .NET ..."
@@ -590,7 +668,7 @@ if ($network_interface -Ne "") {
 set_service_environment $service_name $collector_env_vars
 
 $message = "
-The Splunk OpenTelemetry Collector for Windows has been successfully installed.
+The $CollectorServiceDisplayName for Windows has been successfully installed.
 Make sure that your system's time is relatively accurate or else datapoints may not be accepted.
 The collector's main configuration file is located at $config_path,
 and the environment variables are stored in the $regkey registry key.
@@ -643,7 +721,7 @@ if ($with_fluentd) {
     install_msi -path "$fluentd_msi_path"
 
     $message = "
-Fluentd has been installed and configured to forward log events to the Splunk OpenTelemetry Collector.
+Fluentd has been installed and configured to forward log events to the $CollectorServiceDisplayName.
 By default, all log events with the @SPLUNK label will be forwarded to the collector.
 
 The main fluentd configuration file is located at $fluentd_config_path.
@@ -704,7 +782,7 @@ if ($with_dotnet_instrumentation) {
     }
 
     $message = "
-Splunk Distribution of OpenTelemetry for .NET has been installed and configured to forward traces to the Splunk OpenTelemetry Collector.
+Splunk Distribution of OpenTelemetry for .NET has been installed and configured to forward traces to the $CollectorServiceDisplayName.
 By default, the .NET instrumentation will automatically generate telemetry only for .NET applications running on IIS.
 "
     echo "$message"
diff --git a/tests/windows-install-script/windows_install_script_test.go b/tests/windows-install-script/windows_install_script_test.go
new file mode 100644
index 0000000000..1154ce15b6
--- /dev/null
+++ b/tests/windows-install-script/windows_install_script_test.go
@@ -0,0 +1,204 @@
+// Copyright Splunk, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build windows
+
+// Package windows_install_script tests upgrading the collector on Windows through the
+// install.ps1 script.
+package windows_install_script
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+	"golang.org/x/sys/windows/registry"
+	"golang.org/x/sys/windows/svc"
+	"golang.org/x/sys/windows/svc/mgr"
+)
+
+const (
+	// Old version to install first, this version by default is not installed as machine-wide.
+	oldCollectorVersion = "0.94.0"
+	// Name of the collector Windows service.
+	serviceName = "splunk-otel-collector"
+	// Display name used to find the collector among the uninstall registry entries.
+	serviceDisplayName = "Splunk OpenTelemetry Collector"
+	// Registry path, under HKEY_LOCAL_MACHINE, whose subkeys are the uninstall entries.
+	uninstallKeyPath = `Software\Microsoft\Windows\CurrentVersion\Uninstall`
+)
+
+// TestUpgradeFromNonMachineWideVersion installs oldCollectorVersion with the install
+// script, then runs the script again with the MSI referenced by MSI_COLLECTOR_PATH. After
+// each run the service must exist and be running; after the second one the registered
+// version must differ from the old one and no pending file rename may mention "splunk".
+func TestUpgradeFromNonMachineWideVersion(t *testing.T) {
+	t.Setenv("VERIFY_ACCESS_TOKEN", "false")
+
+	requireNoPendingFileOperations(t)
+
+	scm, err := mgr.Connect()
+	require.NoError(t, err)
+	defer scm.Disconnect()
+
+	t.Logf(" *** Installing old collector version %s", oldCollectorVersion)
+	installCollector(t, oldCollectorVersion, "")
+	verifyServiceExists(t, scm)
+	verifyServiceState(t, scm, svc.Running)
+	legacySvcVersion := getCurrentServiceVersion(t)
+	require.Equal(t, oldCollectorVersion, legacySvcVersion)
+
+	msiInstallerPath := getFilePathFromEnvVar(t, "MSI_COLLECTOR_PATH")
+	t.Logf(" *** Installing collector from %q", msiInstallerPath)
+	installCollector(t, "", msiInstallerPath)
+	verifyServiceExists(t, scm)
+	verifyServiceState(t, scm, svc.Running)
+	latestSvcVersion := getCurrentServiceVersion(t)
+	require.NotEqual(t, oldCollectorVersion, latestSvcVersion)
+	requireNoPendingFileOperations(t)
+}
+
+// installCollector runs the install script, located via INSTALL_SCRIPT_PATH, through
+// powershell.exe. Exactly one of version (passed as -collector_version) or msiPath
+// (passed as -msi_path) must be non-empty. The script output is always logged and the
+// test fails if the command returns an error.
+func installCollector(t *testing.T, version string, msiPath string) {
+	require.False(t, version == "" && msiPath == "", "Either version or msiPath must be provided")
+	require.False(t, version != "" && msiPath != "", "Only one of version or msiPath should be provided")
+	args := []string{
+		"-ExecutionPolicy", "Bypass",
+		"-File", getFilePathFromEnvVar(t, "INSTALL_SCRIPT_PATH"),
+		"-access_token", "fake-token",
+	}
+
+	// The checks above guarantee that exactly one of the two values is set.
+	if version != "" {
+		args = append(args, "-collector_version", version)
+	} else {
+		args = append(args, "-msi_path", msiPath)
+	}
+
+	cmd := exec.Command("powershell.exe", args...)
+
+	output, err := cmd.CombinedOutput()
+	t.Logf("Install output: %s", string(output))
+	require.NoError(t, err, "Failed to install collector (version:%q msiPath:%q)", version, msiPath)
+}
+
+// verifyServiceExists fails the test if the collector service cannot be opened.
+func verifyServiceExists(t *testing.T, scm *mgr.Mgr) {
+	service, err := scm.OpenService(serviceName)
+	require.NoError(t, err)
+	service.Close()
+}
+
+// verifyServiceState waits up to 10 seconds, polling every 500 milliseconds, for the
+// collector service to report desiredState.
+func verifyServiceState(t *testing.T, scm *mgr.Mgr, desiredState svc.State) {
+	service, err := scm.OpenService(serviceName)
+	require.NoError(t, err)
+	defer service.Close()
+
+	require.Eventually(t, func() bool {
+		// The condition runs outside the test goroutine, where FailNow-based assertions
+		// must not be called, so a failed query simply counts as "not there yet".
+		status, err := service.Query()
+		return err == nil && status.State == desiredState
+	}, 10*time.Second, 500*time.Millisecond, "Service failed to reach the desired state")
+}
+
+// getCurrentServiceVersion returns the DisplayVersion of the first enumerated uninstall
+// registry entry whose DisplayName contains serviceDisplayName. The test fails if no
+// such entry is found.
+func getCurrentServiceVersion(t *testing.T) string {
+	// Read access is enough to enumerate the uninstall entries.
+	key, err := registry.OpenKey(registry.LOCAL_MACHINE, uninstallKeyPath, registry.READ)
+	require.NoError(t, err)
+	defer key.Close()
+
+	subKeys, err := key.ReadSubKeyNames(0)
+	require.NoError(t, err)
+
+	for _, subKey := range subKeys {
+		if version, found := collectorVersionFromUninstallEntry(t, subKey); found {
+			return version
+		}
+	}
+
+	require.Fail(t, "Failed to find service version in registry")
+	return ""
+}
+
+// collectorVersionFromUninstallEntry reads the uninstall entry named subKey and, when its
+// DisplayName contains serviceDisplayName, returns its DisplayVersion and true. Entries
+// that cannot be opened or have no DisplayName are reported as not found. Handling each
+// entry in its own call closes its registry key before the next entry is opened.
+func collectorVersionFromUninstallEntry(t *testing.T, subKey string) (string, bool) {
+	subKeyPath := fmt.Sprintf(`%s\%s`, uninstallKeyPath, subKey)
+	entry, err := registry.OpenKey(registry.LOCAL_MACHINE, subKeyPath, registry.QUERY_VALUE)
+	if err != nil {
+		return "", false
+	}
+	defer entry.Close()
+
+	displayName, _, err := entry.GetStringValue("DisplayName")
+	if err != nil || !strings.Contains(displayName, serviceDisplayName) {
+		return "", false
+	}
+
+	version, _, err := entry.GetStringValue("DisplayVersion")
+	require.NoError(t, err)
+	return version, true
+}
+
+// requireNoPendingFileOperations fails the test if any PendingFileRenameOperations entry
+// of the Session Manager registry key contains "splunk" (compared in lower case). A
+// missing value is accepted and treated as no pending operations.
+func requireNoPendingFileOperations(t *testing.T) {
+	pendingFileRenameKey, err := registry.OpenKey(
+		registry.LOCAL_MACHINE, `SYSTEM\CurrentControlSet\Control\Session Manager`, registry.QUERY_VALUE)
+	require.NoError(t, err)
+	defer pendingFileRenameKey.Close()
+	pendingFileRenameEntries, _, err := pendingFileRenameKey.GetStringsValue("PendingFileRenameOperations")
+	if err != nil {
+		require.ErrorIs(t, err, registry.ErrNotExist)
+	}
+
+	for _, fileName := range pendingFileRenameEntries {
+		if strings.Contains(strings.ToLower(fileName), "splunk") {
+			// Failf, unlike Fail, formats the file name into the reported message.
+			require.Failf(t, "Found pending file rename", "Found pending file rename: %s", fileName)
+		}
+	}
+}
+
+// getFilePathFromEnvVar returns the path stored in the environment variable envVar after
+// checking that the variable is set and that the path exists. Paths containing spaces
+// are returned wrapped in double quotes.
+func getFilePathFromEnvVar(t *testing.T, envVar string) string {
+	filePath := os.Getenv(envVar)
+	require.NotEmpty(t, filePath, "%s environment variable is not set", envVar)
+	_, err := os.Stat(filePath)
+	require.NoError(t, err, "File %s does not exist", filePath)
+	// NOTE(review): exec.Command already quotes arguments on Windows, confirm that the
+	// install script needs these extra literal quotes for paths with spaces.
+	if strings.Contains(filePath, " ") {
+		filePath = "\"" + filePath + "\""
+	}
+	return filePath
+}