From c60b702e016b845ab77564a9e40b3a6b64e17188 Mon Sep 17 00:00:00 2001 From: Steven Fairchild Date: Wed, 10 Apr 2024 11:24:40 -0400 Subject: [PATCH] Update RP and Gateway vmss OS image to cbl-mariner-2-gen2-fips. Restructure VMSS bootstrap bash scripts for increased reliability, and easier debugging Move all shared code into a commonly shared file to be sourced by all bootstrapping scripts. This allows for code reuse, minimal duplication. Fix mdm mdsd certificate download script Increase rpm retry time to 30 minutes total, every 30 seconds. Install Azure Security Monitor via VMSS Extension Remove RHUI and Microsoft repo configuration, add Mariner Extended repo config Remove lvm disk resize Mariner does not use lvm, the disk is automatically grown to the full size specified. Firewalld configuration has been removed, as Mariner does not have the requirements to support the nftables backend. Firewall rules will be configured at the vnet level in Azure. Remove semanage Mariner Linux does not have selinux configured. Add changes to remove CHECKACCESS Merged in PR https://github.com/Azure/ARO-RP/pull/3643 Remove gateway log rotation config Log rotation for the podman level driver log was not the correct approach. The podman log driver is now journald, so all logs will be shipped to journald rather than a ctr.log file. During mdm and mdsd setup, I've added wait steps for the download scripts to complete getting certificates. Without this, the download scripts run in a subshell and fixing up the certificates fails. Add firewalld configuration, required for podman networking Add podman aro network creation to isolate RP containers from possible interaction on the default podman network. 
--- pkg/deploy/assets/env-development.json | 2 +- pkg/deploy/assets/gateway-production.json | 15 +- pkg/deploy/assets/rp-production.json | 15 +- pkg/deploy/generator/resources_dev.go | 6 +- pkg/deploy/generator/resources_gateway.go | 31 +- pkg/deploy/generator/resources_rp.go | 32 +- pkg/deploy/generator/scripts.go | 12 + pkg/deploy/generator/scripts/devProxyVMSS.sh | 1 + pkg/deploy/generator/scripts/gatewayVMSS.sh | 621 ++++----------- pkg/deploy/generator/scripts/rpVMSS.sh | 745 ++++-------------- pkg/deploy/generator/scripts/util-common.sh | 132 ++++ pkg/deploy/generator/scripts/util-packages.sh | 125 +++ pkg/deploy/generator/scripts/util-services.sh | 673 ++++++++++++++++ pkg/deploy/generator/scripts/util-system.sh | 300 +++++++ pkg/deploy/generator/scripts/util.sh | 33 + 15 files changed, 1657 insertions(+), 1086 deletions(-) create mode 100644 pkg/deploy/generator/scripts/util-common.sh create mode 100644 pkg/deploy/generator/scripts/util-packages.sh create mode 100644 pkg/deploy/generator/scripts/util-services.sh create mode 100644 pkg/deploy/generator/scripts/util-system.sh create mode 100644 pkg/deploy/generator/scripts/util.sh diff --git a/pkg/deploy/assets/env-development.json b/pkg/deploy/assets/env-development.json index ef9a60f496e..49f3529ad50 100644 --- a/pkg/deploy/assets/env-development.json +++ b/pkg/deploy/assets/env-development.json @@ -367,7 +367,7 @@ "autoUpgradeMinorVersion": true, "settings": {}, "protectedSettings": { - "script": "[base64(concat(base64ToString('c2V0IC1leAoK'),'PROXYIMAGE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('proxyImage')),''')\n','PROXYIMAGEAUTH=$(base64 -d 
\u003c\u003c\u003c''',base64(parameters('proxyImageAuth')),''')\n','PROXYCERT=''',parameters('proxyCert'),'''\n','PROXYCLIENTCERT=''',parameters('proxyClientCert'),'''\n','PROXYKEY=''',parameters('proxyKey'),'''\n','\n',base64ToString('I0FkZGluZyByZXRyeSBsb2dpYyB0byB5dW0gY29tbWFuZHMgaW4gb3JkZXIgdG8gYXZvaWQgc3RhbGxpbmcgb3V0IG9uIHJlc291cmNlIGxvY2tzCmVjaG8gImluc3RhbGxpbmcgbW9ieS1lbmdpbmUgKGRvY2tlcikiCmZvciBhdHRlbXB0IGluIHsxLi42MH07IGRvCgl0ZG5mIGluc3RhbGwgLXkgbW9ieS1lbmdpbmUgbW9ieS1jbGkgJiYgYnJlYWsKCWlmIFtbICR7YXR0ZW1wdH0gLWx0IDYwIF1dOyB0aGVuIHNsZWVwIDMwOyBlbHNlIGV4aXQgMTsgZmkKZG9uZQoKc3lzdGVtY3RsIGVuYWJsZSBkb2NrZXIKc3lzdGVtY3RsIHN0YXJ0IGRvY2tlcgoKbWtkaXIgL3Jvb3QvLmRvY2tlcgpjYXQgPi9yb290Ly5kb2NrZXIvY29uZmlnLmpzb24gPDxFT0YKewoJImF1dGhzIjogewoJCSIke1BST1hZSU1BR0UlJS8qfSI6IHsKCQkJImF1dGgiOiAiJFBST1hZSU1BR0VBVVRIIgoJCX0KCX0KfQpFT0YKCmRvY2tlciBwdWxsICIkUFJPWFlJTUFHRSIKCm1rZGlyIC9ldGMvcHJveHkKYmFzZTY0IC1kIDw8PCIkUFJPWFlDRVJUIiA+L2V0Yy9wcm94eS9wcm94eS5jcnQKYmFzZTY0IC1kIDw8PCIkUFJPWFlLRVkiID4vZXRjL3Byb3h5L3Byb3h5LmtleQpiYXNlNjQgLWQgPDw8IiRQUk9YWUNMSUVOVENFUlQiID4vZXRjL3Byb3h5L3Byb3h5LWNsaWVudC5jcnQKY2hvd24gLVIgMTAwMDoxMDAwIC9ldGMvcHJveHkKY2htb2QgMDYwMCAvZXRjL3Byb3h5L3Byb3h5LmtleQoKY2F0ID4vZXRjL3N5c2NvbmZpZy9wcm94eSA8PEVPRgpQUk9YWV9JTUFHRT0nJFBST1hZSU1BR0UnCkVPRgoKY2F0ID4vZXRjL3N5c3RlbWQvc3lzdGVtL3Byb3h5LnNlcnZpY2UgPDwnRU9GJwpbVW5pdF0KQWZ0ZXI9bmV0d29yay1vbmxpbmUudGFyZ2V0CldhbnRzPW5ldHdvcmstb25saW5lLnRhcmdldAoKW1NlcnZpY2VdCkVudmlyb25tZW50RmlsZT0vZXRjL3N5c2NvbmZpZy9wcm94eQpFeGVjU3RhcnRQcmU9LS91c3IvYmluL2RvY2tlciBybSAtZiAlbgpFeGVjU3RhcnQ9L3Vzci9iaW4vZG9ja2VyIHJ1biAtLXJtIC0tbmFtZSAlbiAtcCA0NDM6ODQ0MyAtdiAvZXRjL3Byb3h5Oi9zZWNyZXRzICRQUk9YWV9JTUFHRQpFeGVjU3RvcD0vdXNyL2Jpbi9kb2NrZXIgc3RvcCAlbgpSZXN0YXJ0PWFsd2F5cwpSZXN0YXJ0U2VjPTEKU3RhcnRMaW1pdEludGVydmFsPTAKCltJbnN0YWxsXQpXYW50ZWRCeT1tdWx0aS11c2VyLnRhcmdldApFT0YKCnN5c3RlbWN0bCBlbmFibGUgcHJveHkuc2VydmljZQoKY2F0ID4vZXRjL2Nyb24ud2Vla2x5L3B1bGwtaW1hZ2UgPDwnRU9GJwojIS9iaW4vYmFzaAoKZG9ja2VyIHB1bGwgJFBST1hZSU1BR0UKc3lzdGVtY3RsIHJlc3RhcnQgcHJv
eHkuc2VydmljZQpFT0YKY2htb2QgK3ggL2V0Yy9jcm9uLndlZWtseS9wdWxsLWltYWdlCgpjYXQgPi9ldGMvY3Jvbi53ZWVrbHkveXVtdXBkYXRlIDw8J0VPRicKIyEvYmluL2Jhc2gKCnl1bSB1cGRhdGUgLXkKRU9GCmNobW9kICt4IC9ldGMvY3Jvbi53ZWVrbHkveXVtdXBkYXRlCgpjYXQgPi9ldGMvY3Jvbi5kYWlseS9yZXN0YXJ0LXByb3h5IDw8J0VPRicKIyEvYmluL2Jhc2gKCnN5c3RlbWN0bCByZXN0YXJ0IHByb3h5LnNlcnZpY2UKRU9GCmNobW9kICt4IC9ldGMvY3Jvbi5kYWlseS9yZXN0YXJ0LXByb3h5CgooCglzbGVlcCAzMAoJcmVib290CikgJgo=')))]" + "script": "[base64(concat(base64ToString('c2V0IC1leAoK'),'PROXYIMAGE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('proxyImage')),''')\n','PROXYIMAGEAUTH=$(base64 -d \u003c\u003c\u003c''',base64(parameters('proxyImageAuth')),''')\n','PROXYCERT=''',parameters('proxyCert'),'''\n','PROXYCLIENTCERT=''',parameters('proxyClientCert'),'''\n','PROXYKEY=''',parameters('proxyKey'),'''\n','\n',base64ToString('IyEvYmluL2Jhc2gKI0FkZGluZyByZXRyeSBsb2dpYyB0byB5dW0gY29tbWFuZHMgaW4gb3JkZXIgdG8gYXZvaWQgc3RhbGxpbmcgb3V0IG9uIHJlc291cmNlIGxvY2tzCmVjaG8gImluc3RhbGxpbmcgbW9ieS1lbmdpbmUgKGRvY2tlcikiCmZvciBhdHRlbXB0IGluIHsxLi42MH07IGRvCgl0ZG5mIGluc3RhbGwgLXkgbW9ieS1lbmdpbmUgbW9ieS1jbGkgJiYgYnJlYWsKCWlmIFtbICR7YXR0ZW1wdH0gLWx0IDYwIF1dOyB0aGVuIHNsZWVwIDMwOyBlbHNlIGV4aXQgMTsgZmkKZG9uZQoKc3lzdGVtY3RsIGVuYWJsZSBkb2NrZXIKc3lzdGVtY3RsIHN0YXJ0IGRvY2tlcgoKbWtkaXIgL3Jvb3QvLmRvY2tlcgpjYXQgPi9yb290Ly5kb2NrZXIvY29uZmlnLmpzb24gPDxFT0YKewoJImF1dGhzIjogewoJCSIke1BST1hZSU1BR0UlJS8qfSI6IHsKCQkJImF1dGgiOiAiJFBST1hZSU1BR0VBVVRIIgoJCX0KCX0KfQpFT0YKCmRvY2tlciBwdWxsICIkUFJPWFlJTUFHRSIKCm1rZGlyIC9ldGMvcHJveHkKYmFzZTY0IC1kIDw8PCIkUFJPWFlDRVJUIiA+L2V0Yy9wcm94eS9wcm94eS5jcnQKYmFzZTY0IC1kIDw8PCIkUFJPWFlLRVkiID4vZXRjL3Byb3h5L3Byb3h5LmtleQpiYXNlNjQgLWQgPDw8IiRQUk9YWUNMSUVOVENFUlQiID4vZXRjL3Byb3h5L3Byb3h5LWNsaWVudC5jcnQKY2hvd24gLVIgMTAwMDoxMDAwIC9ldGMvcHJveHkKY2htb2QgMDYwMCAvZXRjL3Byb3h5L3Byb3h5LmtleQoKY2F0ID4vZXRjL3N5c2NvbmZpZy9wcm94eSA8PEVPRgpQUk9YWV9JTUFHRT0nJFBST1hZSU1BR0UnCkVPRgoKY2F0ID4vZXRjL3N5c3RlbWQvc3lzdGVtL3Byb3h5LnNlcnZpY2UgPDwnRU9GJwpbVW5pdF0KQWZ0ZXI9bmV0d29yay1vbmxpbmUudGFyZ2V
0CldhbnRzPW5ldHdvcmstb25saW5lLnRhcmdldAoKW1NlcnZpY2VdCkVudmlyb25tZW50RmlsZT0vZXRjL3N5c2NvbmZpZy9wcm94eQpFeGVjU3RhcnRQcmU9LS91c3IvYmluL2RvY2tlciBybSAtZiAlbgpFeGVjU3RhcnQ9L3Vzci9iaW4vZG9ja2VyIHJ1biAtLXJtIC0tbmFtZSAlbiAtcCA0NDM6ODQ0MyAtdiAvZXRjL3Byb3h5Oi9zZWNyZXRzICRQUk9YWV9JTUFHRQpFeGVjU3RvcD0vdXNyL2Jpbi9kb2NrZXIgc3RvcCAlbgpSZXN0YXJ0PWFsd2F5cwpSZXN0YXJ0U2VjPTEKU3RhcnRMaW1pdEludGVydmFsPTAKCltJbnN0YWxsXQpXYW50ZWRCeT1tdWx0aS11c2VyLnRhcmdldApFT0YKCnN5c3RlbWN0bCBlbmFibGUgcHJveHkuc2VydmljZQoKY2F0ID4vZXRjL2Nyb24ud2Vla2x5L3B1bGwtaW1hZ2UgPDwnRU9GJwojIS9iaW4vYmFzaAoKZG9ja2VyIHB1bGwgJFBST1hZSU1BR0UKc3lzdGVtY3RsIHJlc3RhcnQgcHJveHkuc2VydmljZQpFT0YKY2htb2QgK3ggL2V0Yy9jcm9uLndlZWtseS9wdWxsLWltYWdlCgpjYXQgPi9ldGMvY3Jvbi53ZWVrbHkveXVtdXBkYXRlIDw8J0VPRicKIyEvYmluL2Jhc2gKCnl1bSB1cGRhdGUgLXkKRU9GCmNobW9kICt4IC9ldGMvY3Jvbi53ZWVrbHkveXVtdXBkYXRlCgpjYXQgPi9ldGMvY3Jvbi5kYWlseS9yZXN0YXJ0LXByb3h5IDw8J0VPRicKIyEvYmluL2Jhc2gKCnN5c3RlbWN0bCByZXN0YXJ0IHByb3h5LnNlcnZpY2UKRU9GCmNobW9kICt4IC9ldGMvY3Jvbi5kYWlseS9yZXN0YXJ0LXByb3h5CgooCglzbGVlcCAzMAoJcmVib290CikgJgo=')))]" }, "provisionAfterExtensions": [ "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent", diff --git a/pkg/deploy/assets/gateway-production.json b/pkg/deploy/assets/gateway-production.json index 0c4690775c4..7d28407a363 100644 --- a/pkg/deploy/assets/gateway-production.json +++ b/pkg/deploy/assets/gateway-production.json @@ -290,7 +290,20 @@ "autoUpgradeMinorVersion": true, "settings": {}, "protectedSettings": { - "script": "[base64(concat(base64ToString('c2V0IC1leAoK'),'ACRRESOURCEID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('acrResourceId')),''')\n','AZURECLOUDNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureCloudName')),''')\n','AZURESECPACKQUALYSURL=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureSecPackQualysUrl')),''')\n','AZURESECPACKVSATENANTID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureSecPackVSATenantId')),''')\n','DATABASEACCOUNTNAME=$(base64 -d 
\u003c\u003c\u003c''',base64(parameters('databaseAccountName')),''')\n','MDMFRONTENDURL=$(base64 -d \u003c\u003c\u003c''',base64(parameters('mdmFrontendUrl')),''')\n','MDSDENVIRONMENT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('mdsdEnvironment')),''')\n','FLUENTBITIMAGE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('fluentbitImage')),''')\n','GATEWAYMDSDCONFIGVERSION=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayMdsdConfigVersion')),''')\n','GATEWAYDOMAINS=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayDomains')),''')\n','GATEWAYFEATURES=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayFeatures')),''')\n','KEYVAULTDNSSUFFIX=$(base64 -d \u003c\u003c\u003c''',base64(parameters('keyvaultDNSSuffix')),''')\n','KEYVAULTPREFIX=$(base64 -d \u003c\u003c\u003c''',base64(parameters('keyvaultPrefix')),''')\n','RPIMAGE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpImage')),''')\n','RPMDMACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdmAccount')),''')\n','RPMDSDACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdsdAccount')),''')\n','RPMDSDNAMESPACE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdsdNamespace')),''')\n','MDMIMAGE=''/distroless/genevamdm:2.2024.626.1539-d1a6e7-20240715t0935@sha256:372fbc981bbfdf2b9a9d0ffdca2c51ed389b291a3bcff0401e9afb0c01605823''\n','LOCATION=$(base64 -d \u003c\u003c\u003c''',base64(resourceGroup().location),''')\n','SUBSCRIPTIONID=$(base64 -d \u003c\u003c\u003c''',base64(subscription().subscriptionId),''')\n','RESOURCEGROUPNAME=$(base64 -d \u003c\u003c\u003c''',base64(resourceGroup().name),''')\n','\n',base64ToString('#!/bin/bash

echo "setting ssh password authentication"
# We need to manually set PasswordAuthentication to true in order for the VMSS Access JIT to work
sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/g' /etc/ssh/sshd_config
systemctl reload sshd.service

#Adding retry logic to yum commands in order to avoid stalling out on resource locks
# Both loops below follow the same shape: up to 60 attempts, 30 seconds apart
# (roughly 30 minutes in total); the script exits with status 1 if the 60th
# attempt also fails.
echo "running RHUI fix"
for attempt in {1..60}; do
  # Update using only the repositories whose id matches rhui-microsoft-azure*
  yum update -y --disablerepo='*' --enablerepo='rhui-microsoft-azure*' && break
  if [[ ${attempt} -lt 60 ]]; then sleep 30; else exit 1; fi
done

echo "running yum update"
for attempt in {1..60}; do
  # Full update, excluding the WALinuxAgent packages (-x)
  yum -y -x WALinuxAgent -x WALinuxAgent-udev update --allowerasing && break
  if [[ ${attempt} -lt 60 ]]; then sleep 30; else exit 1; fi
done

echo "extending partition table"
# Linux block devices are inconsistently named
# it's difficult to tie the lvm pv to the physical disk using /dev/disk files, which is why lvs is used here
# The pipeline keeps the second line of the lvs device listing, takes its
# third space-separated field, cuts everything from "(" onwards and finally
# deletes all digits, leaving a device path without a partition number.
# NOTE(review): presumably line 1 is a header and line 2 is the first logical
# volume, which is assumed to live on the disk that needs growing - confirm
physical_disk="$(lvs -o devices -a | head -n2 | tail -n1 | cut -d ' ' -f 3 | cut -d \( -f 1 | tr -d '[:digit:]')"
# Grow partition number 2 of that disk
growpart "$physical_disk" 2

echo "extending filesystems"
# Extend the root logical volume by 20% of the free space, then grow the filesystem mounted at /
lvextend -l +20%FREE /dev/rootvg/rootlv
xfs_growfs /

# Extend the var logical volume by all of the free space that is left, then grow the filesystem mounted at /var
lvextend -l +100%FREE /dev/rootvg/varlv
xfs_growfs /var

rpm --import https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-8
rpm --import https://packages.microsoft.com/keys/microsoft.asc

for attempt in {1..60}; do
  yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm && break
  if [[ ${attempt} -lt 60 ]]; then sleep 30; else exit 1; fi
done

echo "configuring logrotate"

# gateway_logdir is a readonly variable that specifies the host path mount point for the gateway container log file
# for the purpose of rotating the gateway logs
declare -r gateway_logdir='/var/log/aro-gateway'

cat >/etc/logrotate.conf <<EOF
# see "man logrotate" for details
# rotate log files weekly
weekly

# keep 2 weeks worth of backlogs
rotate 2

# create new (empty) log files after rotating old ones
create

# use date as a suffix of the rotated file
dateext

# uncomment this if you want your log files compressed
compress

# RPM packages drop log rotation information into this directory
include /etc/logrotate.d

# no packages own wtmp and btmp -- we'll rotate them here
/var/log/wtmp {
    monthly
    create 0664 root utmp
        minsize 1M
    rotate 1
}

/var/log/btmp {
    missingok
    monthly
    create 0600 root utmp
    rotate 1
}

# Maximum log directory size is 100G with this configuration
# Setting limit to 100G to allow space for other logging services
# copytruncate is a critical option used to prevent logs from being shipped twice
${gateway_logdir} {
    size 20G
    rotate 5
    create 0600 root root
    copytruncate
    noolddir
    compress
}
EOF

echo "configuring yum repository and running yum update"
cat >/etc/yum.repos.d/azure.repo <<'EOF'
[azure-cli]
name=azure-cli
baseurl=https://packages.microsoft.com/yumrepos/azure-cli
enabled=yes
gpgcheck=yes

[azurecore]
name=azurecore
baseurl=https://packages.microsoft.com/yumrepos/azurecore
enabled=yes
gpgcheck=no
EOF

semanage fcontext -a -t var_log_t "/var/log/journal(/.*)?"
mkdir -p /var/log/journal

for attempt in {1..60}; do
  yum -y install clamav azsec-clamav azsec-monitor azure-cli azure-mdsd azure-security podman-docker openssl-perl python3 && break
  # hack - we are installing python3 on hosts due to an issue with Azure Linux Extensions https://github.com/Azure/azure-linux-extensions/pull/1505
  if [[ ${attempt} -lt 60 ]]; then sleep 30; else exit 1; fi
done

echo "applying firewall rules"
# https://access.redhat.com/security/cve/cve-2020-13401
cat >/etc/sysctl.d/02-disable-accept-ra.conf <<'EOF'
net.ipv6.conf.all.accept_ra=0
EOF

cat >/etc/sysctl.d/01-disable-core.conf <<'EOF'
kernel.core_pattern = |/bin/true
EOF
sysctl --system

firewall-cmd --add-port=80/tcp --permanent
firewall-cmd --add-port=8081/tcp --permanent
firewall-cmd --add-port=443/tcp --permanent

echo "logging into prod acr"
# Expose the cloud name received in AZURECLOUDNAME to child processes as AZURE_CLOUD_NAME
export AZURE_CLOUD_NAME=$AZURECLOUDNAME
az login -i --allow-no-subscriptions

# The managed identity that the VM runs as only has a single roleassignment.
# This role assignment is ACRPull which is not necessarily present in the
# subscription we're deploying into.  If the identity does not have any
# role assignments scoped on the subscription we're deploying into, it will
# not show on az login -i, which is why the below line is commented.
# az account set -s "$SUBSCRIPTIONID"

# Suppress emulation output for podman instead of docker for az acr compatibility
mkdir -p /etc/containers/
touch /etc/containers/nodocker

mkdir -p /root/.docker
# The registry name is the last path segment of the ACR resource id (sed strips
# everything up to and including the final "/"); REGISTRY_AUTH_FILE is set for
# this one command only
REGISTRY_AUTH_FILE=/root/.docker/config.json az acr login --name "$(sed -e 's|.*/||' <<<"$ACRRESOURCEID")"

# Rebuild MDMIMAGE as "<part of RPIMAGE before its first slash>/<MDMIMAGE with
# everything up to its first slash removed>", so the mdm image is pulled from
# the same registry host as the RP image
MDMIMAGE="${RPIMAGE%%/*}/${MDMIMAGE#*/}"
docker pull "$MDMIMAGE"
docker pull "$RPIMAGE"
docker pull "$FLUENTBITIMAGE"

# The login is only needed for the pulls above
az logout

echo "configuring fluentbit service"
mkdir -p /etc/fluentbit/
mkdir -p /var/lib/fluent

cat >/etc/fluentbit/fluentbit.conf <<'EOF'
[INPUT]
	Name systemd
	Tag journald
	Systemd_Filter _COMM=aro
	DB /var/lib/fluent/journaldb

[FILTER]
	Name modify
	Match journald
	Remove_wildcard _
	Remove TIMESTAMP

[OUTPUT]
	Name forward
	Match *
	Port 29230
EOF

echo "FLUENTBITIMAGE=$FLUENTBITIMAGE" >/etc/sysconfig/fluentbit

cat >/etc/systemd/system/fluentbit.service <<'EOF'
[Unit]
After=network-online.target
Wants=network-online.target
StartLimitIntervalSec=0

[Service]
RestartSec=1s
EnvironmentFile=/etc/sysconfig/fluentbit
ExecStartPre=-/usr/bin/docker rm -f %N
ExecStart=/usr/bin/docker run \
  --security-opt label=disable \
  --entrypoint /opt/td-agent-bit/bin/td-agent-bit \
  --net=host \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -v /etc/fluentbit/fluentbit.conf:/etc/fluentbit/fluentbit.conf \
  -v /var/lib/fluent:/var/lib/fluent:z \
  -v /var/log/journal:/var/log/journal:ro \
  -v /etc/machine-id:/etc/machine-id:ro \
  $FLUENTBITIMAGE \
  -c /etc/fluentbit/fluentbit.conf

ExecStop=/usr/bin/docker stop %N
Restart=always
RestartSec=5
StartLimitInterval=0

[Install]
WantedBy=multi-user.target
EOF

echo "configuring mdm service"
cat >/etc/sysconfig/mdm <<EOF
MDMFRONTENDURL='$MDMFRONTENDURL'
MDMIMAGE='$MDMIMAGE'
MDMSOURCEENVIRONMENT='$LOCATION'
MDMSOURCEROLE=gateway
MDMSOURCEROLEINSTANCE='$(hostname)'
EOF

mkdir /var/etw
cat >/etc/systemd/system/mdm.service <<'EOF'
[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=/etc/sysconfig/mdm
ExecStartPre=-/usr/bin/docker rm -f %N
ExecStart=/usr/bin/docker run \
  --entrypoint /usr/sbin/MetricsExtension \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -m 2g \
  -v /etc/mdm.pem:/etc/mdm.pem \
  -v /var/etw:/var/etw:z \
  $MDMIMAGE \
  -CertFile /etc/mdm.pem \
  -FrontEndUrl $MDMFRONTENDURL \
  -Logger Console \
  -LogLevel Warning \
  -PrivateKeyFile /etc/mdm.pem \
  -SourceEnvironment $MDMSOURCEENVIRONMENT \
  -SourceRole $MDMSOURCEROLE \
  -SourceRoleInstance $MDMSOURCEROLEINSTANCE
ExecStop=/usr/bin/docker stop %N
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target
EOF

echo "configuring aro-gateway service"
cat >/etc/sysconfig/aro-gateway <<EOF
ACR_RESOURCE_ID='$ACRRESOURCEID'
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
MDM_ACCOUNT="$RPMDMACCOUNT"
MDM_NAMESPACE=Gateway
GATEWAY_DOMAINS='$GATEWAYDOMAINS'
GATEWAY_FEATURES='$GATEWAYFEATURES'
RPIMAGE='$RPIMAGE'
EOF

cat >/etc/systemd/system/aro-gateway.service <<EOF
[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=/etc/sysconfig/aro-gateway
ExecStartPre=-/usr/bin/docker rm -f %N
ExecStartPre=/usr/bin/mkdir -p ${gateway_logdir}
ExecStart=/usr/bin/docker run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -e ACR_RESOURCE_ID \
  -e DATABASE_ACCOUNT_NAME \
  -e GATEWAY_DOMAINS \
  -e GATEWAY_FEATURES \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -m 2g \
  -p 80:8080 \
  -p 8081:8081 \
  -p 443:8443 \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  -v ${gateway_logdir}:/ctr.log:z \
  \$RPIMAGE \
  gateway
ExecStop=/usr/bin/docker stop -t 3600 %N
TimeoutStopSec=3600
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target
EOF

chcon -R system_u:object_r:var_log_t:s0 /var/opt/microsoft/linuxmonagent

mkdir -p /var/lib/waagent/Microsoft.Azure.KeyVault.Store

echo "configuring mdsd and mdm services"
for var in "mdsd" "mdm"; do
cat >/etc/systemd/system/download-$var-credentials.service <<EOF
[Unit]
Description=Periodic $var credentials refresh

[Service]
Type=oneshot
ExecStart=/usr/local/bin/download-credentials.sh $var
EOF

cat >/etc/systemd/system/download-$var-credentials.timer <<EOF
[Unit]
Description=Periodic $var credentials refresh
After=network-online.target
Wants=network-online.target

[Timer]
OnBootSec=0min
OnCalendar=0/12:00:00
AccuracySec=5s

[Install]
WantedBy=timers.target
EOF
done

cat >/usr/local/bin/download-credentials.sh <<EOF
#!/bin/bash
set -eu

COMPONENT="\$1"
echo "Download \$COMPONENT credentials"

TEMP_DIR=\$(mktemp -d)
export AZURE_CONFIG_DIR=\$(mktemp -d)

echo "Logging into Azure..."
RETRIES=3
while [ "\$RETRIES" -gt 0 ]; do
    if az login -i --allow-no-subscriptions
    then
        echo "az login successful"
        break
    else
        echo "az login failed. Retrying..."
        let RETRIES-=1
        sleep 5
    fi
done

trap "cleanup" EXIT

cleanup() {
  az logout
  [[ "\$TEMP_DIR" =~ /tmp/.+ ]] && rm -rf \$TEMP_DIR
  [[ "\$AZURE_CONFIG_DIR" =~ /tmp/.+ ]] && rm -rf \$AZURE_CONFIG_DIR
}

if [ "\$COMPONENT" = "mdm" ]; then
  CURRENT_CERT_FILE="/etc/mdm.pem"
elif [ "\$COMPONENT" = "mdsd" ]; then
  CURRENT_CERT_FILE="/var/lib/waagent/Microsoft.Azure.KeyVault.Store/mdsd.pem"
else
  echo Invalid usage && exit 1
fi

SECRET_NAME="gwy-\${COMPONENT}"
NEW_CERT_FILE="\$TEMP_DIR/\$COMPONENT.pem"
for attempt in {1..5}; do
  az keyvault secret download --file \$NEW_CERT_FILE --id "https://$KEYVAULTPREFIX-gwy.$KEYVAULTDNSSUFFIX/secrets/\$SECRET_NAME" && break
  if [[ \$attempt -lt 5 ]]; then sleep 10; else exit 1; fi
done

if [ -f \$NEW_CERT_FILE ]; then
  if [ "\$COMPONENT" = "mdsd" ]; then
    chown syslog:syslog \$NEW_CERT_FILE
  else
    sed -i -ne '1,/END CERTIFICATE/ p' \$NEW_CERT_FILE
  fi

  new_cert_sn="\$(openssl x509 -in "\$NEW_CERT_FILE" -noout -serial | awk -F= '{print \$2}')"
  current_cert_sn="\$(openssl x509 -in "\$CURRENT_CERT_FILE" -noout -serial | awk -F= '{print \$2}')"
  if [[ ! -z \$new_cert_sn ]] && [[ \$new_cert_sn != "\$current_cert_sn" ]]; then
    echo updating certificate for \$COMPONENT
    chmod 0600 \$NEW_CERT_FILE
    mv \$NEW_CERT_FILE \$CURRENT_CERT_FILE
  fi
else
  echo Failed to refresh certificate for \$COMPONENT && exit 1
fi
EOF

chmod u+x /usr/local/bin/download-credentials.sh

systemctl enable download-mdsd-credentials.timer
systemctl enable download-mdm-credentials.timer

/usr/local/bin/download-credentials.sh mdsd
/usr/local/bin/download-credentials.sh mdm
MDSDCERTIFICATESAN=$(openssl x509 -in /var/lib/waagent/Microsoft.Azure.KeyVault.Store/mdsd.pem -noout -subject | sed -e 's/.*CN = //')

cat >/etc/systemd/system/watch-mdm-credentials.service <<EOF
[Unit]
Description=Watch for changes in mdm.pem and restarts the mdm service

[Service]
Type=oneshot
ExecStart=/usr/bin/systemctl restart mdm.service

[Install]
WantedBy=multi-user.target
EOF

cat >/etc/systemd/system/watch-mdm-credentials.path <<EOF
[Path]
PathModified=/etc/mdm.pem

[Install]
WantedBy=multi-user.target
EOF

systemctl enable watch-mdm-credentials.path
systemctl start watch-mdm-credentials.path

mkdir /etc/systemd/system/mdsd.service.d
cat >/etc/systemd/system/mdsd.service.d/override.conf <<'EOF'
[Unit]
After=network-online.target
EOF

cat >/etc/default/mdsd <<EOF
MDSD_ROLE_PREFIX=/var/run/mdsd/default
MDSD_OPTIONS="-A -d -r \$MDSD_ROLE_PREFIX"

export MONITORING_GCS_ENVIRONMENT='$MDSDENVIRONMENT'
export MONITORING_GCS_ACCOUNT='$RPMDSDACCOUNT'
export MONITORING_GCS_REGION='$LOCATION'
export MONITORING_GCS_AUTH_ID_TYPE=AuthKeyVault
export MONITORING_GCS_AUTH_ID='$MDSDCERTIFICATESAN'
export MONITORING_GCS_NAMESPACE='$RPMDSDNAMESPACE'
export MONITORING_CONFIG_VERSION='$GATEWAYMDSDCONFIGVERSION'
export MONITORING_USE_GENEVA_CONFIG_SERVICE=true

export MONITORING_TENANT='$LOCATION'
export MONITORING_ROLE=gateway
export MONITORING_ROLE_INSTANCE='$(hostname)'

export MDSD_MSGPACK_SORT_COLUMNS=1
EOF

# setting MONITORING_GCS_AUTH_ID_TYPE=AuthKeyVault seems to have caused mdsd not
# to honour SSL_CERT_FILE any more, heaven only knows why.
# Workaround: split the system CA bundle into individual files named
# /usr/lib/ssl/certs/cert-NNN.pem, cutting one line after every empty line and
# repeating for the whole file ({*}); csplit's size report is discarded.
mkdir -p /usr/lib/ssl/certs
csplit -f /usr/lib/ssl/certs/cert- -b %03d.pem /etc/pki/tls/certs/ca-bundle.crt /^$/1 {*} >/dev/null
# NOTE(review): c_rehash presumably creates the hash-named links OpenSSL uses
# to look certificates up in a directory - confirm
c_rehash /usr/lib/ssl/certs

# we leave clientId blank as long as only 1 managed identity assigned to vmss
# if we have more than 1, we will need to populate with clientId used for off-node scanning
cat >/etc/default/vsa-nodescan-agent.config <<EOF
{
    "Nice": 19,
    "Timeout": 10800,
    "ClientId": "",
    "TenantId": "$AZURESECPACKVSATENANTID",
    "QualysStoreBaseUrl": "$AZURESECPACKQUALYSURL",
    "ProcessTimeout": 300,
    "CommandDelay": 0
  }
EOF

echo "enabling aro services"
# Services are only enabled here, not started; the script ends by rebooting
for service in aro-gateway auoms azsecd azsecmond mdsd mdm chronyd fluentbit; do
  systemctl enable $service.service
done

# Configure each azsecd scan type with the same -d value
# NOTE(review): P1D looks like an ISO-8601 duration of one day - confirm
for scan in baseline clamav software; do
  /usr/local/bin/azsecd config -s $scan -d P1D
done

echo "rebooting"
# Reset file contexts under /var/log recursively before the reboot
restorecon -RF /var/log/*
# Reboot from a background subshell after 30 seconds so this script can return first
(sleep 30; reboot) &
')))]" + "script": "[base64(concat(base64ToString('c2V0IC1leAoK'),'ACRRESOURCEID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('acrResourceId')),''')\n','AZURECLOUDNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureCloudName')),''')\n','AZURESECPACKQUALYSURL=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureSecPackQualysUrl')),''')\n','AZURESECPACKVSATENANTID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureSecPackVSATenantId')),''')\n','DATABASEACCOUNTNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('databaseAccountName')),''')\n','MDMFRONTENDURL=$(base64 -d \u003c\u003c\u003c''',base64(parameters('mdmFrontendUrl')),''')\n','MDSDENVIRONMENT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('mdsdEnvironment')),''')\n','FLUENTBITIMAGE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('fluentbitImage')),''')\n','GATEWAYMDSDCONFIGVERSION=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayMdsdConfigVersion')),''')\n','GATEWAYDOMAINS=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayDomains')),''')\n','GATEWAYFEATURES=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayFeatures')),''')\n','KEYVAULTDNSSUFFIX=$(base64 -d \u003c\u003c\u003c''',base64(parameters('keyvaultDNSSuffix')),''')\n','KEYVAULTPREFIX=$(base64 -d \u003c\u003c\u003c''',base64(parameters('keyvaultPrefix')),''')\n','RPIMAGE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpImage')),''')\n','RPMDMACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdmAccount')),''')\n','RPMDSDACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdsdAccount')),''')\n','RPMDSDNAMESPACE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdsdNamespace')),''')\n','MDMIMAGE=''/distroless/genevamdm:2.2024.626.1539-d1a6e7-20240715t0935@sha256:372fbc981bbfdf2b9a9d0ffdca2c51ed389b291a3bcff0401e9afb0c01605823''\n','LOCATION=$(base64 -d \u003c\u003c\u003c''',base64(resourceGroup().location),''')\n','SUBSCRIPTIONID=$(base64 -d 
\u003c\u003c\u003c''',base64(subscription().subscriptionId),''')\n','RESOURCEGROUPNAME=$(base64 -d \u003c\u003c\u003c''',base64(resourceGroup().name),''')\n','\n',base64ToString('#!/bin/bash
# Internal Functions and Constants

# empty_str - read-only empty string; functions that accept an optional
# nameref argument point the nameref at this variable when the caller passes
# nothing (see dnf_update_pkgs)
# shellcheck disable=SC2034
declare -r empty_str=""

# role_gateway - read-only name of the gateway role, used to determine which
# VMSS is being bootstrapped (see verify_role and get_keyvault_suffix)
# this should be referenced by scripts sourcing this file
declare -r role_gateway="gateway"
# role_rp - read-only name of the rp role, used to determine which VMSS is
# being bootstrapped (see verify_role and get_keyvault_suffix)
# this should be referenced by scripts sourcing this file
declare -r role_rp="rp"

# log prints "<function name>: <message>" to stdout
# Args
# 1) msg - string; optional, defaults to "log message is empty"
# 2) stack_level - int; optional, index into FUNCNAME of the name to print,
#       defaults to 1 (the function that called log)
log() {
    local -r text="${1:-"log message is empty"}"
    local -r frame="${2:-1}"
    local -r origin="${FUNCNAME[${frame}]}"

    printf '%s: %s\n' "$origin" "$text"
}

# abort logs a message on behalf of the function that called abort, logs
# "Exiting" under its own name, then terminates the shell with status 1
# Args
# 1) msg - string; reason for aborting
abort() {
    # Seen from inside log the stack is log <- abort <- caller, so frame 2 is
    # the function that asked to abort
    local -ri caller_frame=2
    local -r reason="${1}"

    log "$reason" "$caller_frame"
    log "Exiting"
    exit 1
}

# write_file writes a string to a file, either replacing the file or
# appending to it; a trailing newline is added after the contents
# Args
# 1) filename - nameref, string; name of the variable holding the file path
# 2) file_contents - nameref, string; name of the variable holding the text to write
# 3) clobber - boolean; optional - "true" overwrites the file, any other
#       value appends, defaults to false
write_file() {
    # The nameref locals carry a prefix so that a caller variable which is
    # itself called "filename" or "file_contents" does not turn into a
    # circular name reference
    local -n __wf_filename="$1"
    local -n __wf_contents="$2"
    local -r clobber="${3:-false}"

    # The flag is compared as a string; it is never executed as a command
    if [[ "$clobber" == "true" ]]; then
        log "Overwriting file $__wf_filename"
        # printf writes the text verbatim, echo would swallow contents such as "-n"
        printf '%s\n' "$__wf_contents" > "$__wf_filename"
    else
        log "Appending to $__wf_filename"
        printf '%s\n' "$__wf_contents" >> "$__wf_filename"
    fi
}

# retry runs a command until it succeeds, in order to avoid stalling out on
# resource locks (e.g. a concurrent package manager run)
# The command is tried once and then retried up to "retries" more times, with
# a pause of wait_time seconds between attempts. When the final attempt fails,
# abort is called, which terminates the shell.
# args:
# 1) cmd_retry - nameref, array; Command and argument(s) to retry
# 2) wait_time - nameref, integer; Time to wait before retrying command
# 3) retries - integer, optional; Amount of times to retry command, defaults to 5
retry() {
    # Prefixed nameref locals avoid circular references when the caller's own
    # variables are called "cmd_retry" or "wait_time"
    local -n __retry_cmd="$1"
    local -n __retry_wait="$2"
    local -ri retries="${3:-5}"
    # One initial attempt plus the requested number of retries
    local -ri max_attempts=$(( retries + 1 ))
    local -i attempt

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        log "attempt #${attempt} - ${FUNCNAME[2]}"
        # Deliberately unquoted: callers may hand over elements that still
        # need word splitting (e.g. "-x pkg" or entries with trailing blanks)
        # shellcheck disable=SC2068
        ${__retry_cmd[@]} &

        if wait "$!"; then
            return 0
        fi

        if (( attempt < max_attempts )); then
            sleep "$__retry_wait"
        else
            # No attempts left: fail loudly instead of returning success
            abort "attempt #${attempt} - Failed to update packages"
        fi
    done
}

# verify_role aborts unless the given role is exactly one of the allowed roles
# Allowed roles are $role_rp and $role_gateway, plus "devproxy" when certs is true
# args:
# 1) test_role - nameref; role being verified
# 2) certs - boolean, optional; defaults to false. Set to true to add devproxy to allowed roles
verify_role() {
    # Prefixed nameref local so a caller variable called "test_role" does not
    # become a circular name reference
    local -n __vr_role="$1"
    local -r certs="${2:-false}"

    local allowed_roles="($role_rp|$role_gateway)"
    # The flag is compared as a string; it is never executed as a command
    if [[ "$certs" == "true" ]]; then
        # remove trailing ")" and append additional role
        allowed_roles="${allowed_roles%\)*}|devproxy)"
    fi

    # Anchor the alternation so that only a whole-string match verifies;
    # without anchors a value such as "rpx" would be accepted
    local -r allowed_roles_regex="^${allowed_roles}\$"

    if [[ "$__vr_role" =~ $allowed_roles_regex ]]; then
        log "Verified role \"$__vr_role\""
    else
        abort "failed to verify role, role \"${__vr_role}\" not in \"${allowed_roles}\""
    fi
}

# get_keyvault_suffix maps a role onto its keyvault suffix and its keyvault
# certificate prefix, and stores both in the caller's variables
#   $role_gateway -> kv_suffix "gwy", sec_prefix "gwy"
#   $role_rp      -> kv_suffix "svc", sec_prefix $role_rp
# Any other role aborts.
# args:
# 1) rl - nameref, string; role to look up
# 2) kv_suffix - nameref, string; receives the keyvault suffix
# 3) sec_prefix - nameref, string; receives the keyvault certificate prefix
get_keyvault_suffix() {
    local -n rl="$1"
    local -n kv_suffix="$2"
    local -n sec_prefix="$3"

    local -r gateway_short="gwy"
    local -r rp_vault_suffix="svc"

    if [[ "$rl" == "$role_gateway" ]]; then
        kv_suffix="$gateway_short"
        sec_prefix="$gateway_short"
        return
    fi

    if [[ "$rl" == "$role_rp" ]]; then
        kv_suffix="$rp_vault_suffix"
        sec_prefix="$role_rp"
        return
    fi

    abort "unkown role $rl"
}

# reboot_vm logs that it is starting, then launches "shutdown -r now" in the
# background from inside a subshell, so the function returns without waiting
# for the shutdown command
# NOTE(review): this function was previously described as restoring selinux
# file contexts and scheduling the reboot one hour later; the body below does
# neither - confirm which behaviour is intended
# Reboots should be scheduled after all VM extensions have had time to complete
# Reference: https://learn.microsoft.com/en-us/azure/virtual-machines/extensions/custom-script-linux#tips
reboot_vm() {
    log "starting"

    (shutdown -r now &)
}
#!/bin/bash
# Repository and package management related functions

# configure_repo_mariner_extended
# Downloads the CBL-Mariner 2.0 extended repository definition to
# /etc/yum.repos.d/mariner-extended.repo, then runs "dnf update -y" with that
# repository enabled, retrying through retry. Aborts when the download fails.
# args:
# 1) wait_time - nameref, integer; Time to wait before retrying command
# 2) retries - integer, optional; forwarded to retry unchanged, retry applies
#       its own default when this is empty
configure_repo_mariner_extended() {
    local -r extended_repo_config="https://packages.microsoft.com/cbl-mariner/2.0/prod/extended/x86_64/config.repo"
    local -r extended_repo_file="/etc/yum.repos.d/mariner-extended.repo"

    # -f makes curl exit non-zero on an HTTP error instead of saving the
    # server's error page as the repository definition
    if ! curl -fsSL "$extended_repo_config" -o "$extended_repo_file"; then
        abort "failed to download $extended_repo_config"
    fi

    local -r repo_name="cbl-mariner2.0prodextendedx86_64"

    local -ra cmd=(
        dnf
        update
        -y
        --enablerepo="$repo_name"
    )

    log "Enabling repo $repo_name"
    retry cmd "$1" "${2:-}"
}

# configure_rpm_repos
# Entry point for repository setup; each repository is configured by its own
# function, which is called from here
# args:
# 1) wait_time - nameref, integer; Time to wait before retrying command
# 2) retries - integer, optional; Amount of times to retry command, this
#       function forwards 1 when the argument is omitted or empty
configure_rpm_repos() {
    local -r wait_time_name="$1"
    local -r retry_count="${2:-1}"

    log "starting"

    configure_repo_mariner_extended "$wait_time_name" "$retry_count"
}

# dnf_install_pkgs
# Builds "dnf -y install <packages...>" and runs it through retry
# args:
# 1) pkgs - nameref, string array; Packages to be installed
# 2) wait_time - nameref, integer; Time to wait before retrying command
# 3) retries - integer, optional; forwarded to retry unchanged, retry applies
#       its own default when this is empty
dnf_install_pkgs() {
    # Prefixed locals so a caller array called "pkgs" or "cmd" is neither
    # shadowed nor turned into a circular name reference
    local -n __dip_pkgs="$1"
    log "starting"

    # Every package becomes its own array element, free of stray delimiters
    local -ra __dip_cmd=(
        dnf
        -y
        install
        "${__dip_pkgs[@]}"
    )

    log "Attempting to install packages: ${__dip_pkgs[*]}"
    retry __dip_cmd "$2" "${3:-}"
}


# dnf_update_pkgs
# Builds a `dnf -y [-x pkg ...] update --allowerasing` command and runs it
# through retry.
# args:
# 1) excludes - nameref, string array, optional; Packages to exclude from updating
#       Each index must be prefixed with -x
# 2) wait_time - nameref, integer; Time to wait before retrying command
# 3) retries - integer, optional; Amount of times to retry command, defaults to 5
dnf_update_pkgs() {
    local -n excludes="${1:-empty_str}"
    log "starting"

    # shellcheck disable=SC2034
    local -a cmd=(
        dnf
        -y
    )

    if [ -n "${excludes[*]:-}" ]; then
        local entry
        local -a entry_words
        for entry in "${excludes[@]}"; do
            # Every entry has the form "-x <package>"; split it so the flag and
            # the package name become two separate, clean arguments instead of
            # one newline-terminated element holding the whole list
            read -ra entry_words <<< "$entry"
            cmd+=("${entry_words[@]}")
        done
    fi

    cmd+=(
        update
        --allowerasing
    )
    local -r cmd

    log "Updating all packages excluding \"${excludes[*]:-}\""
    retry cmd "$2" "${3:-}"
}

# configure_dnf_cron_job
# Writes /etc/cron.weekly/dnfupdate, a two line script that runs `dnf update -y`,
# and marks it executable for its owner.
configure_dnf_cron_job() {
    log "starting"

    local -r cron_weekly_dnf_update_filename="/etc/cron.weekly/dnfupdate"

    # Assemble the script body line by line, then freeze it
    local cron_weekly_dnf_update_file
    printf -v cron_weekly_dnf_update_file '%s\n%s' '#!/bin/bash' 'dnf update -y'
    local -r cron_weekly_dnf_update_file

    write_file cron_weekly_dnf_update_filename cron_weekly_dnf_update_file true
    chmod u+x "${cron_weekly_dnf_update_filename}"
}

# rpm_import_keys
# Imports every key in the given array with `rpm --import -v`, running one
# retry-wrapped command per key.
# args:
# 1) keys - nameref, string array; rpm keys to be imported
# 2) wait_time - nameref, integer; Time to wait before retrying command
# 3) retries - integer, optional; Amount of times to retry command, forwarded to retry
# returns: the status of the import of the last key, 0 when no keys are given
rpm_import_keys() {
    local -n keys="$1"
    log "starting"

    # Loop state is local so a caller's `key` or `cmd` is never overwritten
    local key
    local -a cmd
    local -i status=0

    # Quoted expansion keeps each key as a single word; an empty array simply
    # skips the loop
    for key in "${keys[@]}"; do
        # shellcheck disable=SC2034
        cmd=(
            rpm
            --import
            -v
            "$key"
        )

        log "Importing rpm repository key $key"
        # A failed import is recorded rather than propagated immediately, so the
        # remaining keys are still attempted, as before
        status=0
        retry cmd "$2" "${3:-}" || status=$?
    done

    return "$status"
}
#!/bin/bash
# ARO service setup functions

# enable_services reloads the systemd manager configuration, then enables and
# starts each of the systemd services that are passed in
# args:
# 1) services - nameref, string array; services to be enabled
enable_services() {
    local -n svcs="$1"
    log "starting"

    systemctl daemon-reload

    log "enabling services ${svcs[*]}"
    # Local loop variable and quoted expansion: nothing leaks into the caller's
    # scope and each service name stays a single word
    local svc
    for svc in "${svcs[@]}"; do
        log "Enabling and starting $svc now"
        systemctl enable \
                  --now \
                  "$svc"
    done
}

# configure_service_aro_gateway
# Writes the gateway environment file to /etc/sysconfig/aro-gateway and the
# aro-gateway systemd unit, which runs the gateway container with podman.
# args:
# 1) image - nameref, string; container image
# 2) role - nameref, string; VMSS role, lower-cased and used as the container command
# 3) conf_file - nameref, string; aro gateway environment file
# 4) network - nameref, string; podman network name to be attached
configure_service_aro_gateway() {
    local -n image="$1" role="$2" conf_file="$3" network="$4"

    log "starting"
    log "Configuring aro-gateway service"

    # Environment file the unit reads through EnvironmentFile=
    local -r aro_gateway_conf_filename="/etc/sysconfig/aro-gateway"
    write_file aro_gateway_conf_filename conf_file true

    # shellcheck disable=SC2034
    local -r aro_gateway_service_filename="/etc/systemd/system/aro-gateway.service"

    # image, network and role are expanded here, while the unit text is built.
    # bash removes each backslash-newline pair inside the double quotes, so the
    # whole podman command ends up on the single ExecStart= line.
    # shellcheck disable=SC2034
    local -r aro_gateway_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=$aro_gateway_conf_filename
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -e ACR_RESOURCE_ID \
  -e DATABASE_ACCOUNT_NAME \
  -e GATEWAY_DOMAINS \
  -e GATEWAY_FEATURES \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -m 2g \
  --network=${network} \
  -p 80:8080 \
  -p 8081:8081 \
  -p 443:8443 \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  ${image} \
  ${role,,}
ExecStop=/usr/bin/podman stop -t 3600 %N
TimeoutStopSec=3600
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target
    "

    write_file aro_gateway_service_filename aro_gateway_service_file true
}

# configure_service_aro_rp
# Writes the RP environment file to /etc/sysconfig/aro-rp and the aro-rp systemd
# unit, which runs the RP container with podman.
# args:
# 1) image - nameref, string; RP container image
# 2) role - nameref, string; VMSS role, lower-cased and used as the container command
# 3) conf_file - nameref, string; aro rp environment file
# 4) network - nameref, string; podman network name to be attached
configure_service_aro_rp() {
    local -n image="$1" role="$2" conf_file="$3" network="$4"

    log "starting"
    log "Configuring aro-rp service"

    # Environment file the unit reads through EnvironmentFile=
    local -r aro_rp_conf_filename="/etc/sysconfig/aro-rp"
    write_file aro_rp_conf_filename conf_file true

    # shellcheck disable=SC2034
    local -r aro_rp_service_filename="/etc/systemd/system/aro-rp.service"

    # image, network and role are expanded here, while the unit text is built.
    # bash removes each backslash-newline pair inside the double quotes, so the
    # whole podman command ends up on the single ExecStart= line.
    # shellcheck disable=SC2034
    local -r aro_rp_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=$aro_rp_conf_filename
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -e ACR_RESOURCE_ID \
  -e ADMIN_API_CLIENT_CERT_COMMON_NAME \
  -e ARM_API_CLIENT_CERT_COMMON_NAME \
  -e AZURE_ARM_CLIENT_ID \
  -e AZURE_FP_CLIENT_ID \
  -e CLUSTER_MDM_ACCOUNT \
  -e CLUSTER_MDM_NAMESPACE \
  -e CLUSTER_MDSD_ACCOUNT \
  -e CLUSTER_MDSD_CONFIG_VERSION \
  -e CLUSTER_MDSD_NAMESPACE \
  -e DATABASE_ACCOUNT_NAME \
  -e DOMAIN_NAME \
  -e GATEWAY_DOMAINS \
  -e GATEWAY_RESOURCEGROUP \
  -e KEYVAULT_PREFIX \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -e MDSD_ENVIRONMENT \
  -e RP_FEATURES \
  -e ARO_INSTALL_VIA_HIVE \
  -e ARO_HIVE_DEFAULT_INSTALLER_PULLSPEC \
  -e ARO_ADOPT_BY_HIVE \
  -e OIDC_AFD_ENDPOINT \
  -e OIDC_STORAGE_ACCOUNT_NAME \
  -m 2g \
  --network=${network} \
  -p 443:8443 \
  -v /etc/aro-rp:/etc/aro-rp \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  ${image} \
  ${role,,}
ExecStop=/usr/bin/podman stop -t 3600 %N
TimeoutStopSec=3600
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file aro_rp_service_filename aro_rp_service_file true
}

# configure_service_aro_monitor
# Writes /etc/sysconfig/aro-monitor from the deployment's global variables and
# the aro-monitor systemd unit, which runs the RP image with the "monitor"
# command under podman.
# args:
# 1) image - nameref, string; RP container image
# 2) network - nameref, string; podman network name to be attached
configure_service_aro_monitor() {
    local -n image="$1" network="$2"

    log "starting"
    log "Configuring aro-monitor service"

    # shellcheck disable=SC2034
    local -r aro_monitor_service_conf_filename="/etc/sysconfig/aro-monitor"

    # DOMAIN_NAME, CLUSTER_MDSD_ACCOUNT, CLUSTER_MDSD_CONFIG_VERSION, GATEWAY_DOMAINS,
    # GATEWAY_RESOURCEGROUP, MDSD_ENVIRONMENT and CLUSTER_MDSD_NAMESPACE are not used,
    # but can't easily be refactored out. Should be revisited in the future.
    # shellcheck disable=SC2034
    local -r aro_monitor_service_conf_file="AZURE_FP_CLIENT_ID='${FPCLIENTID}'
DOMAIN_NAME='${LOCATION}.${CLUSTERPARENTDOMAINNAME}'
CLUSTER_MDSD_ACCOUNT='${CLUSTERMDSDACCOUNT}'
CLUSTER_MDSD_CONFIG_VERSION='${CLUSTERMDSDCONFIGVERSION}'
GATEWAY_DOMAINS='${GATEWAYDOMAINS}'
GATEWAY_RESOURCEGROUP='${GATEWAYRESOURCEGROUPNAME}'
MDSD_ENVIRONMENT='${MDSDENVIRONMENT}'
CLUSTER_MDSD_NAMESPACE='${CLUSTERMDSDNAMESPACE}'
CLUSTER_MDM_ACCOUNT='${CLUSTERMDMACCOUNT}'
CLUSTER_MDM_NAMESPACE=BBM
DATABASE_ACCOUNT_NAME='${DATABASEACCOUNTNAME}'
KEYVAULT_PREFIX='${KEYVAULTPREFIX}'
MDM_ACCOUNT='${RPMDMACCOUNT}'
MDM_NAMESPACE=BBM
RPIMAGE='${image}'"

    write_file aro_monitor_service_conf_filename aro_monitor_service_conf_file true

    # shellcheck disable=SC2034
    local -r aro_monitor_service_filename="/etc/systemd/system/aro-monitor.service"

    # image and network are expanded here, while the unit text is built; bash
    # removes each backslash-newline pair, leaving ExecStart= on a single line.
    # shellcheck disable=SC2034
    local -r aro_monitor_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=${aro_monitor_service_conf_filename}
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  --network=${network} \
  -e AZURE_FP_CLIENT_ID \
  -e DOMAIN_NAME \
  -e CLUSTER_MDSD_ACCOUNT \
  -e CLUSTER_MDSD_CONFIG_VERSION \
  -e GATEWAY_DOMAINS \
  -e GATEWAY_RESOURCEGROUP \
  -e MDSD_ENVIRONMENT \
  -e CLUSTER_MDSD_NAMESPACE \
  -e CLUSTER_MDM_ACCOUNT \
  -e CLUSTER_MDM_NAMESPACE \
  -e DATABASE_ACCOUNT_NAME \
  -e KEYVAULT_PREFIX \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -m 2.5g \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  ${image} \
  monitor
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file aro_monitor_service_filename aro_monitor_service_file true
}

# configure_service_aro_portal
# Writes /etc/sysconfig/aro-portal from the deployment's global variables and the
# aro-portal systemd unit, which runs the RP image with the "portal" command
# under podman.
# args:
# 1) image - nameref, string; RP container image
# 2) network - nameref, string; podman network name to be attached
configure_service_aro_portal() {
    local -n image="$1" network="$2"

    log "starting"
    log "Configuring aro portal service"

    # shellcheck disable=SC2034
    local -r aro_portal_service_conf_filename="/etc/sysconfig/aro-portal"
    # shellcheck disable=SC2034
    local -r aro_portal_service_conf_file="AZURE_PORTAL_ACCESS_GROUP_IDS='${PORTALACCESSGROUPIDS}'
AZURE_PORTAL_CLIENT_ID='${PORTALCLIENTID}'
AZURE_PORTAL_ELEVATED_GROUP_IDS='${PORTALELEVATEDGROUPIDS}'
DATABASE_ACCOUNT_NAME='${DATABASEACCOUNTNAME}'
KEYVAULT_PREFIX='${KEYVAULTPREFIX}'
MDM_ACCOUNT='${RPMDMACCOUNT}'
MDM_NAMESPACE=Portal
PORTAL_HOSTNAME='${LOCATION}.admin.${RPPARENTDOMAINNAME}'
RPIMAGE='${image}'"

    write_file aro_portal_service_conf_filename aro_portal_service_conf_file true

    # shellcheck disable=SC2034
    local -r aro_portal_service_filename="/etc/systemd/system/aro-portal.service"

    # image and network are expanded here, while the unit text is built; bash
    # removes each backslash-newline pair, leaving ExecStart= on a single line.
    # shellcheck disable=SC2034
    local -r aro_portal_service_file="[Unit]
After=network-online.target
Wants=network-online.target
StartLimitInterval=0

[Service]
EnvironmentFile=${aro_portal_service_conf_filename}
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  --network=${network} \
  -e AZURE_PORTAL_ACCESS_GROUP_IDS \
  -e AZURE_PORTAL_CLIENT_ID \
  -e AZURE_PORTAL_ELEVATED_GROUP_IDS \
  -e DATABASE_ACCOUNT_NAME \
  -e KEYVAULT_PREFIX \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -e PORTAL_HOSTNAME \
  -m 2g \
  -p 444:8444 \
  -p 2222:2222 \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  ${image} \
  portal
Restart=always
RestartSec=1

[Install]
WantedBy=multi-user.target"

    write_file aro_portal_service_filename aro_portal_service_file true
}

# configure_service_mdsd
# Writes a systemd drop-in for mdsd.service and the /etc/default/mdsd
# environment file. The certificate identity placed in MONITORING_GCS_AUTH_ID is
# read from the subject of the mdsd certificate on disk.
# args:
# 1) monitoring_role - nameref, string; can be "gateway" or "rp"
# 2) monitor_config_version - nameref, string; mdsd config version
configure_service_mdsd() {
    local -n role="$1"
    local -n monitor_config_version="$2"
    log "starting"
    log "configuring mdsd service"

    verify_role role

    local -r mdsd_service_dir="/etc/systemd/system/mdsd.service.d"
    mkdir -p "$mdsd_service_dir"

    # shellcheck disable=SC2034
    local -r mdsd_override_conf_filename="$mdsd_service_dir/override.conf"

    # Declaration and assignment are separate on purpose: `local v="$(cmd)"`
    # reports the status of `local`, which hid an openssl failure and silently
    # produced an empty MONITORING_GCS_AUTH_ID.
    local -r mdsd_certificate="/var/lib/waagent/Microsoft.Azure.KeyVault.Store/mdsd.pem"
    local mdsd_certificate_subject
    mdsd_certificate_subject="$(openssl x509 -in "$mdsd_certificate" -noout -subject)" \
        || abort "failed to read the subject of $mdsd_certificate"

    # Keep only what follows "CN = " in the subject line
    local mdsd_certificate_san
    mdsd_certificate_san="$(sed -e 's/.*CN = //' <<< "$mdsd_certificate_subject")"
    local -r mdsd_certificate_san

    # shellcheck disable=SC2034
    local -r mdsd_override_conf_file="[Unit]
After=network-online.target"

    write_file mdsd_override_conf_filename mdsd_override_conf_file true

    # shellcheck disable=SC2034
    local -r default_mdsd_filename="/etc/default/mdsd"
    # \$MDSD_ROLE_PREFIX is escaped so it is written literally; every other
    # variable and $(hostname) are expanded now
    # shellcheck disable=SC2034
    local -r default_mdsd_file="MDSD_ROLE_PREFIX=/var/run/mdsd/default
MDSD_OPTIONS=\"-A -d -r \$MDSD_ROLE_PREFIX\"

export MONITORING_GCS_ENVIRONMENT='$MDSDENVIRONMENT'
export MONITORING_GCS_ACCOUNT='$RPMDSDACCOUNT'
export MONITORING_GCS_REGION='$LOCATION'
export MONITORING_GCS_AUTH_ID_TYPE=AuthKeyVault
export MONITORING_GCS_AUTH_ID='$mdsd_certificate_san'
export MONITORING_GCS_NAMESPACE='$RPMDSDNAMESPACE'
export MONITORING_CONFIG_VERSION='$monitor_config_version'
export MONITORING_USE_GENEVA_CONFIG_SERVICE=true

export MONITORING_TENANT='$LOCATION'
export MONITORING_ROLE='$role'
export MONITORING_ROLE_INSTANCE=\"$(hostname)\"

export MDSD_MSGPACK_SORT_COLUMNS=\"1\""

    write_file default_mdsd_filename default_mdsd_file true
}

# configure_service_fluentbit
# Creates the fluentbit directories, writes the fluentbit configuration, its
# sysconfig file and the fluentbit systemd unit, which runs the container with
# podman on the host network.
# args:
# 1) conf_file - nameref, string; fluentbit configuration file
# 2) image - nameref, string; fluentbit container image to run
# 3) network - nameref, string; podman network name, accepted for parity with the
#       other service helpers but not referenced by the generated unit
configure_service_fluentbit() {
    # shellcheck disable=SC2034
    local -n conf_file="$1" image="$2" network="$3"

    log "starting"
    log "Configuring fluentbit service"

    mkdir -p /etc/fluentbit/
    mkdir -p /var/lib/fluent

    # fluentbit configuration, bind mounted into the container by the unit below
    # shellcheck disable=SC2034
    local -r conf_filename="/etc/fluentbit/fluentbit.conf"
    write_file conf_filename conf_file true

    # shellcheck disable=SC2034
    local -r sysconfig_filename="/etc/sysconfig/fluentbit"
    # shellcheck disable=SC2034
    local -r sysconfig_file="FLUENTBITIMAGE=${image}"
    write_file sysconfig_filename sysconfig_file true

    # shellcheck disable=SC2034
    local -r service_filename="/etc/systemd/system/fluentbit.service"

    # image is expanded here, while the unit text is built; bash removes each
    # backslash-newline pair, leaving ExecStart= on a single line.
    # shellcheck disable=SC2034
    local -r service_file="[Unit]
After=network-online.target
Wants=network-online.target
StartLimitIntervalSec=0

[Service]
RestartSec=1s
EnvironmentFile=/etc/sysconfig/fluentbit
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --security-opt label=disable \
  --entrypoint /opt/td-agent-bit/bin/td-agent-bit \
  --net=host \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -v /etc/fluentbit/fluentbit.conf:/etc/fluentbit/fluentbit.conf \
  -v /var/lib/fluent:/var/lib/fluent:z \
  -v /var/log/journal:/var/log/journal:ro \
  -v /etc/machine-id:/etc/machine-id:ro \
  ${image} \
  -c /etc/fluentbit/fluentbit.conf

ExecStop=/usr/bin/podman stop %N
Restart=always
RestartSec=5
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file service_filename service_file true
}

# configure_timers_mdm_mdsd
# Sets up certificate download and refresh for mdsd and mdm:
#   * writes a oneshot download-<component>-credentials service and a matching
#     timer unit for "mdsd" and for "mdm"
#   * writes /usr/local/bin/download-credentials.sh and runs it once per component
#   * writes watch-mdm-credentials.service/.path and enables the .path unit
# Only the .path unit is enabled by this function; the timer units are written
# but not enabled here.
# args:
# 1) role - nameref, string; can be "gateway" or "rp"
configure_timers_mdm_mdsd() {
    local -n role="$1"
    log "starting"

    verify_role role

    # Filled in by get_keyvault_suffix, which receives the variable names
    local keyvault_suffix secret_prefix
    get_keyvault_suffix role keyvault_suffix secret_prefix

    # One service + timer pair per component; $var is expanded into the unit text
    for var in "mdsd" "mdm"; do
        # shellcheck disable=SC2034
        local download_creds_service_filename="/etc/systemd/system/download-$var-credentials.service"
        # shellcheck disable=SC2034
        local download_creds_service_file="[Unit]
Description=Periodic $var credentials refresh

[Service]
Type=oneshot
ExecStart=/usr/local/bin/download-credentials.sh $var"

        write_file download_creds_service_filename download_creds_service_file true

        # shellcheck disable=SC2034
        local download_creds_timer_filename="/etc/systemd/system/download-$var-credentials.timer"
        # shellcheck disable=SC2034
        local download_creds_timer_file="[Unit]
Description=Periodic $var credentials refresh
After=network-online.target
Wants=network-online.target

[Timer]
OnBootSec=0min
OnCalendar=0/12:00:00
AccuracySec=5s

[Install]
WantedBy=timers.target"

        write_file download_creds_timer_filename download_creds_timer_file true
    done

    # The generated script below mixes two expansion times:
    #   * \$NAME is escaped and is evaluated when the generated script runs
    #   * $secret_prefix, $KEYVAULTPREFIX, $keyvault_suffix and $KEYVAULTDNSSUFFIX
    #     are expanded now and baked into the script text
    # NOTE(review): in the generated script `let RETRIES-=1` evaluates to 0 on the
    # last retry, which `let` reports as a non-zero status; under the script's
    # `set -eu` that looks like it ends the script there, before the EXIT trap
    # (installed after the login loop) is in place - confirm.
    local -r download_creds_script_filename="/usr/local/bin/download-credentials.sh"
    # shellcheck disable=SC2034
    local -r download_creds_script_file="#!/bin/bash
set -eu

COMPONENT=\$1
echo \"Download \$COMPONENT credentials\"

TEMP_DIR=\"\$(mktemp -d)\"
export AZURE_CONFIG_DIR=\"\$(mktemp -d)\"

echo \"Logging into Azure...\"
RETRIES=3
while [[ \$RETRIES -gt 0 ]]; do
    if az login -i --allow-no-subscriptions
    then
        echo \"az login successful\"
        break
    else
        echo \"az login failed. Retrying...\"
        let RETRIES-=1
        sleep 5
    fi
done

trap \"cleanup\" EXIT

cleanup() {
  az logout
  [[ \$TEMP_DIR =~ /tmp/.+ ]] && rm -rf \$TEMP_DIR
  [[ \$AZURE_CONFIG_DIR =~ /tmp/.+ ]] && rm -rf \$AZURE_CONFIG_DIR
}

if [[ \$COMPONENT = \"mdm\" ]]; then
  CURRENT_CERT_FILE=\"/etc/mdm.pem\"
elif [[ \$COMPONENT = \"mdsd\" ]]; then
  CURRENT_CERT_FILE=\"/var/lib/waagent/Microsoft.Azure.KeyVault.Store/mdsd.pem\"
else
  echo Invalid usage && exit 1
fi

SECRET_NAME=\"$secret_prefix-\${COMPONENT}\"
NEW_CERT_FILE=\"\$TEMP_DIR/\$COMPONENT.pem\"
for attempt in {1..5}; do
  az keyvault \
    secret \
    download \
    --file \"\$NEW_CERT_FILE\" \
    --id \"https://$KEYVAULTPREFIX-$keyvault_suffix.$KEYVAULTDNSSUFFIX/secrets/\$SECRET_NAME\" \
    && break
  if [[ \$attempt -lt 5 ]]; then sleep 10; else exit 1; fi
done

if [ -f \$NEW_CERT_FILE ]; then
  if [[ \$COMPONENT = \"mdsd\" ]]; then
    chown syslog:syslog \$NEW_CERT_FILE
  else
    sed -i -ne '1,/END CERTIFICATE/ p' \$NEW_CERT_FILE
  fi

  new_cert_sn=\"\$(openssl x509 -in \"\$NEW_CERT_FILE\" -noout -serial | awk -F= '{print \$2}')\"
  current_cert_sn=\"\$(openssl x509 -in \"\$CURRENT_CERT_FILE\" -noout -serial | awk -F= '{print \$2}')\"
  if [[ ! -z \$new_cert_sn ]] && [[ \$new_cert_sn != \"\$current_cert_sn\" ]]; then
    echo updating certificate for \$COMPONENT
    chmod 0600 \$NEW_CERT_FILE
    mv \$NEW_CERT_FILE \$CURRENT_CERT_FILE
  fi
else
  echo Failed to refresh certificate for \$COMPONENT && exit 1
fi"

    write_file download_creds_script_filename download_creds_script_file true

    chmod u+x /usr/local/bin/download-credentials.sh

    # Run the download once per component as a background job and wait on its
    # PID, so each download has finished before the next step starts
    $download_creds_script_filename mdsd &
    wait "$!"


    $download_creds_script_filename mdm &
    wait "$!"

    # Oneshot service whose only action is restarting mdm.service
    # shellcheck disable=SC2034
    local -r watch_mdm_creds_service_filename="/etc/systemd/system/watch-mdm-credentials.service"
    # shellcheck disable=SC2034
    local -r watch_mdm_creds_service_file="[Unit]
Description=Watch for changes in mdm.pem and restarts the mdm service

[Service]
Type=oneshot
ExecStart=/usr/bin/systemctl restart mdm.service

[Install]
WantedBy=multi-user.target"

    write_file watch_mdm_creds_service_filename watch_mdm_creds_service_file true

    # Path unit watching /etc/mdm.pem for modification; presumably it activates
    # the service of the same name (systemd's default pairing - confirm)
    # shellcheck disable=SC2034
    local -r watch_mdm_creds_path_filename='/usr/lib/systemd/system/watch-mdm-credentials.path'
    # shellcheck disable=SC2034
    local -r watch_mdm_creds_path_file='[Path]
PathModified=/etc/mdm.pem

[Install]
WantedBy=multi-user.target'

    write_file watch_mdm_creds_path_filename watch_mdm_creds_path_file true

    local -r watch_mdm_creds='watch-mdm-credentials.path'
    systemctl enable --now "$watch_mdm_creds" || abort "failed to enable and start $watch_mdm_creds"
}

# configure_service_mdm
# Writes /etc/sysconfig/mdm, creates /var/etw and writes the mdm systemd unit,
# which runs MetricsExtension from the mdm image with podman.
# args:
# 1) role - nameref, string; can be "gateway" or "rp"
# 2) image - nameref, string; mdm container image to run
# 3) network - nameref, string; podman network name to be attached
configure_service_mdm() {
    local -n role="$1" image="$2" network="$3"

    log "starting"
    log "Configuring mdm service"

    verify_role role

    # shellcheck disable=SC2034
    local -r sysconfig_mdm_filename="/etc/sysconfig/mdm"
    # $(hostname) is evaluated now and stored inside double quotes in the file
    # shellcheck disable=SC2034
    local -r sysconfig_mdm_file="MDMFRONTENDURL='${MDMFRONTENDURL}'
MDMIMAGE='${image}'
MDMSOURCEENVIRONMENT='${LOCATION}'
MDMSOURCEROLE='${role}'
MDMSOURCEROLEINSTANCE=\"$(hostname)\""

    write_file sysconfig_mdm_filename sysconfig_mdm_file true

    # Host directory bind mounted into the container by the unit below
    mkdir -p /var/etw

    # shellcheck disable=SC2034
    local -r mdm_service_filename="/etc/systemd/system/mdm.service"

    # Every shell variable in the unit text is expanded now, including HOSTNAME;
    # bash removes each backslash-newline pair, leaving ExecStart= on one line.
    # shellcheck disable=SC2034
    local -r mdm_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=/etc/sysconfig/mdm
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --entrypoint /usr/sbin/MetricsExtension \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  --network=${network} \
  -m 2g \
  -v /etc/mdm.pem:/etc/mdm.pem \
  -v /var/etw:/var/etw:z \
  ${image} \
  -CertFile /etc/mdm.pem \
  -FrontEndUrl ${MDMFRONTENDURL} \
  -Logger Console \
  -LogLevel Warning \
  -PrivateKeyFile /etc/mdm.pem \
  -SourceEnvironment ${LOCATION} \
  -SourceRole ${role} \
  -SourceRoleInstance ${HOSTNAME}
ExecStop=/usr/bin/podman stop %N
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file mdm_service_filename mdm_service_file true
}

# configure_vmss_aro_services
# Dispatches to the per-service configuration helpers for the given VMSS role:
# the gateway service for the gateway role, or the rp, monitor and portal
# services for the rp role. fluentbit, the mdm/mdsd credential timers, mdm and
# mdsd are then configured for either role, followed by the azsecd config scan.
# args:
# 1) r - nameref, string; role of VMSS
# 2) images - nameref, associative array; ARO container images
# 3) configs - nameref, associative array; configuration files and versions. The values should be a reference to variables, not dereferenced.
#                                          This is because the value is used when creating nameref variables by helper functions.
configure_vmss_aro_services() {
    local -n r="$1" images="$2" configs="$3"
    log "starting"
    verify_role "$1"

    # Name of the variable holding the podman network, shared by most helpers
    local -r network_ref="${configs["network"]}"

    case "$r" in
        "$role_gateway")
            configure_service_aro_gateway "${images["rp"]}" "$1" "${configs["gateway_config"]}" "$network_ref"
            ;;
        "$role_rp")
            configure_service_aro_rp "${images["rp"]}" "$1" "${configs["rp_config"]}" "$network_ref"
            configure_service_aro_monitor "${images["rp"]}" "$network_ref"
            configure_service_aro_portal "${images["rp"]}" "$network_ref"
            ;;
    esac

    configure_service_fluentbit "${configs["fluentbit"]}" "${images["fluentbit"]}" "$network_ref"
    configure_timers_mdm_mdsd "$1"
    configure_service_mdm "$1" "${images["mdm"]}" "$network_ref"
    configure_service_mdsd "$1" "${configs["mdsd"]}"
    run_azsecd_config_scan
}

util_common="util-common.sh"
# Load the shared helpers only when a file of that name is present
if [[ -f "${util_common}" ]]; then
    # shellcheck source=util-common.sh
    . "${util_common}"
fi
#!/bin/bash
# This file is intended to be sourced by bootstrapping scripts for commonly used functions

# configure_sshd
# Rewrites "PasswordAuthentication no" to "PasswordAuthentication yes" in
# /etc/ssh/sshd_config and reloads sshd, aborting when the reload fails.
# Password authentication is required for the VMSS Access JIT to work.
configure_sshd() {
    log "starting"
    local -r sshd_config="/etc/ssh/sshd_config"

    log "Editing $sshd_config to allow password authentication"
    sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/g' "${sshd_config}"

    if ! systemctl reload sshd.service; then
        abort "sshd failed to reload"
    fi
}

# configure_logrotate clobbers /etc/logrotate.conf and optionally writes drop-in
# files into /etc/logrotate.d
# args:
# 1) dropin_files - nameref, associative array, optional; logrotate files to write to /etc/logrotate.d
#       Key name dictates filenames written to /etc/logrotate.d.
# Example:
#   Key dictates the filename written in /etc/logrotate.d
#   shellcheck disable=SC2034
#   local -rA logrotate_dropins=(
#      ["gateway"]="$gateway_log_file"
#   )
configure_logrotate() {
    local -n dropin_files="${1:-empty_str}"
    log "starting"

    # shellcheck disable=SC2034
    local -r logrotate_conf_filename='/etc/logrotate.conf'
    # shellcheck disable=SC2034
    local -r logrotate_conf_file='# see "man logrotate" for details
# rotate log files weekly
weekly

# keep 2 weeks worth of backlogs
rotate 2

# create new (empty) log files after rotating old ones
create

# use date as a suffix of the rotated file
dateext

# uncomment this if you want your log files compressed
compress

# RPM packages drop log rotation information into this directory
include /etc/logrotate.d

# no packages own wtmp and btmp -- we will rotate them here
/var/log/wtmp {
    monthly
    create 0664 root utmp
        minsize 1M
    rotate 1
}

/var/log/btmp {
    missingok
    monthly
    create 0600 root utmp
    rotate 1
}'

    write_file logrotate_conf_filename logrotate_conf_file true

    if [ -n "${dropin_files[*]}" ]; then
        local -r logrotate_d="/etc/logrotate.d"
        log "Writing logrotate files to $logrotate_d"

        # Declared once and left writable: re-declaring these with `local -r`
        # inside the loop failed on the second drop-in because the variables
        # were already readonly, so only the first drop-in was written correctly
        # shellcheck disable=SC2034
        local dropin_name dropin_filename dropin_file
        for dropin_name in "${!dropin_files[@]}"; do
            dropin_filename="$logrotate_d/$dropin_name"
            dropin_file="${dropin_files["$dropin_name"]}"
            write_file dropin_filename dropin_file true
        done
    fi
}

# pull_container_images
# Logs into Azure with the VM's managed identity, logs into the ACR named by the
# last path segment of ACRRESOURCEID, pulls every requested image with podman and
# logs out again. Each step is run through retry with a 30 second wait.
# args:
# 1) pull_images - nameref, string array; names of the variables holding the images to pull
# 2) registry_conf - nameref, string, optional; path to docker/podman configuration file
pull_container_images() {
    local -n pull_images="$1"
    local -n registry_conf="${2:-empty_str}"
    log "starting"

    # shellcheck disable=SC2034
    local -ri retry_time=30

    # cmd is rebuilt for every step; it is local so the assignments below cannot
    # overwrite a variable of the same name in the caller or the global scope
    local -a cmd

    # The managed identity that the VM runs as only has a single roleassignment.
    # This role assignment is ACRPull which is not necessarily present in the
    # subscription we're deploying into.  If the identity does not have any
    # role assignments scoped on the subscription we're deploying into, it will
    # not show on az login -i, which is why the below line is commented.
    # az account set -s "$SUBSCRIPTIONID"
    cmd=(
        az
        login
        -i
        --allow-no-subscriptions
    )

    log "Running az login with retries"
    retry cmd retry_time

    # Suppress emulation output for podman instead of docker for az acr compatibility
    mkdir -p /etc/containers/
    mkdir -p /root/.docker
    touch /etc/containers/nodocker

    # This name is used in the case that az acr login searches for this in its environment
    export REGISTRY_AUTH_FILE="/root/.docker/config.json"

    if [ -n "${registry_conf}" ]; then
        write_file REGISTRY_AUTH_FILE registry_conf true
    fi

    log "logging into prod acr"
    cmd=(
        az
        acr
        login
        --name
        # The registry name is the last path segment of the ACR resource ID
        "${ACRRESOURCEID##*/}"
    )

    retry cmd retry_time

    # Each element of pull_images is the NAME of a variable holding an image
    local i
    for i in "${pull_images[@]}"; do
        local -n image="$i"
        cmd=(
            podman
            pull
            "$image"
        )

        log "Pulling image $image with retries now"
        retry cmd retry_time
    done

    # shellcheck disable=SC2034
    cmd=(
        az
        logout
    )

    log "Running az logout with retries"
    retry cmd retry_time
}

# configure_certs
# Decodes the role specific certificates onto disk, then (for every role except
# devproxy, which returns early) splits the system CA bundle into hashed
# per-certificate files and writes the vsa-nodescan-agent configuration.
# args:
# 1) role - nameref, string; can be "devproxy" or "rp"
configure_certs() {
    local -n role="$1"
    log "starting"
    log "Configuring certificates for $role"

    verify_role role true

    case "$role" in
        "devproxy")
            local -r proxy_certs_basedir="/etc/proxy"
            mkdir -p "$proxy_certs_basedir"
            base64 -d <<<"$PROXYCERT" > "$proxy_certs_basedir/proxy.crt"
            base64 -d <<<"$PROXYKEY" > "$proxy_certs_basedir/proxy.key"
            base64 -d <<<"$PROXYCLIENTCERT" > "$proxy_certs_basedir/proxy-client.crt"
            chown -R 1000:1000 /etc/proxy
            chmod 0600 "$proxy_certs_basedir/proxy.key"
            # The dev proxy needs nothing beyond its own certificates
            return 0
            ;;
        "rp")
            local -r rp_certs_basedir="/etc/aro-rp"
            mkdir -p "$rp_certs_basedir"
            base64 -d <<<"$ADMINAPICABUNDLE" > "$rp_certs_basedir/admin-ca-bundle.pem"
            # The ARM CA bundle is only written when one was provided
            if [[ -n "$ARMAPICABUNDLE" ]]; then
                base64 -d <<<"$ARMAPICABUNDLE" > "$rp_certs_basedir/arm-ca-bundle.pem"
            fi
            chown -R 1000:1000 "$rp_certs_basedir"
            ;;
    esac

    # setting MONITORING_GCS_AUTH_ID_TYPE=AuthKeyVault seems to have caused mdsd not
    # to honour SSL_CERT_FILE any more, heaven only knows why.
    local -r ssl_certs_basedir="/usr/lib/ssl/certs"
    mkdir -p "$ssl_certs_basedir"
    csplit -f "$ssl_certs_basedir/cert-" -b %03d.pem /etc/pki/tls/certs/ca-bundle.crt /^$/1 "{*}" 1>/dev/null
    c_rehash "$ssl_certs_basedir"

    # we leave clientId blank as long as only 1 managed identity assigned to vmss
    # if we have more than 1, we will need to populate with clientId used for off-node scanning
    # shellcheck disable=SC2034
    local -r nodescan_agent_filename="/etc/default/vsa-nodescan-agent.config"
    # TenantId and QualysStoreBaseUrl are inserted verbatim, without added quotes
    # shellcheck disable=SC2034
    local -r nodescan_agent_file="{
    \"Nice\": 19,
    \"Timeout\": 10800,
    \"ClientId\": \"\",
    \"TenantId\": ${AZURESECPACKVSATENANTID},
    \"QualysStoreBaseUrl\": ${AZURESECPACKQUALYSURL},
    \"ProcessTimeout\": 300,
    \"CommandDelay\": 0
  }"

    write_file nodescan_agent_filename nodescan_agent_file true
}

# run_azsecd_config_scan
# Runs `/usr/local/bin/azsecd config -s <scan> -d P1D` once for each of the
# baseline, clamav and software scans.
run_azsecd_config_scan() {
    log "starting"

    local -ar configs=(
        "baseline"
        "clamav"
        "software"
    )

    log "Scanning configuration files with azsecd ${configs[*]}"
    # Local loop variable and quoted expansion: nothing leaks into the caller's
    # scope and each scan name stays a single word
    local scan
    for scan in "${configs[@]}"; do
        log "Scanning config file $scan now"
        /usr/local/bin/azsecd config -s "$scan" -d P1D
    done
}

# create_required_dirs
# Creates the directories the bootstrap relies on, aborting on the first one
# that cannot be created.
create_required_dirs() {
    # Local and readonly: the list previously leaked into the global scope
    local -ra create_dirs=(
        /var/log/journal
        /var/lib/waagent/Microsoft.Azure.KeyVault.Store
        # Does not exist on devProxyVMSS
        /var/opt/microsoft/linuxmonagent
    )

    local d
    for d in "${create_dirs[@]}"; do
        log "Creating directory $d"
        mkdir -p "$d" || abort "failed to create directory $d"
    done
}

# create_podman_networks
# Runs `podman network create --subnet <subnet> <name>` for every entry.
# args:
# 1) nets - nameref, associative array; Networks to be created
#       Key is the network name, value is the subnet with cidr notation
create_podman_networks() {
    local -n nets="$1"
    log "starting"

    # Local loop variable and quoted key expansion: nothing leaks into the
    # caller's scope and each network name stays a single word
    local n
    for n in "${!nets[@]}"; do
        log "Creating podman network \"$n\" with subnet \"${nets[$n]}\""
        podman network \
            create \
            --subnet "${nets["$n"]}" \
            "$n"
    done
}

# firewalld_configure_backend
# Switches FirewallBackend from nftables to iptables in
# /etc/firewalld/firewalld.conf
firewalld_configure_backend() {
    log "starting"

    log "Changing firewalld backend to iptables"
    # Local on purpose: several helpers in this file use `conf_file` as a nameref
    # or variable, and a global assignment here could overwrite theirs
    local -r conf_file="/etc/firewalld/firewalld.conf"
    sed -i 's/FirewallBackend=nftables/FirewallBackend=iptables/g' "$conf_file"
}

# firewalld_configure
# Switches firewalld to the iptables backend, enables and starts firewalld, opens
# the requested ports in the runtime configuration of the default zone and then
# saves the runtime configuration as the permanent one.
# args:
# 1) ports - nameref, string array; ports to be enabled.
#       Ports must be postfixed with /tcp or /udp
firewalld_configure() {
    local -n ports="$1"
    log "starting"

    firewalld_configure_backend

    # shellcheck disable=SC2034
    local -ra service=(
        "firewalld"
    )
    enable_services service

    log "Enabling ports ${ports[*]} on default firewalld zone"
    local port
    for port in "${ports[@]}"; do
        log "Enabling port $port now"
        # Add the port to the RUNTIME configuration (no --permanent). With
        # --permanent the port only reached the permanent configuration, which
        # --runtime-to-permanent below then overwrote with a runtime
        # configuration that never contained it.
        firewall-cmd "--add-port=$port"
    done

    log "Writing runtime config to permanent config"
    firewall-cmd --runtime-to-permanent
}

#Start of gatewayVMSS.sh
#!/bin/bash

# Strict mode: stop on a failing command, treat a failure anywhere in a
# pipeline as a failure of the pipeline, and reject unset variables.
set -o errexit
set -o pipefail
set -o nounset

# main
# Bootstraps a gateway VMSS instance. In order, it:
#   - creates required directories and configures sshd and the rpm repos
#   - updates all packages (WALinuxAgent excluded) and installs the packages
#     listed in install_pkgs
#   - configures the dnf cron job and logrotate
#   - pulls the mdm, rp and fluentbit container images
#   - creates the "aro" podman network and opens the gateway ports
#   - hands image and config references to configure_vmss_aro_services
#   - enables the gateway services and reboots the VM
# Reads from the environment: RPIMAGE, MDMIMAGE, FLUENTBITIMAGE,
# ACRRESOURCEID, DATABASEACCOUNTNAME, RPMDMACCOUNT, GATEWAYDOMAINS,
# GATEWAYFEATURES, GATEWAYMDSDCONFIGVERSION.
# Also reads role_gateway, which is defined outside this function.
# Arrays and most scalars are passed to helpers BY NAME (namerefs), which is
# why many locals look unused here (SC2034).
main() {
    # transaction attempt retry time in seconds
    # shellcheck disable=SC2034
    local -ri retry_wait_time=30
    # number of attempts for each package transaction
    local -ri pkg_retry_count=60

    create_required_dirs
    configure_sshd
    configure_rpm_repos retry_wait_time \
                    "$pkg_retry_count"

    # shellcheck disable=SC2034
    local -ar exclude_pkgs=(
        "-x WALinuxAgent"
        "-x WALinuxAgent-udev"
    )

    dnf_update_pkgs exclude_pkgs \
                    retry_wait_time \
                    "$pkg_retry_count"

    # shellcheck disable=SC2034
    local -ra install_pkgs=(
        azure-cli
        clamav
        azsec-clamav
        azure-mdsd
        azure-security
        podman
        podman-docker
        openssl-perl
        # hack - we are installing python3 on hosts due to an issue with Azure Linux Extensions https://github.com/Azure/azure-linux-extensions/pull/1505
        python3
        # required for podman networking
        firewalld
    )

    dnf_install_pkgs install_pkgs \
                     retry_wait_time \
                     "$pkg_retry_count"

    # TODO remove this once MicrosoftCBLMariner:cbl-mariner:cbl-mariner-2-gen2-fips supports automatic updates
    # Reference: https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade#supported-os-images
    configure_dnf_cron_job

    # shellcheck disable=SC2119
    configure_logrotate

    # The mdm image is pulled from the same registry host as the RP image:
    # registry part of RPIMAGE + path part of MDMIMAGE.
    # shellcheck disable=SC2034 disable=SC2153
    local -r mdmimage="${RPIMAGE%%/*}/${MDMIMAGE#*/}"
    local -r rpimage="$RPIMAGE"
    # shellcheck disable=SC2034
    local -r fluentbit_image="$FLUENTBITIMAGE"
    # values are references to variables, they should not be dereferenced here
    # shellcheck disable=SC2034
    local -rA aro_images=(
        ["mdm"]="mdmimage"
        ["rp"]="rpimage"
        ["fluentbit"]="fluentbit_image"
    )

    pull_container_images aro_images

    local -r aro_network="aro"
    # shellcheck disable=SC2034
    local -rA networks=(
        ["$aro_network"]="192.168.254.0/24"
    )
    create_podman_networks networks

    # shellcheck disable=SC2034
    local -ra enable_ports=(
        # RP gateway
        "80/tcp"
        "8081/tcp"
        "443/tcp"
        # JIT ssh
        "22/tcp"
    )

    firewalld_configure enable_ports

    # shellcheck disable=SC2034
    local -r fluentbit_conf_file="[INPUT]
Name systemd
Tag journald
Systemd_Filter _COMM=aro
DB /var/lib/fluent/journaldb

[FILTER]
	Name modify
	Match journald
	Remove_wildcard _
	Remove TIMESTAMP

[OUTPUT]
	Name forward
	Match *
	Port 29230"

    # shellcheck disable=SC2034
    local -r aro_gateway_conf_file="ACR_RESOURCE_ID='$ACRRESOURCEID'
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
MDM_ACCOUNT='$RPMDMACCOUNT'
MDM_NAMESPACE='${role_gateway^}'
GATEWAY_DOMAINS='$GATEWAYDOMAINS'
GATEWAY_FEATURES='$GATEWAYFEATURES'
RPIMAGE='$rpimage'"

    # shellcheck disable=SC2034
    local -r mdsd_config_version="$GATEWAYMDSDCONFIGVERSION"

    # values are references to variables, they should not be dereferenced here
    # shellcheck disable=SC2034
    local -rA aro_configs=(
        ["gateway_config"]="aro_gateway_conf_file"
        ["fluentbit"]="fluentbit_conf_file"
        ["mdsd"]="mdsd_config_version"
        ["network"]="aro_network"
    )

    configure_vmss_aro_services role_gateway \
                                aro_images \
                                aro_configs

    # shellcheck disable=SC2034
    local -ra gateway_services=(
        "aro-gateway"
        "azsecd"
        "mdsd"
        "mdm"
        "chronyd"
        "fluentbit"
        "download-mdsd-credentials.timer"
        "download-mdm-credentials.timer"
        "firewalld"
    )

    enable_services gateway_services

    reboot_vm
}

# Abort immediately when AZURECLOUDNAME was not handed over (unset or empty);
# otherwise publish it to child processes as AZURE_CLOUD_NAME.
AZURE_CLOUD_NAME="${AZURECLOUDNAME:?"Failed to carry over variables"}"
export AZURE_CLOUD_NAME

# util.sh does not exist when deployed to VMSS via VMSS extensions
# This is because commonVMSS.sh is concatenated with this script
# It is therefore sourced only when it is present next to this script.
util="util.sh"
# shellcheck source=util.sh
[[ ! -f "$util" ]] || source "$util"

main "$@"
')))]" + } + } + }, + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": true, + "enableAutomaticUpgrade": true, + "settings": { + "GCS_AUTO_CONFIG": true } } } diff --git a/pkg/deploy/assets/rp-production.json b/pkg/deploy/assets/rp-production.json index c073531b2bd..952d638d946 100644 --- a/pkg/deploy/assets/rp-production.json +++ b/pkg/deploy/assets/rp-production.json @@ -439,7 +439,20 @@ "autoUpgradeMinorVersion": true, "settings": {}, "protectedSettings": { - "script": "[base64(concat(base64ToString('c2V0IC1leAoK'),'ACRRESOURCEID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('acrResourceId')),''')\n','ADMINAPICLIENTCERTCOMMONNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('adminApiClientCertCommonName')),''')\n','ARMAPICLIENTCERTCOMMONNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('armApiClientCertCommonName')),''')\n','ARMCLIENTID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('armClientId')),''')\n','AZURECLOUDNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureCloudName')),''')\n','AZURESECPACKQUALYSURL=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureSecPackQualysUrl')),''')\n','AZURESECPACKVSATENANTID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureSecPackVSATenantId')),''')\n','CLUSTERMDMACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterMdmAccount')),''')\n','CLUSTERMDSDACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterMdsdAccount')),''')\n','CLUSTERMDSDCONFIGVERSION=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterMdsdConfigVersion')),''')\n','CLUSTERMDSDNAMESPACE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterMdsdNamespace')),''')\n','CLUSTERPARENTDOMAINNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterParentDomainName')),''')\n','DATABASEACCOUNTNAME=$(base64 -d 
\u003c\u003c\u003c''',base64(parameters('databaseAccountName')),''')\n','FLUENTBITIMAGE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('fluentbitImage')),''')\n','FPCLIENTID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('fpClientId')),''')\n','FPSERVICEPRINCIPALID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('fpServicePrincipalId')),''')\n','GATEWAYDOMAINS=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayDomains')),''')\n','GATEWAYRESOURCEGROUPNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayResourceGroupName')),''')\n','GATEWAYSERVICEPRINCIPALID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayServicePrincipalId')),''')\n','KEYVAULTDNSSUFFIX=$(base64 -d \u003c\u003c\u003c''',base64(parameters('keyvaultDNSSuffix')),''')\n','KEYVAULTPREFIX=$(base64 -d \u003c\u003c\u003c''',base64(parameters('keyvaultPrefix')),''')\n','MDMFRONTENDURL=$(base64 -d \u003c\u003c\u003c''',base64(parameters('mdmFrontendUrl')),''')\n','MDSDENVIRONMENT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('mdsdEnvironment')),''')\n','PORTALACCESSGROUPIDS=$(base64 -d \u003c\u003c\u003c''',base64(parameters('portalAccessGroupIds')),''')\n','PORTALCLIENTID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('portalClientId')),''')\n','PORTALELEVATEDGROUPIDS=$(base64 -d \u003c\u003c\u003c''',base64(parameters('portalElevatedGroupIds')),''')\n','RPFEATURES=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpFeatures')),''')\n','RPIMAGE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpImage')),''')\n','RPMDMACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdmAccount')),''')\n','RPMDSDACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdsdAccount')),''')\n','RPMDSDCONFIGVERSION=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdsdConfigVersion')),''')\n','RPMDSDNAMESPACE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdsdNamespace')),''')\n','RPPARENTDOMAINNAME=$(base64 -d 
\u003c\u003c\u003c''',base64(parameters('rpParentDomainName')),''')\n','OIDCSTORAGEACCOUNTNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('oidcStorageAccountName')),''')\n','CLUSTERSINSTALLVIAHIVE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clustersInstallViaHive')),''')\n','CLUSTERSADOPTBYHIVE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clustersAdoptByHive')),''')\n','CLUSTERDEFAULTINSTALLERPULLSPEC=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterDefaultInstallerPullspec')),''')\n','ADMINAPICABUNDLE=''',parameters('adminApiCaBundle'),'''\n','ARMAPICABUNDLE=''',parameters('armApiCaBundle'),'''\n','MDMIMAGE=''/distroless/genevamdm:2.2024.626.1539-d1a6e7-20240715t0935@sha256:372fbc981bbfdf2b9a9d0ffdca2c51ed389b291a3bcff0401e9afb0c01605823''\n','LOCATION=$(base64 -d \u003c\u003c\u003c''',base64(resourceGroup().location),''')\n','SUBSCRIPTIONID=$(base64 -d \u003c\u003c\u003c''',base64(subscription().subscriptionId),''')\n','RESOURCEGROUPNAME=$(base64 -d \u003c\u003c\u003c''',base64(resourceGroup().name),''')\n','\n',base64ToString('#!/bin/bash

# The VMSS Access JIT flow needs password logins: flip sshd's
# PasswordAuthentication from "no" to "yes" and reload the daemon.
echo "setting ssh password authentication"
sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/g' /etc/ssh/sshd_config
systemctl reload sshd.service

# yum can stall on resource locks, so every transaction below is attempted
# up to 60 times with 30 seconds between attempts; the script exits 1 when
# the 60th attempt fails.
echo "running RHUI fix"
attempt=1
until yum update -y --disablerepo='*' --enablerepo='rhui-microsoft-azure*'; do
  if (( attempt >= 60 )); then
    exit 1
  fi
  sleep 30
  attempt=$(( attempt + 1 ))
done

echo "running yum update"
attempt=1
until yum -y -x WALinuxAgent -x WALinuxAgent-udev update --allowerasing; do
  if (( attempt >= 60 )); then
    exit 1
  fi
  sleep 30
  attempt=$(( attempt + 1 ))
done

echo "extending partition table"
# Linux block devices are inconsistently named
# it's difficult to tie the lvm pv to the physical disk using /dev/disk files, which is why lvs is used here
# The pipeline keeps the second line of the lvs output, takes its third
# space-separated field, cuts everything from the first "(" onwards and
# removes all digits; the remainder is used as the disk device path.
physicalDisk="$(lvs -o devices -a | head -n2 | tail -n1 | cut -d ' ' -f 3 | cut -d \( -f 1 | tr -d '[:digit:]')"
# Grow partition number 2 of that disk.
growpart "$physicalDisk" 2

echo "extending filesystems"
# Extend the root LV by 20% of the free space, then grow the XFS filesystem
# mounted at /.
lvextend -l +20%FREE /dev/rootvg/rootlv
xfs_growfs /

# Extend the var LV by all of the free space that is left, then grow the XFS
# filesystem mounted at /var.
lvextend -l +100%FREE /dev/rootvg/varlv
xfs_growfs /var

# Trust the EPEL 8 and Microsoft package signing keys, then install the
# EPEL release package (retried up to 60 times, 30 seconds apart).
echo "importing rpm repositories"
for key_url in \
  https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-8 \
  https://packages.microsoft.com/keys/microsoft.asc; do
  rpm --import "$key_url"
done

attempt=1
until yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm; do
  if (( attempt >= 60 )); then
    exit 1
  fi
  sleep 30
  attempt=$(( attempt + 1 ))
done

echo "configuring logrotate"
# Overwrite /etc/logrotate.conf with the policy below. The heredoc delimiter
# is quoted, so the content is written literally (no shell expansion).
cat >/etc/logrotate.conf <<'EOF'
# see "man logrotate" for details
# rotate log files weekly
weekly

# keep 2 weeks worth of backlogs
rotate 2

# create new (empty) log files after rotating old ones
create

# use date as a suffix of the rotated file
dateext

# uncomment this if you want your log files compressed
compress

# RPM packages drop log rotation information into this directory
include /etc/logrotate.d

# no packages own wtmp and btmp -- we'll rotate them here
/var/log/wtmp {
    monthly
    create 0664 root utmp
        minsize 1M
    rotate 1
}

/var/log/btmp {
    missingok
    monthly
    create 0600 root utmp
    rotate 1
}
EOF

# NOTE(review): the message mentions "running yum update", but this section
# only writes the repo file; packages are installed by the retry loop that
# follows it.
echo "configuring yum repository and running yum update"
# Define the azure-cli and azurecore repositories on packages.microsoft.com.
# gpgcheck is enabled for azure-cli and disabled for azurecore.
cat >/etc/yum.repos.d/azure.repo <<'EOF'
[azure-cli]
name=azure-cli
baseurl=https://packages.microsoft.com/yumrepos/azure-cli
enabled=yes
gpgcheck=yes

[azurecore]
name=azurecore
baseurl=https://packages.microsoft.com/yumrepos/azurecore
enabled=yes
gpgcheck=no
EOF

# Register the var_log_t SELinux file context for /var/log/journal and
# everything below it, then make sure the directory exists.
semanage fcontext -a -t var_log_t "/var/log/journal(/.*)?"
mkdir -p /var/log/journal

# Packages required on the RP host, installed in a single yum transaction
# that is retried up to 60 times, 30 seconds apart (exit 1 after the last
# failed attempt).
rp_packages=(
  clamav
  azsec-clamav
  azsec-monitor
  azure-cli
  azure-mdsd
  azure-security
  podman
  podman-docker
  openssl-perl
  # hack - we are installing python3 on hosts due to an issue with Azure Linux Extensions https://github.com/Azure/azure-linux-extensions/pull/1505
  python3
)

attempt=1
until yum -y install "${rp_packages[@]}"; do
  if (( attempt >= 60 )); then
    exit 1
  fi
  sleep 30
  attempt=$(( attempt + 1 ))
done

# Kernel hardening and firewall ports.
# accept_ra is disabled because of https://access.redhat.com/security/cve/cve-2020-13401
echo "applying firewall rules"
printf '%s\n' 'net.ipv6.conf.all.accept_ra=0' >/etc/sysctl.d/02-disable-accept-ra.conf

# Core dumps are piped to /bin/true.
printf '%s\n' 'kernel.core_pattern = |/bin/true' >/etc/sysctl.d/01-disable-core.conf
sysctl --system

# Open the RP ports in the permanent firewalld configuration.
for rp_port in 443 444 2222; do
  firewall-cmd --add-port="${rp_port}/tcp" --permanent
done

AZURE_CLOUD_NAME=$AZURECLOUDNAME
export AZURE_CLOUD_NAME

# Log in with the VM's managed identity so the images can be pulled from ACR.
echo "logging into prod acr"
az login -i --allow-no-subscriptions

# Suppress emulation output for podman instead of docker for az acr compatability
mkdir -p /etc/containers/ /root/.docker
touch /etc/containers/nodocker

# The registry name is the last path segment of the ACR resource id.
REGISTRY_AUTH_FILE=/root/.docker/config.json az acr login --name "${ACRRESOURCEID##*/}"

# The mdm image lives in the same registry as the RP image: take the registry
# host from RPIMAGE and the repository path from MDMIMAGE.
MDMIMAGE="${RPIMAGE%%/*}/${MDMIMAGE#*/}"
for image in "$MDMIMAGE" "$RPIMAGE" "$FLUENTBITIMAGE"; do
  docker pull "$image"
done

az logout

echo "configuring fluentbit service"
# Directories for the fluentbit configuration and its state (the systemd
# input plugin keeps its DB at /var/lib/fluent/journaldb, see below).
mkdir -p /etc/fluentbit/
mkdir -p /var/lib/fluent

# fluentbit pipeline definition. The heredoc delimiter is quoted, so $LOGKIND
# in the Rule lines is written literally and is not expanded by the shell.
cat >/etc/fluentbit/fluentbit.conf <<'EOF'
[INPUT]
	Name systemd
	Tag journald
	Systemd_Filter _COMM=aro
	DB /var/lib/fluent/journaldb

[FILTER]
	Name modify
	Match journald
	Remove_wildcard _
	Remove TIMESTAMP

[FILTER]
	Name rewrite_tag
	Match journald
	Rule $LOGKIND asyncqos asyncqos true

[FILTER]
	Name modify
	Match asyncqos
	Remove CLIENT_PRINCIPAL_NAME
	Remove FILE
	Remove COMPONENT

[FILTER]
	Name rewrite_tag
	Match journald
	Rule $LOGKIND ifxaudit ifxaudit false

[FILTER]
	Name rewrite_tag
	Match journald
	Rule $LOGKIND outboundRequests outboundRequests false

[FILTER]
	Name modify
	Match  outboundRequests
	Remove CLIENT_PRINCIPAL_NAME
	Remove FILE
	Remove COMPONENT

[OUTPUT]
	Name forward
	Match *
	Port 29230
EOF

# Environment file read by the unit below; it supplies $FLUENTBITIMAGE.
echo "FLUENTBITIMAGE=$FLUENTBITIMAGE" >/etc/sysconfig/fluentbit

# systemd unit running fluentbit as a container on the host network.
# NOTE(review): RestartSec is set twice in [Service] (1s, then 5) and the
# start limit is disabled both as StartLimitIntervalSec in [Unit] and as
# StartLimitInterval in [Service] - confirm which values are intended.
cat >/etc/systemd/system/fluentbit.service <<'EOF'
[Unit]
After=network-online.target
Wants=network-online.target
StartLimitIntervalSec=0

[Service]
RestartSec=1s
EnvironmentFile=/etc/sysconfig/fluentbit
ExecStartPre=-/usr/bin/docker rm -f %N
ExecStart=/usr/bin/docker run \
  --security-opt label=disable \
  --entrypoint /opt/td-agent-bit/bin/td-agent-bit \
  --net=host \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -v /etc/fluentbit/fluentbit.conf:/etc/fluentbit/fluentbit.conf \
  -v /var/lib/fluent:/var/lib/fluent:z \
  -v /var/log/journal:/var/log/journal:ro \
  -v /etc/machine-id:/etc/machine-id:ro \
  $FLUENTBITIMAGE \
  -c /etc/fluentbit/fluentbit.conf

ExecStop=/usr/bin/docker stop %N
Restart=always
RestartSec=5
StartLimitInterval=0

[Install]
WantedBy=multi-user.target
EOF

# Write the CA bundles used by the RP into /etc/aro-rp.
# -p keeps this step idempotent: the script runs under `set -e`, so a plain
# mkdir would abort a re-run once the directory already exists.
mkdir -p /etc/aro-rp
# ADMINAPICABUNDLE and ARMAPICABUNDLE arrive base64 encoded.
base64 -d <<<"$ADMINAPICABUNDLE" >/etc/aro-rp/admin-ca-bundle.pem
# The ARM bundle is optional and only written when a value was supplied.
if [[ -n "$ARMAPICABUNDLE" ]]; then
  base64 -d <<<"$ARMAPICABUNDLE" >/etc/aro-rp/arm-ca-bundle.pem
fi
# Hand the directory to uid/gid 1000.
chown -R 1000:1000 /etc/aro-rp

echo "configuring mdm service"
# Environment file for mdm.service. The heredoc is unquoted, so the variables
# and $(hostname) are expanded now, while the VM is being provisioned.
cat >/etc/sysconfig/mdm <<EOF
MDMFRONTENDURL='$MDMFRONTENDURL'
MDMIMAGE='$MDMIMAGE'
MDMSOURCEENVIRONMENT='$LOCATION'
MDMSOURCEROLE=rp
MDMSOURCEROLEINSTANCE='$(hostname)'
EOF

# /var/etw is mounted into the mdm, aro-rp, aro-monitor and aro-portal
# containers. -p keeps this step idempotent: the script runs under `set -e`,
# so a plain mkdir would abort a re-run once the directory already exists.
mkdir -p /var/etw
# systemd unit running MetricsExtension from the mdm image. The heredoc
# delimiter is quoted: the $VARS are resolved by systemd from the
# EnvironmentFile, not by this script.
cat >/etc/systemd/system/mdm.service <<'EOF'
[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=/etc/sysconfig/mdm
ExecStartPre=-/usr/bin/docker rm -f %N
ExecStart=/usr/bin/docker run \
  --entrypoint /usr/sbin/MetricsExtension \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -m 2g \
  -v /etc/mdm.pem:/etc/mdm.pem \
  -v /var/etw:/var/etw:z \
  $MDMIMAGE \
  -CertFile /etc/mdm.pem \
  -FrontEndUrl $MDMFRONTENDURL \
  -Logger Console \
  -LogLevel Warning \
  -PrivateKeyFile /etc/mdm.pem \
  -SourceEnvironment $MDMSOURCEENVIRONMENT \
  -SourceRole $MDMSOURCEROLE \
  -SourceRoleInstance $MDMSOURCEROLEINSTANCE
ExecStop=/usr/bin/docker stop %N
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target
EOF

echo "configuring aro-rp service"
# Environment file for aro-rp.service. The heredoc is unquoted, so every
# $VARIABLE is expanded now, while the VM is being provisioned.
# NOTE(review): AZURE_FP_SERVICE_PRINCIPAL_ID is written here but is not
# forwarded to the container with -e in the unit below - confirm whether
# that is intentional.
cat >/etc/sysconfig/aro-rp <<EOF
ACR_RESOURCE_ID='$ACRRESOURCEID'
ADMIN_API_CLIENT_CERT_COMMON_NAME='$ADMINAPICLIENTCERTCOMMONNAME'
ARM_API_CLIENT_CERT_COMMON_NAME='$ARMAPICLIENTCERTCOMMONNAME'
AZURE_ARM_CLIENT_ID='$ARMCLIENTID'
AZURE_FP_CLIENT_ID='$FPCLIENTID'
AZURE_FP_SERVICE_PRINCIPAL_ID='$FPSERVICEPRINCIPALID'
CLUSTER_MDM_ACCOUNT='$CLUSTERMDMACCOUNT'
CLUSTER_MDM_NAMESPACE=RP
CLUSTER_MDSD_ACCOUNT='$CLUSTERMDSDACCOUNT'
CLUSTER_MDSD_CONFIG_VERSION='$CLUSTERMDSDCONFIGVERSION'
CLUSTER_MDSD_NAMESPACE='$CLUSTERMDSDNAMESPACE'
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
DOMAIN_NAME='$LOCATION.$CLUSTERPARENTDOMAINNAME'
GATEWAY_DOMAINS='$GATEWAYDOMAINS'
GATEWAY_RESOURCEGROUP='$GATEWAYRESOURCEGROUPNAME'
KEYVAULT_PREFIX='$KEYVAULTPREFIX'
MDM_ACCOUNT='$RPMDMACCOUNT'
MDM_NAMESPACE=RP
MDSD_ENVIRONMENT='$MDSDENVIRONMENT'
RP_FEATURES='$RPFEATURES'
RPIMAGE='$RPIMAGE'
ARO_INSTALL_VIA_HIVE='$CLUSTERSINSTALLVIAHIVE'
ARO_HIVE_DEFAULT_INSTALLER_PULLSPEC='$CLUSTERDEFAULTINSTALLERPULLSPEC'
ARO_ADOPT_BY_HIVE='$CLUSTERSADOPTBYHIVE'
OIDC_AFD_ENDPOINT='$LOCATION.oic.$RPPARENTDOMAINNAME'
OIDC_STORAGE_ACCOUNT_NAME='$OIDCSTORAGEACCOUNTNAME'
EOF

# systemd unit running the RP image with the "rp" argument. Host port 443 is
# published to container port 8443. The heredoc delimiter is quoted: $RPIMAGE
# is resolved by systemd from the EnvironmentFile. ExecStop allows up to 3600
# seconds for the container to stop, matched by TimeoutStopSec.
cat >/etc/systemd/system/aro-rp.service <<'EOF'
[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=/etc/sysconfig/aro-rp
ExecStartPre=-/usr/bin/docker rm -f %N
ExecStart=/usr/bin/docker run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -e ACR_RESOURCE_ID \
  -e ADMIN_API_CLIENT_CERT_COMMON_NAME \
  -e ARM_API_CLIENT_CERT_COMMON_NAME \
  -e AZURE_ARM_CLIENT_ID \
  -e AZURE_FP_CLIENT_ID \
  -e CLUSTER_MDM_ACCOUNT \
  -e CLUSTER_MDM_NAMESPACE \
  -e CLUSTER_MDSD_ACCOUNT \
  -e CLUSTER_MDSD_CONFIG_VERSION \
  -e CLUSTER_MDSD_NAMESPACE \
  -e DATABASE_ACCOUNT_NAME \
  -e DOMAIN_NAME \
  -e GATEWAY_DOMAINS \
  -e GATEWAY_RESOURCEGROUP \
  -e KEYVAULT_PREFIX \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -e MDSD_ENVIRONMENT \
  -e RP_FEATURES \
  -e ARO_INSTALL_VIA_HIVE \
  -e ARO_HIVE_DEFAULT_INSTALLER_PULLSPEC \
  -e ARO_ADOPT_BY_HIVE \
  -e OIDC_AFD_ENDPOINT \
  -e OIDC_STORAGE_ACCOUNT_NAME \
  -m 2g \
  -p 443:8443 \
  -v /etc/aro-rp:/etc/aro-rp \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  $RPIMAGE \
  rp
ExecStop=/usr/bin/docker stop -t 3600 %N
TimeoutStopSec=3600
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target
EOF

# DOMAIN_NAME, CLUSTER_MDSD_ACCOUNT, CLUSTER_MDSD_CONFIG_VERSION, GATEWAY_DOMAINS, GATEWAY_RESOURCEGROUP, MDSD_ENVIRONMENT CLUSTER_MDSD_NAMESPACE
# are not used, but can't easily be refactored out. Should be revisited in the future.
echo "configuring aro-monitor service"
# Environment file for aro-monitor.service. The heredoc is unquoted, so every
# $VARIABLE is expanded now, while the VM is being provisioned. Both MDM
# namespaces are fixed to BBM.
cat >/etc/sysconfig/aro-monitor <<EOF
AZURE_FP_CLIENT_ID='$FPCLIENTID'
DOMAIN_NAME='$LOCATION.$CLUSTERPARENTDOMAINNAME'
CLUSTER_MDSD_ACCOUNT='$CLUSTERMDSDACCOUNT'
CLUSTER_MDSD_CONFIG_VERSION='$CLUSTERMDSDCONFIGVERSION'
GATEWAY_DOMAINS='$GATEWAYDOMAINS'
GATEWAY_RESOURCEGROUP='$GATEWAYRESOURCEGROUPNAME'
MDSD_ENVIRONMENT='$MDSDENVIRONMENT'
CLUSTER_MDSD_NAMESPACE='$CLUSTERMDSDNAMESPACE'
CLUSTER_MDM_ACCOUNT='$CLUSTERMDMACCOUNT'
CLUSTER_MDM_NAMESPACE=BBM
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
KEYVAULT_PREFIX='$KEYVAULTPREFIX'
MDM_ACCOUNT='$RPMDMACCOUNT'
MDM_NAMESPACE=BBM
RPIMAGE='$RPIMAGE'
EOF

# systemd unit running the RP image with the "monitor" argument and a 2.5g
# memory limit. The heredoc delimiter is quoted: $RPIMAGE is resolved by
# systemd from the EnvironmentFile.
cat >/etc/systemd/system/aro-monitor.service <<'EOF'
[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=/etc/sysconfig/aro-monitor
ExecStartPre=-/usr/bin/docker rm -f %N
ExecStart=/usr/bin/docker run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -e AZURE_FP_CLIENT_ID \
  -e DOMAIN_NAME \
  -e CLUSTER_MDSD_ACCOUNT \
  -e CLUSTER_MDSD_CONFIG_VERSION \
  -e GATEWAY_DOMAINS \
  -e GATEWAY_RESOURCEGROUP \
  -e MDSD_ENVIRONMENT \
  -e CLUSTER_MDSD_NAMESPACE \
  -e CLUSTER_MDM_ACCOUNT \
  -e CLUSTER_MDM_NAMESPACE \
  -e DATABASE_ACCOUNT_NAME \
  -e KEYVAULT_PREFIX \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -m 2.5g \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  $RPIMAGE \
  monitor
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target
EOF

echo "configuring aro-portal service"
# Environment file for aro-portal.service. The heredoc is unquoted, so every
# $VARIABLE is expanded now, while the VM is being provisioned.
cat >/etc/sysconfig/aro-portal <<EOF
AZURE_PORTAL_ACCESS_GROUP_IDS='$PORTALACCESSGROUPIDS'
AZURE_PORTAL_CLIENT_ID='$PORTALCLIENTID'
AZURE_PORTAL_ELEVATED_GROUP_IDS='$PORTALELEVATEDGROUPIDS'
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
KEYVAULT_PREFIX='$KEYVAULTPREFIX'
MDM_ACCOUNT='$RPMDMACCOUNT'
MDM_NAMESPACE=Portal
PORTAL_HOSTNAME='$LOCATION.admin.$RPPARENTDOMAINNAME'
RPIMAGE='$RPIMAGE'
EOF

# systemd unit running the RP image with the "portal" argument. Host port 444
# is published to container port 8444, and 2222 to 2222. The heredoc
# delimiter is quoted: $RPIMAGE is resolved by systemd from the
# EnvironmentFile.
# NOTE(review): StartLimitInterval=0 sits in [Unit] here, while the aro-rp
# and aro-monitor units set it in [Service] - confirm which is intended.
cat >/etc/systemd/system/aro-portal.service <<'EOF'
[Unit]
After=network-online.target
Wants=network-online.target
StartLimitInterval=0

[Service]
EnvironmentFile=/etc/sysconfig/aro-portal
ExecStartPre=-/usr/bin/docker rm -f %N
ExecStart=/usr/bin/docker run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -e AZURE_PORTAL_ACCESS_GROUP_IDS \
  -e AZURE_PORTAL_CLIENT_ID \
  -e AZURE_PORTAL_ELEVATED_GROUP_IDS \
  -e DATABASE_ACCOUNT_NAME \
  -e KEYVAULT_PREFIX \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -e PORTAL_HOSTNAME \
  -m 2g \
  -p 444:8444 \
  -p 2222:2222 \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  $RPIMAGE \
  portal
Restart=always
RestartSec=1

[Install]
WantedBy=multi-user.target
EOF

echo "configuring mdsd and mdm services"
# Label the linuxmonagent directory tree with the var_log_t SELinux type.
chcon -R system_u:object_r:var_log_t:s0 /var/opt/microsoft/linuxmonagent

# Directory that holds the mdsd certificate (mdsd.pem) written by
# download-credentials.sh.
mkdir -p /var/lib/waagent/Microsoft.Azure.KeyVault.Store

# For each of mdsd and mdm, generate a oneshot service that runs
# download-credentials.sh for that component, plus a timer for it.
# The heredocs are unquoted, so $var is expanded into the unit text.
for var in "mdsd" "mdm"; do
cat >/etc/systemd/system/download-$var-credentials.service <<EOF
[Unit]
Description=Periodic $var credentials refresh

[Service]
Type=oneshot
ExecStart=/usr/local/bin/download-credentials.sh $var
EOF

# The timer fires at boot (OnBootSec=0min) and on the OnCalendar schedule
# 0/12:00:00.
cat >/etc/systemd/system/download-$var-credentials.timer <<EOF
[Unit]
Description=Periodic $var credentials refresh
After=network-online.target
Wants=network-online.target

[Timer]
OnBootSec=0min
OnCalendar=0/12:00:00
AccuracySec=5s

[Install]
WantedBy=timers.target
EOF
done

# Generate /usr/local/bin/download-credentials.sh, which takes a component
# name (mdm or mdsd) as its first argument, downloads the Key Vault secret
# rp-<component> and replaces the component's certificate file when the
# serial number of the downloaded certificate differs from the current one.
#
# The heredoc is unquoted on purpose:
#   - \$... is escaped and therefore evaluated when the generated script runs
#   - $KEYVAULTPREFIX and $KEYVAULTDNSSUFFIX are expanded now, baking the
#     vault URL into the generated script
#
# NOTE(review): the EXIT trap is registered only after the az login retry
# loop, so cleanup() does not run if the script exits during that loop.
# NOTE(review): under `set -e`, `let RETRIES-=1` presumably returns non-zero
# when the counter reaches 0, which would end the script on the final failed
# login - confirm this is the intended way to fail.
cat >/usr/local/bin/download-credentials.sh <<EOF
#!/bin/bash
set -eu

COMPONENT="\$1"
echo "Download \$COMPONENT credentials"

TEMP_DIR=\$(mktemp -d)
export AZURE_CONFIG_DIR=\$(mktemp -d)

echo "Logging into Azure..."
RETRIES=3
while [ "\$RETRIES" -gt 0 ]; do
    if az login -i --allow-no-subscriptions
    then
        echo "az login successful"
        break
    else
        echo "az login failed. Retrying..."
        let RETRIES-=1
        sleep 5
    fi
done

trap "cleanup" EXIT

cleanup() {
  az logout
  [[ "\$TEMP_DIR" =~ /tmp/.+ ]] && rm -rf \$TEMP_DIR
  [[ "\$AZURE_CONFIG_DIR" =~ /tmp/.+ ]] && rm -rf \$AZURE_CONFIG_DIR
}

if [ "\$COMPONENT" = "mdm" ]; then
  CURRENT_CERT_FILE="/etc/mdm.pem"
elif [ "\$COMPONENT" = "mdsd" ]; then
  CURRENT_CERT_FILE="/var/lib/waagent/Microsoft.Azure.KeyVault.Store/mdsd.pem"
else
  echo Invalid usage && exit 1
fi

SECRET_NAME="rp-\${COMPONENT}"
NEW_CERT_FILE="\$TEMP_DIR/\$COMPONENT.pem"
for attempt in {1..5}; do
  az keyvault secret download --file \$NEW_CERT_FILE --id "https://$KEYVAULTPREFIX-svc.$KEYVAULTDNSSUFFIX/secrets/\$SECRET_NAME" && break
  if [[ \$attempt -lt 5 ]]; then sleep 10; else exit 1; fi
done

if [ -f \$NEW_CERT_FILE ]; then
  if [ "\$COMPONENT" = "mdsd" ]; then
    chown syslog:syslog \$NEW_CERT_FILE
  else
    sed -i -ne '1,/END CERTIFICATE/ p' \$NEW_CERT_FILE
  fi

  new_cert_sn="\$(openssl x509 -in "\$NEW_CERT_FILE" -noout -serial | awk -F= '{print \$2}')"
  current_cert_sn="\$(openssl x509 -in "\$CURRENT_CERT_FILE" -noout -serial | awk -F= '{print \$2}')"
  if [[ ! -z \$new_cert_sn ]] && [[ \$new_cert_sn != "\$current_cert_sn" ]]; then
    echo updating certificate for \$COMPONENT
    chmod 0600 \$NEW_CERT_FILE
    mv \$NEW_CERT_FILE \$CURRENT_CERT_FILE
  fi
else
  echo Failed to refresh certificate for \$COMPONENT && exit 1
fi
EOF

# Make the generated script executable for its owner.
chmod u+x /usr/local/bin/download-credentials.sh

# Enable the periodic refresh timers for both components.
systemctl enable download-mdsd-credentials.timer
systemctl enable download-mdm-credentials.timer

# Fetch both certificates once, synchronously, so they exist before the
# steps below read them.
/usr/local/bin/download-credentials.sh mdsd
/usr/local/bin/download-credentials.sh mdm
# Take the mdsd certificate's subject line and strip everything up to and
# including "CN = "; the result is written into /etc/default/mdsd later as
# MONITORING_GCS_AUTH_ID.
MDSDCERTIFICATESAN=$(openssl x509 -in /var/lib/waagent/Microsoft.Azure.KeyVault.Store/mdsd.pem -noout -subject | sed -e 's/.*CN = //')

# Oneshot service that restarts mdm.service. The heredoc is unquoted, but
# the content contains nothing for the shell to expand.
cat >/etc/systemd/system/watch-mdm-credentials.service <<EOF
[Unit]
Description=Watch for changes in mdm.pem and restarts the mdm service

[Service]
Type=oneshot
ExecStart=/usr/bin/systemctl restart mdm.service

[Install]
WantedBy=multi-user.target
EOF

# Path unit with the same base name, so it activates the service above when
# /etc/mdm.pem is modified.
cat >/etc/systemd/system/watch-mdm-credentials.path <<EOF
[Path]
PathModified=/etc/mdm.pem

[Install]
WantedBy=multi-user.target
EOF

systemctl enable watch-mdm-credentials.path
systemctl start watch-mdm-credentials.path

# Drop-in that orders mdsd.service after network-online.target.
# -p keeps this step idempotent: the script runs under `set -e`, so a plain
# mkdir would abort a re-run once the directory already exists.
mkdir -p /etc/systemd/system/mdsd.service.d
cat >/etc/systemd/system/mdsd.service.d/override.conf <<'EOF'
[Unit]
After=network-online.target
EOF

# mdsd defaults. The heredoc is unquoted: \$MDSD_ROLE_PREFIX is escaped and
# stays literal in the file, while every other variable and $(hostname) is
# expanded now, during provisioning. MDSDCERTIFICATESAN must have been set
# from the mdsd certificate before this point.
cat >/etc/default/mdsd <<EOF
MDSD_ROLE_PREFIX=/var/run/mdsd/default
MDSD_OPTIONS="-A -d -r \$MDSD_ROLE_PREFIX"

export MONITORING_GCS_ENVIRONMENT='$MDSDENVIRONMENT'
export MONITORING_GCS_ACCOUNT='$RPMDSDACCOUNT'
export MONITORING_GCS_REGION='$LOCATION'
export MONITORING_GCS_AUTH_ID_TYPE=AuthKeyVault
export MONITORING_GCS_AUTH_ID='$MDSDCERTIFICATESAN'
export MONITORING_GCS_NAMESPACE='$RPMDSDNAMESPACE'
export MONITORING_CONFIG_VERSION='$RPMDSDCONFIGVERSION'
export MONITORING_USE_GENEVA_CONFIG_SERVICE=true

export MONITORING_TENANT='$LOCATION'
export MONITORING_ROLE=rp
export MONITORING_ROLE_INSTANCE='$(hostname)'

export MDSD_MSGPACK_SORT_COLUMNS=1
EOF

# setting MONITORING_GCS_AUTH_ID_TYPE=AuthKeyVault seems to have caused mdsd not
# to honour SSL_CERT_FILE any more, heaven only knows why.
mkdir -p /usr/lib/ssl/certs
csplit -f /usr/lib/ssl/certs/cert- -b %03d.pem /etc/pki/tls/certs/ca-bundle.crt /^$/1 {*} >/dev/null
c_rehash /usr/lib/ssl/certs

# we leave clientId blank as long as only 1 managed identity assigned to vmss
# if we have more than 1, we will need to populate with clientId used for off-node scanning
cat >/etc/default/vsa-nodescan-agent.config <<EOF
{
    "Nice": 19,
    "Timeout": 10800,
    "ClientId": "",
    "TenantId": "$AZURESECPACKVSATENANTID",
    "QualysStoreBaseUrl": "$AZURESECPACKQUALYSURL",
    "ProcessTimeout": 300,
    "CommandDelay": 0
  }
EOF

echo "enabling aro services"
for service in aro-monitor aro-portal aro-rp auoms azsecd azsecmond mdsd mdm chronyd fluentbit; do
  systemctl enable $service.service
done

for scan in baseline clamav software; do
  /usr/local/bin/azsecd config -s $scan -d P1D
done

echo "rebooting"
restorecon -RF /var/log/*
(sleep 30; reboot) &
')))]" + "script": "[base64(concat(base64ToString('c2V0IC1leAoK'),'ACRRESOURCEID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('acrResourceId')),''')\n','ADMINAPICLIENTCERTCOMMONNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('adminApiClientCertCommonName')),''')\n','ARMAPICLIENTCERTCOMMONNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('armApiClientCertCommonName')),''')\n','ARMCLIENTID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('armClientId')),''')\n','AZURECLOUDNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureCloudName')),''')\n','AZURESECPACKQUALYSURL=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureSecPackQualysUrl')),''')\n','AZURESECPACKVSATENANTID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('azureSecPackVSATenantId')),''')\n','CLUSTERMDMACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterMdmAccount')),''')\n','CLUSTERMDSDACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterMdsdAccount')),''')\n','CLUSTERMDSDCONFIGVERSION=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterMdsdConfigVersion')),''')\n','CLUSTERMDSDNAMESPACE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterMdsdNamespace')),''')\n','CLUSTERPARENTDOMAINNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clusterParentDomainName')),''')\n','DATABASEACCOUNTNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('databaseAccountName')),''')\n','FLUENTBITIMAGE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('fluentbitImage')),''')\n','FPCLIENTID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('fpClientId')),''')\n','FPSERVICEPRINCIPALID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('fpServicePrincipalId')),''')\n','GATEWAYDOMAINS=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayDomains')),''')\n','GATEWAYRESOURCEGROUPNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('gatewayResourceGroupName')),''')\n','GATEWAYSERVICEPRINCIPALID=$(base64 -d 
\u003c\u003c\u003c''',base64(parameters('gatewayServicePrincipalId')),''')\n','KEYVAULTDNSSUFFIX=$(base64 -d \u003c\u003c\u003c''',base64(parameters('keyvaultDNSSuffix')),''')\n','KEYVAULTPREFIX=$(base64 -d \u003c\u003c\u003c''',base64(parameters('keyvaultPrefix')),''')\n','MDMFRONTENDURL=$(base64 -d \u003c\u003c\u003c''',base64(parameters('mdmFrontendUrl')),''')\n','MDSDENVIRONMENT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('mdsdEnvironment')),''')\n','PORTALACCESSGROUPIDS=$(base64 -d \u003c\u003c\u003c''',base64(parameters('portalAccessGroupIds')),''')\n','PORTALCLIENTID=$(base64 -d \u003c\u003c\u003c''',base64(parameters('portalClientId')),''')\n','PORTALELEVATEDGROUPIDS=$(base64 -d \u003c\u003c\u003c''',base64(parameters('portalElevatedGroupIds')),''')\n','RPFEATURES=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpFeatures')),''')\n','RPIMAGE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpImage')),''')\n','RPMDMACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdmAccount')),''')\n','RPMDSDACCOUNT=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdsdAccount')),''')\n','RPMDSDCONFIGVERSION=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdsdConfigVersion')),''')\n','RPMDSDNAMESPACE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpMdsdNamespace')),''')\n','RPPARENTDOMAINNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('rpParentDomainName')),''')\n','OIDCSTORAGEACCOUNTNAME=$(base64 -d \u003c\u003c\u003c''',base64(parameters('oidcStorageAccountName')),''')\n','CLUSTERSINSTALLVIAHIVE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clustersInstallViaHive')),''')\n','CLUSTERSADOPTBYHIVE=$(base64 -d \u003c\u003c\u003c''',base64(parameters('clustersAdoptByHive')),''')\n','CLUSTERDEFAULTINSTALLERPULLSPEC=$(base64 -d 
\u003c\u003c\u003c''',base64(parameters('clusterDefaultInstallerPullspec')),''')\n','ADMINAPICABUNDLE=''',parameters('adminApiCaBundle'),'''\n','ARMAPICABUNDLE=''',parameters('armApiCaBundle'),'''\n','MDMIMAGE=''/distroless/genevamdm:2.2024.626.1539-d1a6e7-20240715t0935@sha256:372fbc981bbfdf2b9a9d0ffdca2c51ed389b291a3bcff0401e9afb0c01605823''\n','LOCATION=$(base64 -d \u003c\u003c\u003c''',base64(resourceGroup().location),''')\n','SUBSCRIPTIONID=$(base64 -d \u003c\u003c\u003c''',base64(subscription().subscriptionId),''')\n','RESOURCEGROUPNAME=$(base64 -d \u003c\u003c\u003c''',base64(resourceGroup().name),''')\n','\n',base64ToString('#!/bin/bash
# Internal Functions and Constants

# empty_str - constant; the default nameref target used by functions whose
# nameref string arguments are optional
# shellcheck disable=SC2034
declare -r empty_str=""

# role_gateway is the role name used when the gateway VMSS is being bootstrapped
# this should be referenced by scripts sourcing this file
declare -r role_gateway="gateway"
# role_rp is the role name used when the RP VMSS is being bootstrapped
# this should be referenced by scripts sourcing this file
declare -r role_rp="rp"

# log prints a message prefixed with the name of a function on the call stack
# Args
# 1) msg - string; falls back to a placeholder when empty or unset
# 2) stack_level - int; optional, index into FUNCNAME. Defaults to 1, which is
#       the function that called log
log() {
    local -r depth="${2:-1}"
    local -r text="${1:-log message is empty}"
    printf '%s: %s\n' "${FUNCNAME[$depth]}" "$text"
}

# abort logs the given message on behalf of the function that called abort,
# logs "Exiting", then terminates the shell with exit code 1
# Args
# 1) msg - string; reason for aborting
abort() {
    # Stack level 2 skips log and abort themselves, attributing the message
    # to abort's caller
    log "$1" 2
    log "Exiting"
    exit 1
}

# write_file writes a string, followed by a newline, to a file
# Args
# 1) filename - nameref, string; name of the variable holding the target path
# 2) file_contents - nameref, string; name of the variable holding the contents
# 3) clobber - boolean; optional - defaults to false. "true" overwrites the
#       file, anything else appends to it
write_file() {
    local -n filename="$1"
    local -n file_contents="$2"
    local -r clobber="${3:-false}"

    # Compare as a string instead of executing the argument as a command
    if [[ "$clobber" == true ]]; then
        log "Overwriting file $filename"
        # printf rather than echo: echo would treat contents such as "-n" as an option
        printf '%s\n' "$file_contents" > "$filename"
    else
        log "Appending to $filename"
        printf '%s\n' "$file_contents" >> "$filename"
    fi
}

# retry runs a command until it succeeds or the attempts are exhausted
# It exists so that package manager commands do not stall out on resource locks
# args:
# 1) cmd_retry - nameref, array; Command and argument(s) to retry
# 2) wait_time - nameref, integer; Time passed to sleep between attempts
# 3) retries - integer, optional; Maximum number of attempts, defaults to 5.
#       The command is always run at least once
# Returns 0 as soon as the command succeeds; calls abort once the final attempt fails
retry() {
    local -n cmd_retry="$1"
    local -n wait_time="$2"
    local -ri retries="${3:-5}"
    local -i attempt

    for (( attempt = 1; ; attempt++ )); do
        log "attempt #${attempt} - ${FUNCNAME[2]:-}"
        # Intentionally unquoted: callers may pass elements that hold several
        # whitespace separated words which must be split into arguments
        # shellcheck disable=SC2068
        ${cmd_retry[@]} &

        if wait "$!"; then
            return 0
        fi

        if (( attempt >= retries )); then
            abort "attempt #${attempt} - Giving up, command failed: ${cmd_retry[*]}"
        fi

        sleep "$wait_time"
    done
}

# verify_role aborts unless the given role is one of the allowed roles
# args:
# 1) test_role - nameref; role being verified
# 2) certs - boolean, optional; defaults to false. Set to true to add devproxy to allowed roles
verify_role() {
    local -n test_role="$1"
    local -r certs="${2:-false}"

    # Despite the name this is an extended regular expression alternation
    local allowed_roles_glob="($role_rp|$role_gateway)"
    if [[ "$certs" == true ]]; then
        # remove trailing ")" and append additional role
        allowed_roles_glob="${allowed_roles_glob%\)*}|devproxy)"
    fi

    # Anchor the expression so the whole role must match; without anchors any
    # string merely containing an allowed role would be accepted
    if [[ "$test_role" =~ ^${allowed_roles_glob}$ ]]; then
        log "Verified role \"$test_role\""
    else
        abort "failed to verify role, role \"${test_role}\" not in \"${allowed_roles_glob}\""
    fi
}

# get_keyvault_suffix maps a VMSS role to its keyvault suffix and secret prefix
# args:
# 1) rl - nameref, string; role to look up
# 2) kv_suffix - nameref, string; receives the keyvault suffix ("gwy" or "svc")
# 3) sec_prefix - nameref, string; receives the keyvault certificate prefix
#       ("gwy" for the gateway role, the rp role name for the rp role)
# Calls abort for any other role
get_keyvault_suffix() {
    local -n rl="$1"
    local -n kv_suffix="$2"
    local -n sec_prefix="$3"

    if [[ "$rl" == "$role_gateway" ]]; then
        kv_suffix="gwy"
        sec_prefix="gwy"
    elif [[ "$rl" == "$role_rp" ]]; then
        kv_suffix="svc"
        sec_prefix="$role_rp"
    else
        abort "unkown role $rl"
    fi
}

# reboot_vm reboots the VM immediately by running "shutdown -r now" in a
# backgrounded subshell, so the function itself returns without waiting
# NOTE(review): earlier documentation described restoring selinux file contexts
# and scheduling the reboot one hour later; neither happens in this function
# Reboots should be triggered after all VM extensions have had time to complete
# Reference: https://learn.microsoft.com/en-us/azure/virtual-machines/extensions/custom-script-linux#tips
reboot_vm() {
    log "starting"

    (shutdown -r now &)
}
#!/bin/bash
# Repository and package management related functions

# configure_repo_mariner_extended downloads the Mariner extended repository
# definition and runs "dnf update" with that repository enabled
# args:
# 1) wait_time - nameref, integer; Time to wait before retrying command
# 2) retries - integer, optional; passed through to retry
configure_repo_mariner_extended() {
    local -r extended_repo_config="https://packages.microsoft.com/cbl-mariner/2.0/prod/extended/x86_64/config.repo"
    # -f makes curl fail on an HTTP error instead of saving the error page as
    # the repository definition
    curl -fsSL "$extended_repo_config" -o /etc/yum.repos.d/mariner-extended.repo \
        || abort "failed to download $extended_repo_config"

    local -r repo_name="cbl-mariner2.0prodextendedx86_64"

    local -ra cmd=(
        dnf
        update
        -y
        --enablerepo="$repo_name"
    )

    log "Enabling repo $repo_name"
    retry cmd "$1" "${2:-}"
}

# configure_rpm_repos
# New repositories should be added in their own functions, and called here
# args:
# 1) wait_time - nameref, integer; Time to wait before retrying command
# 2) retries - integer, optional; Amount of times to retry command, defaults to 5
configure_rpm_repos() {
    log "starting"

    # Pass an empty value when retries is omitted so that retry applies its own
    # documented default instead of being forced to 1
    configure_repo_mariner_extended "$1" "${2:-}"
}

# dnf_install_pkgs installs packages with "dnf -y install" via retry
# args:
# 1) pkgs - nameref, string array; Packages to be installed
# 2) wait_time - nameref, integer; Time to wait before retrying command
# 3) retries - integer, optional; Amount of times to retry command, defaults to 5
dnf_install_pkgs() {
    local -n pkgs="$1"
    log "starting"

    # Each package becomes its own array element, with no stray delimiters or
    # newlines attached to the names
    local -ra cmd=(
        dnf
        -y
        install
        "${pkgs[@]}"
    )

    log "Attempting to install packages: ${pkgs[*]}"
    retry cmd "$2" "${3:-}"
}


# dnf_update_pkgs updates all packages with "dnf -y update --allowerasing" via retry
# args:
# 1) excludes - nameref, string array, optional; Packages to exclude from updating
#       Each index must be prefixed with -x
# 2) wait_time - nameref, integer; Time to wait before retrying command
# 3) retries - integer, optional; Amount of times to retry command, defaults to 5
dnf_update_pkgs() {
    local -n excludes="${1:-empty_str}"
    log "starting"

    local -a cmd=(
        dnf
        -y
    )

    # Excludes go between the global options and the subcommand. They are only
    # added when given, so no empty argument ends up in the command
    if [[ -n "${excludes[*]:-}" ]]; then
        cmd+=("${excludes[@]}")
    fi

    cmd+=(
        update
        --allowerasing
    )
    local -r cmd

    log "Updating all packages excluding \"${excludes[*]:-}\""
    retry cmd "$2" "${3:-}"
}

# configure_dnf_cron_job
# Installs an executable weekly cron script that runs "dnf update -y"
configure_dnf_cron_job() {
    log "starting"
    local -r cron_script_path='/etc/cron.weekly/dnfupdate'
    # shellcheck disable=SC2034
    local -r cron_script_body=$'#!/bin/bash\ndnf update -y'

    # write_file takes variable names, not values; true overwrites the file
    write_file cron_script_path cron_script_body true
    chmod u+x "$cron_script_path"
}

# rpm_import_keys imports each key with "rpm --import -v" via retry
# args:
# 1) keys - nameref, string array; rpm keys to be imported
# 2) wait_time - nameref, integer; Time to wait before retrying command
# 3) retries - integer, optional; passed through to retry
rpm_import_keys() {
    local -n keys="$1"
    log "starting"

    local key
    local -a cmd
    # An empty array simply skips the loop
    for key in "${keys[@]}"; do
        cmd=(
            rpm
            --import
            -v
            "$key"
        )

        log "Importing rpm repository key $key"
        retry cmd "$2" "${3:-}"
    done
}
#!/bin/bash
# ARO service setup functions

# enable_services reloads systemd, then enables and immediately starts every
# unit that is passed in
# args:
# 1) services - nameref, array; services to be enabled
enable_services() {
    local -n svcs="$1"
    log "starting"

    # Pick up unit files written since the last reload
    systemctl daemon-reload

    log "enabling services ${svcs[*]}"
    # shellcheck disable=SC2068
    for svc in ${svcs[@]}; do
        log "Enabling and starting $svc now"
        systemctl enable --now "$svc"
    done
}

# configure_service_aro_gateway writes the aro-gateway environment file and the
# aro-gateway systemd unit. It does not enable or start the unit.
# args:
# 1) image - nameref, string; container image
# 2) role - nameref, string; VMSS role
# 3) conf_file - nameref, string; aro gateway environment file
# 4) network - nameref, string; podman network name to be attached
configure_service_aro_gateway() {
    local -n image="$1"
    local -n role="$2"
    local -n conf_file="$3"
    local -n network="$4"
    log "starting"
    log "Configuring aro-gateway service"

    # Environment file referenced by EnvironmentFile= in the unit below
    local -r aro_gateway_conf_filename='/etc/sysconfig/aro-gateway'

    write_file aro_gateway_conf_filename conf_file true

    # shellcheck disable=SC2034
    local -r aro_gateway_service_filename='/etc/systemd/system/aro-gateway.service'

    # $image, $network and ${role,,} (the role lowercased) are expanded here,
    # when the unit text is generated; %N and %H are written out literally.
    # Backslash-newline pairs inside the double quotes are removed by bash, so
    # ExecStart ends up on a single line in the written unit.
    # shellcheck disable=SC2034
    local -r aro_gateway_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=${aro_gateway_conf_filename}
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -e ACR_RESOURCE_ID \
  -e DATABASE_ACCOUNT_NAME \
  -e GATEWAY_DOMAINS \
  -e GATEWAY_FEATURES \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -m 2g \
  --network=$network \
  -p 80:8080 \
  -p 8081:8081 \
  -p 443:8443 \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  $image \
  ${role,,}
ExecStop=/usr/bin/podman stop -t 3600 %N
TimeoutStopSec=3600
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target
    "

    write_file aro_gateway_service_filename aro_gateway_service_file true
}

# configure_service_aro_rp writes the aro-rp environment file and the aro-rp
# systemd unit. It does not enable or start the unit.
# args:
# 1) image - nameref, string; RP container image
# 2) role - nameref, string; VMSS role
# 3) conf_file - nameref, string; aro rp environment file
# 4) network - nameref, string; podman network name to be attached
configure_service_aro_rp() {
    local -n image="$1"
    local -n role="$2"
    local -n conf_file="$3"
    local -n network="$4"
    log "starting"
    log "Configuring aro-rp service"

    # Environment file referenced by EnvironmentFile= in the unit below
    local -r aro_rp_conf_filename='/etc/sysconfig/aro-rp'

    write_file aro_rp_conf_filename conf_file true

    # shellcheck disable=SC2034
    local -r aro_rp_service_filename='/etc/systemd/system/aro-rp.service'
    # $image, $network and ${role,,} (the role lowercased) are expanded here,
    # when the unit text is generated; %N and %H are written out literally.
    # Backslash-newline pairs inside the double quotes are removed by bash, so
    # ExecStart ends up on a single line in the written unit.
    # shellcheck disable=SC2034
    local -r aro_rp_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=${aro_rp_conf_filename}
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -e ACR_RESOURCE_ID \
  -e ADMIN_API_CLIENT_CERT_COMMON_NAME \
  -e ARM_API_CLIENT_CERT_COMMON_NAME \
  -e AZURE_ARM_CLIENT_ID \
  -e AZURE_FP_CLIENT_ID \
  -e CLUSTER_MDM_ACCOUNT \
  -e CLUSTER_MDM_NAMESPACE \
  -e CLUSTER_MDSD_ACCOUNT \
  -e CLUSTER_MDSD_CONFIG_VERSION \
  -e CLUSTER_MDSD_NAMESPACE \
  -e DATABASE_ACCOUNT_NAME \
  -e DOMAIN_NAME \
  -e GATEWAY_DOMAINS \
  -e GATEWAY_RESOURCEGROUP \
  -e KEYVAULT_PREFIX \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -e MDSD_ENVIRONMENT \
  -e RP_FEATURES \
  -e ARO_INSTALL_VIA_HIVE \
  -e ARO_HIVE_DEFAULT_INSTALLER_PULLSPEC \
  -e ARO_ADOPT_BY_HIVE \
  -e OIDC_AFD_ENDPOINT \
  -e OIDC_STORAGE_ACCOUNT_NAME \
  -m 2g \
  --network=$network \
  -p 443:8443 \
  -v /etc/aro-rp:/etc/aro-rp \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  $image \
  ${role,,}
ExecStop=/usr/bin/podman stop -t 3600 %N
TimeoutStopSec=3600
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file aro_rp_service_filename aro_rp_service_file true
}

# configure_service_aro_monitor writes /etc/sysconfig/aro-monitor and the
# aro-monitor systemd unit. It does not enable or start the unit.
# The environment file is built from global variables (FPCLIENTID, LOCATION,
# CLUSTERPARENTDOMAINNAME, ...) that must already be set by the caller's script.
# args:
# 1) image - nameref, string; RP container image
# 2) network - nameref, string; podman network name to be attached
configure_service_aro_monitor() {
    local -n image="$1"
    local -n network="$2"
    log "starting"
    log "Configuring aro-monitor service"

    # DOMAIN_NAME, CLUSTER_MDSD_ACCOUNT, CLUSTER_MDSD_CONFIG_VERSION, GATEWAY_DOMAINS, GATEWAY_RESOURCEGROUP, MDSD_ENVIRONMENT CLUSTER_MDSD_NAMESPACE
    # are not used, but can't easily be refactored out. Should be revisited in the future.
    # shellcheck disable=SC2034
    local -r aro_monitor_service_conf_filename='/etc/sysconfig/aro-monitor'
    # Values are expanded now and wrapped in single quotes in the written file
    # shellcheck disable=SC2034
    local -r aro_monitor_service_conf_file="AZURE_FP_CLIENT_ID='$FPCLIENTID'
DOMAIN_NAME='$LOCATION.$CLUSTERPARENTDOMAINNAME'
CLUSTER_MDSD_ACCOUNT='$CLUSTERMDSDACCOUNT'
CLUSTER_MDSD_CONFIG_VERSION='$CLUSTERMDSDCONFIGVERSION'
GATEWAY_DOMAINS='$GATEWAYDOMAINS'
GATEWAY_RESOURCEGROUP='$GATEWAYRESOURCEGROUPNAME'
MDSD_ENVIRONMENT='$MDSDENVIRONMENT'
CLUSTER_MDSD_NAMESPACE='$CLUSTERMDSDNAMESPACE'
CLUSTER_MDM_ACCOUNT='$CLUSTERMDMACCOUNT'
CLUSTER_MDM_NAMESPACE=BBM
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
KEYVAULT_PREFIX='$KEYVAULTPREFIX'
MDM_ACCOUNT='$RPMDMACCOUNT'
MDM_NAMESPACE=BBM
RPIMAGE='$image'"

    write_file aro_monitor_service_conf_filename aro_monitor_service_conf_file true

    # shellcheck disable=SC2034
    local -r aro_monitor_service_filename='/etc/systemd/system/aro-monitor.service'
    # $image and $network are expanded here, when the unit text is generated;
    # %N and %H are written out literally. Backslash-newline pairs inside the
    # double quotes are removed by bash, so ExecStart ends up on a single line.
    # shellcheck disable=SC2034
    local -r aro_monitor_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=/etc/sysconfig/aro-monitor
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  --network=$network \
  -e AZURE_FP_CLIENT_ID \
  -e DOMAIN_NAME \
  -e CLUSTER_MDSD_ACCOUNT \
  -e CLUSTER_MDSD_CONFIG_VERSION \
  -e GATEWAY_DOMAINS \
  -e GATEWAY_RESOURCEGROUP \
  -e MDSD_ENVIRONMENT \
  -e CLUSTER_MDSD_NAMESPACE \
  -e CLUSTER_MDM_ACCOUNT \
  -e CLUSTER_MDM_NAMESPACE \
  -e DATABASE_ACCOUNT_NAME \
  -e KEYVAULT_PREFIX \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -m 2.5g \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  $image \
  monitor
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file aro_monitor_service_filename aro_monitor_service_file true
}

# configure_service_aro_portal writes /etc/sysconfig/aro-portal and the
# aro-portal systemd unit. It does not enable or start the unit.
# The environment file is built from global variables (PORTALACCESSGROUPIDS,
# PORTALCLIENTID, LOCATION, ...) that must already be set by the caller's script.
# args:
# 1) image - nameref, string; RP container image
# 2) network - nameref, string; podman network name to be attached
configure_service_aro_portal() {
    local -n image="$1"
    local -n network="$2"
    log "starting"
    log "Configuring aro portal service"

    # shellcheck disable=SC2034
    local -r aro_portal_service_conf_filename='/etc/sysconfig/aro-portal'
    # Values are expanded now and wrapped in single quotes in the written file
    # shellcheck disable=SC2034
    local -r aro_portal_service_conf_file="AZURE_PORTAL_ACCESS_GROUP_IDS='$PORTALACCESSGROUPIDS'
AZURE_PORTAL_CLIENT_ID='$PORTALCLIENTID'
AZURE_PORTAL_ELEVATED_GROUP_IDS='$PORTALELEVATEDGROUPIDS'
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
KEYVAULT_PREFIX='$KEYVAULTPREFIX'
MDM_ACCOUNT='$RPMDMACCOUNT'
MDM_NAMESPACE=Portal
PORTAL_HOSTNAME='$LOCATION.admin.$RPPARENTDOMAINNAME'
RPIMAGE='$image'"

    write_file aro_portal_service_conf_filename aro_portal_service_conf_file true

    # shellcheck disable=SC2034
    local -r aro_portal_service_filename='/etc/systemd/system/aro-portal.service'
    # $image and $network are expanded here, when the unit text is generated;
    # %N and %H are written out literally. Backslash-newline pairs inside the
    # double quotes are removed by bash, so ExecStart ends up on a single line.
    # NOTE(review): StartLimitInterval=0 sits in [Unit] here while the other ARO
    # units in this file place it in [Service]
    # shellcheck disable=SC2034
    local -r aro_portal_service_file="[Unit]
After=network-online.target
Wants=network-online.target
StartLimitInterval=0

[Service]
EnvironmentFile=/etc/sysconfig/aro-portal
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  --network=$network \
  -e AZURE_PORTAL_ACCESS_GROUP_IDS \
  -e AZURE_PORTAL_CLIENT_ID \
  -e AZURE_PORTAL_ELEVATED_GROUP_IDS \
  -e DATABASE_ACCOUNT_NAME \
  -e KEYVAULT_PREFIX \
  -e MDM_ACCOUNT \
  -e MDM_NAMESPACE \
  -e PORTAL_HOSTNAME \
  -m 2g \
  -p 444:8444 \
  -p 2222:2222 \
  -v /run/systemd/journal:/run/systemd/journal \
  -v /var/etw:/var/etw:z \
  $image \
  portal
Restart=always
RestartSec=1

[Install]
WantedBy=multi-user.target"

    write_file aro_portal_service_filename aro_portal_service_file true
}

# configure_service_mdsd writes a systemd drop-in for mdsd.service and the
# /etc/default/mdsd environment file. It does not enable or start mdsd.
# Reads the globals MDSDENVIRONMENT, RPMDSDACCOUNT, LOCATION and RPMDSDNAMESPACE.
# args:
# 1) role - nameref, string; can be "gateway" or "rp", written as MONITORING_ROLE
# 2) monitor_config_version - nameref, string; mdsd config version
configure_service_mdsd() {
    local -n role="$1"
    local -n monitor_config_version="$2"
    log "starting"
    log "configuring mdsd service"

    verify_role role

    local -r mdsd_service_dir="/etc/systemd/system/mdsd.service.d"
    mkdir -p "$mdsd_service_dir"

    # shellcheck disable=SC2034
    local -r mdsd_override_conf_filename="$mdsd_service_dir/override.conf"
    # Everything after "CN = " in the subject of the mdsd certificate; used below as
    # MONITORING_GCS_AUTH_ID. The certificate must already exist at this path.
    local -r mdsd_certificate_san="$(openssl x509 -in /var/lib/waagent/Microsoft.Azure.KeyVault.Store/mdsd.pem -noout -subject | sed -e 's/.*CN = //')"
    # shellcheck disable=SC2034
    local -r mdsd_override_conf_file="[Unit]
After=network-online.target"

    write_file mdsd_override_conf_filename mdsd_override_conf_file true

    # shellcheck disable=SC2034
    local -r default_mdsd_filename="/etc/default/mdsd"
    # \$MDSD_ROLE_PREFIX is escaped so it is written literally; every other
    # variable and \$(hostname) are expanded now, while the file is generated
    # shellcheck disable=SC2034
    local -r default_mdsd_file="MDSD_ROLE_PREFIX=/var/run/mdsd/default
MDSD_OPTIONS=\"-A -d -r \$MDSD_ROLE_PREFIX\"

export MONITORING_GCS_ENVIRONMENT='$MDSDENVIRONMENT'
export MONITORING_GCS_ACCOUNT='$RPMDSDACCOUNT'
export MONITORING_GCS_REGION='$LOCATION'
export MONITORING_GCS_AUTH_ID_TYPE=AuthKeyVault
export MONITORING_GCS_AUTH_ID='$mdsd_certificate_san'
export MONITORING_GCS_NAMESPACE='$RPMDSDNAMESPACE'
export MONITORING_CONFIG_VERSION='$monitor_config_version'
export MONITORING_USE_GENEVA_CONFIG_SERVICE=true

export MONITORING_TENANT='$LOCATION'
export MONITORING_ROLE='$role'
export MONITORING_ROLE_INSTANCE=\"$(hostname)\"

export MDSD_MSGPACK_SORT_COLUMNS=\"1\""

    write_file default_mdsd_filename default_mdsd_file true
}

# configure_service_fluentbit creates the fluentbit directories and writes the
# fluentbit configuration, its sysconfig file and the fluentbit systemd unit.
# It does not enable or start the unit.
# args:
# 1) conf_file - nameref, string; fluentbit configuration file contents
# 2) image - nameref, string; fluentbit container image to run
# 3) network - nameref, string; podman network name to be attached
#       NOTE(review): network is bound but never referenced below; the unit
#       runs the container with --net=host
configure_service_fluentbit() {
    # shellcheck disable=SC2034
    local -n conf_file="$1"
    local -n image="$2"
    local -n network="$3"
    log "starting"
    log "Configuring fluentbit service"

    mkdir -p /etc/fluentbit/
    mkdir -p /var/lib/fluent

    # shellcheck disable=SC2034
    local -r conf_filename='/etc/fluentbit/fluentbit.conf'
    write_file conf_filename conf_file true

    # shellcheck disable=SC2034
    local -r sysconfig_filename='/etc/sysconfig/fluentbit'
    # shellcheck disable=SC2034
    local -r sysconfig_file="FLUENTBITIMAGE=$image"

    write_file sysconfig_filename sysconfig_file true

    # shellcheck disable=SC2034
    local -r service_filename='/etc/systemd/system/fluentbit.service'
    # $image is expanded here, when the unit text is generated; %N and %H are
    # written out literally. Backslash-newline pairs inside the double quotes
    # are removed by bash, so ExecStart ends up on a single line.
    # NOTE(review): RestartSec is set twice in [Service] (1s, then 5)
    # shellcheck disable=SC2034
    local -r service_file="[Unit]
After=network-online.target
Wants=network-online.target
StartLimitIntervalSec=0

[Service]
RestartSec=1s
EnvironmentFile=/etc/sysconfig/fluentbit
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --security-opt label=disable \
  --entrypoint /opt/td-agent-bit/bin/td-agent-bit \
  --net=host \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  -v /etc/fluentbit/fluentbit.conf:/etc/fluentbit/fluentbit.conf \
  -v /var/lib/fluent:/var/lib/fluent:z \
  -v /var/log/journal:/var/log/journal:ro \
  -v /etc/machine-id:/etc/machine-id:ro \
  $image \
  -c /etc/fluentbit/fluentbit.conf

ExecStop=/usr/bin/podman stop %N
Restart=always
RestartSec=5
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file service_filename service_file true
}

# configure_timers_mdm_mdsd writes the credential refresh units for mdsd and mdm
# (a oneshot service plus a timer each), generates the download-credentials.sh
# script they run, runs that script once for each component and finally enables
# and starts a path unit that restarts mdm when /etc/mdm.pem changes.
# The timers are written but not enabled here.
# Reads the globals KEYVAULTPREFIX and KEYVAULTDNSSUFFIX.
# args:
# 1) role - nameref, string; can be "gateway" or "rp"
configure_timers_mdm_mdsd() {
    local -n role="$1"
    log "starting"

    verify_role role

    local keyvault_suffix secret_prefix
    get_keyvault_suffix role keyvault_suffix secret_prefix

    local var
    for var in "mdsd" "mdm"; do
        # shellcheck disable=SC2034
        local download_creds_service_filename="/etc/systemd/system/download-$var-credentials.service"
        # shellcheck disable=SC2034
        local download_creds_service_file="[Unit]
Description=Periodic $var credentials refresh

[Service]
Type=oneshot
ExecStart=/usr/local/bin/download-credentials.sh $var"

        write_file download_creds_service_filename download_creds_service_file true

        # shellcheck disable=SC2034
        local download_creds_timer_filename="/etc/systemd/system/download-$var-credentials.timer"
        # shellcheck disable=SC2034
        local download_creds_timer_file="[Unit]
Description=Periodic $var credentials refresh
After=network-online.target
Wants=network-online.target

[Timer]
OnBootSec=0min
OnCalendar=0/12:00:00
AccuracySec=5s

[Install]
WantedBy=timers.target"

        write_file download_creds_timer_filename download_creds_timer_file true
    done

    local -r download_creds_script_filename="/usr/local/bin/download-credentials.sh"
    # Escaped \$ sequences are written literally and evaluated when the
    # generated script runs; $secret_prefix, $keyvault_suffix, $KEYVAULTPREFIX
    # and $KEYVAULTDNSSUFFIX are expanded now.
    # In the generated script the cleanup trap is installed before logging in,
    # so the temporary directories are removed on login failure as well, and a
    # login that never succeeds ends the script with an explicit error.
    # shellcheck disable=SC2034
    local -r download_creds_script_file="#!/bin/bash
set -eu

COMPONENT=\$1
echo \"Download \$COMPONENT credentials\"

TEMP_DIR=\"\$(mktemp -d)\"
export AZURE_CONFIG_DIR=\"\$(mktemp -d)\"

cleanup() {
  az logout || true
  [[ \$TEMP_DIR =~ /tmp/.+ ]] && rm -rf \$TEMP_DIR
  [[ \$AZURE_CONFIG_DIR =~ /tmp/.+ ]] && rm -rf \$AZURE_CONFIG_DIR
}

trap \"cleanup\" EXIT

echo \"Logging into Azure...\"
RETRIES=3
LOGGED_IN=false
while [[ \$RETRIES -gt 0 ]]; do
    if az login -i --allow-no-subscriptions
    then
        echo \"az login successful\"
        LOGGED_IN=true
        break
    else
        echo \"az login failed. Retrying...\"
        RETRIES=\$((RETRIES - 1))
        sleep 5
    fi
done

if [[ \$LOGGED_IN != true ]]; then
  echo \"az login failed, giving up\" && exit 1
fi

if [[ \$COMPONENT = \"mdm\" ]]; then
  CURRENT_CERT_FILE=\"/etc/mdm.pem\"
elif [[ \$COMPONENT = \"mdsd\" ]]; then
  CURRENT_CERT_FILE=\"/var/lib/waagent/Microsoft.Azure.KeyVault.Store/mdsd.pem\"
else
  echo Invalid usage && exit 1
fi

SECRET_NAME=\"$secret_prefix-\${COMPONENT}\"
NEW_CERT_FILE=\"\$TEMP_DIR/\$COMPONENT.pem\"
for attempt in {1..5}; do
  az keyvault \
    secret \
    download \
    --file \"\$NEW_CERT_FILE\" \
    --id \"https://$KEYVAULTPREFIX-$keyvault_suffix.$KEYVAULTDNSSUFFIX/secrets/\$SECRET_NAME\" \
    && break
  if [[ \$attempt -lt 5 ]]; then sleep 10; else exit 1; fi
done

if [ -f \$NEW_CERT_FILE ]; then
  if [[ \$COMPONENT = \"mdsd\" ]]; then
    chown syslog:syslog \$NEW_CERT_FILE
  else
    sed -i -ne '1,/END CERTIFICATE/ p' \$NEW_CERT_FILE
  fi

  new_cert_sn=\"\$(openssl x509 -in \"\$NEW_CERT_FILE\" -noout -serial | awk -F= '{print \$2}')\"
  current_cert_sn=\"\$(openssl x509 -in \"\$CURRENT_CERT_FILE\" -noout -serial | awk -F= '{print \$2}')\"
  if [[ ! -z \$new_cert_sn ]] && [[ \$new_cert_sn != \"\$current_cert_sn\" ]]; then
    echo updating certificate for \$COMPONENT
    chmod 0600 \$NEW_CERT_FILE
    mv \$NEW_CERT_FILE \$CURRENT_CERT_FILE
  fi
else
  echo Failed to refresh certificate for \$COMPONENT && exit 1
fi"

    write_file download_creds_script_filename download_creds_script_file true

    chmod u+x "$download_creds_script_filename"

    # Each download is waited on so the certificate is in place before anything
    # that depends on it is configured
    "$download_creds_script_filename" mdsd &
    wait "$!"

    "$download_creds_script_filename" mdm &
    wait "$!"

    # shellcheck disable=SC2034
    local -r watch_mdm_creds_service_filename="/etc/systemd/system/watch-mdm-credentials.service"
    # shellcheck disable=SC2034
    local -r watch_mdm_creds_service_file="[Unit]
Description=Watch for changes in mdm.pem and restarts the mdm service

[Service]
Type=oneshot
ExecStart=/usr/bin/systemctl restart mdm.service

[Install]
WantedBy=multi-user.target"

    write_file watch_mdm_creds_service_filename watch_mdm_creds_service_file true

    # shellcheck disable=SC2034
    local -r watch_mdm_creds_path_filename='/usr/lib/systemd/system/watch-mdm-credentials.path'
    # shellcheck disable=SC2034
    local -r watch_mdm_creds_path_file='[Path]
PathModified=/etc/mdm.pem

[Install]
WantedBy=multi-user.target'

    write_file watch_mdm_creds_path_filename watch_mdm_creds_path_file true

    local -r watch_mdm_creds='watch-mdm-credentials.path'
    systemctl enable --now "$watch_mdm_creds" || abort "failed to enable and start $watch_mdm_creds"
}

# configure_service_mdm writes /etc/sysconfig/mdm, creates /var/etw and writes
# the mdm systemd unit. It does not enable or start the unit.
# Reads the globals MDMFRONTENDURL, LOCATION and HOSTNAME.
# args:
# 1) role - nameref, string; can be "gateway" or "rp"
# 2) image - nameref, string; mdm container image to run
# 3) network - nameref, string; podman network name to be attached
configure_service_mdm() {
    local -n role="$1"
    local -n image="$2"
    local -n network="$3"
    log "starting"
    log "Configuring mdm service"

    verify_role role

    # shellcheck disable=SC2034
    local -r sysconfig_mdm_filename="/etc/sysconfig/mdm"
    # All values, including $(hostname), are expanded now while the file is generated
    # shellcheck disable=SC2034
    local -r sysconfig_mdm_file="MDMFRONTENDURL='$MDMFRONTENDURL'
MDMIMAGE='$image'
MDMSOURCEENVIRONMENT='$LOCATION'
MDMSOURCEROLE='$role'
MDMSOURCEROLEINSTANCE=\"$(hostname)\""

    write_file sysconfig_mdm_filename sysconfig_mdm_file true

    mkdir -p /var/etw
    # shellcheck disable=SC2034
    local -r mdm_service_filename="/etc/systemd/system/mdm.service"
    # $network, $image, $MDMFRONTENDURL, $LOCATION, $role and $HOSTNAME are
    # expanded here, when the unit text is generated; %N and %H are written out
    # literally. Backslash-newline pairs inside the double quotes are removed by
    # bash, so ExecStart ends up on a single line.
    # NOTE(review): the role instance comes from $HOSTNAME here but from
    # $(hostname) in the sysconfig file above
    # shellcheck disable=SC2034
    local -r mdm_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=/etc/sysconfig/mdm
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
  --entrypoint /usr/sbin/MetricsExtension \
  --hostname %H \
  --name %N \
  --rm \
  --cap-drop net_raw \
  --network=$network \
  -m 2g \
  -v /etc/mdm.pem:/etc/mdm.pem \
  -v /var/etw:/var/etw:z \
  $image \
  -CertFile /etc/mdm.pem \
  -FrontEndUrl $MDMFRONTENDURL \
  -Logger Console \
  -LogLevel Warning \
  -PrivateKeyFile /etc/mdm.pem \
  -SourceEnvironment $LOCATION \
  -SourceRole $role \
  -SourceRoleInstance $HOSTNAME
ExecStop=/usr/bin/podman stop %N
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file mdm_service_filename mdm_service_file true
}

# configure_vmss_aro_services configures the role specific ARO services, then
# the services shared by every role (fluentbit, mdm/mdsd timers, mdm, mdsd),
# and finally runs run_azsecd_config_scan
# args:
# 1) r - nameref, string; role of VMSS
# 2) images - nameref, associative array; ARO container images, keyed by
#       "rp", "fluentbit" and "mdm"
# 3) configs - nameref, associative array; configuration files and versions. The values should be
#       variable names, not dereferenced values, because the helper functions bind namerefs to them.
#       Keys read here: "gateway_config", "rp_config", "network", "fluentbit", "mdsd"
configure_vmss_aro_services() {
    local -n r="$1"
    local -n images="$2"
    local -n configs="$3"
    log "starting"
    verify_role "$1"

    # "$1" (the variable name) is forwarded wherever a helper expects a nameref
    case "$r" in
        "$role_gateway")
            configure_service_aro_gateway "${images["rp"]}" "$1" "${configs["gateway_config"]}" "${configs["network"]}"
            ;;
        "$role_rp")
            configure_service_aro_rp "${images["rp"]}" "$1" "${configs["rp_config"]}" "${configs["network"]}"
            configure_service_aro_monitor "${images["rp"]}" "${configs["network"]}"
            configure_service_aro_portal "${images["rp"]}" "${configs["network"]}"
            ;;
    esac

    configure_service_fluentbit "${configs["fluentbit"]}" "${images["fluentbit"]}" "${configs["network"]}"
    configure_timers_mdm_mdsd "$1"
    configure_service_mdm "$1" "${images["mdm"]}" "${configs["network"]}"
    configure_service_mdsd "$1" "${configs["mdsd"]}"
    run_azsecd_config_scan
}

# Pull in the shared helpers only when util-common.sh sits next to this script;
# when the file is absent nothing is sourced and execution simply continues.
util_common="util-common.sh"
if [[ -f "${util_common}" ]]; then
    # shellcheck source=util-common.sh
    . "${util_common}"
fi
#!/bin/bash
# This file is intended to be sourced by bootstrapping scripts for commonly used functions

# configure_sshd
# VMSS Access JIT only works when sshd accepts passwords, so every
# "PasswordAuthentication no" in sshd_config is rewritten to "yes" and sshd is
# reloaded. abort is called when the reload fails.
configure_sshd() {
    log "starting"
    local -r sshd_config_path="/etc/ssh/sshd_config"
    local -r allow_passwords='s/PasswordAuthentication no/PasswordAuthentication yes/g'

    log "Editing $sshd_config_path to allow password authentication"
    sed -i "$allow_passwords" "$sshd_config_path"

    if ! systemctl reload sshd.service; then
        abort "sshd failed to reload"
    fi
}

# configure_logrotate clobbers /etc/logrotate.conf
# Writes the main logrotate configuration and, when drop-ins are supplied, one
# file per drop-in below /etc/logrotate.d. All files are written through write_file.
# args:
# 1) dropin_files - nameref, associative array, optional; logrotate files to write to /etc/logrotate.d
#       Key name dictates filenames written to /etc/logrotate.d.
#       When omitted, the nameref is bound to the variable empty_str, which is defined outside this function.
# Example:
#   Key dictates the filename written in /etc/logrotate.d
#   shellcheck disable=SC2034
#   local -rA logrotate_dropins=(
#      ["gateway"]="$gateway_log_file"
#   )
configure_logrotate() {
    local -n dropin_files="${1:-empty_str}"
    log "starting"

    # shellcheck disable=SC2034
    local -r logrotate_conf_filename='/etc/logrotate.conf'
    # shellcheck disable=SC2034
    local -r logrotate_conf_file='# see "man logrotate" for details
# rotate log files weekly
weekly

# keep 2 weeks worth of backlogs
rotate 2

# create new (empty) log files after rotating old ones
create

# use date as a suffix of the rotated file
dateext

# uncomment this if you want your log files compressed
compress

# RPM packages drop log rotation information into this directory
include /etc/logrotate.d

# no packages own wtmp and btmp -- we will rotate them here
/var/log/wtmp {
    monthly
    create 0664 root utmp
        minsize 1M
    rotate 1
}

/var/log/btmp {
    missingok
    monthly
    create 0600 root utmp
    rotate 1
}'

    write_file logrotate_conf_filename logrotate_conf_file true

    if [ -n "${dropin_files[*]}" ]; then
        local -r logrotate_d="/etc/logrotate.d"
        log "Writing logrotate files to $logrotate_d"

        # These are reassigned on every iteration, so they must NOT be readonly:
        # re-declaring a readonly local on the second iteration fails and the
        # first drop-in would be written again instead of the next one.
        # write_file receives them by name, hence the fixed variable names.
        # shellcheck disable=SC2034
        local dropin_name dropin_filename dropin_file
        for dropin_name in "${!dropin_files[@]}"; do
            dropin_filename="$logrotate_d/$dropin_name"
            dropin_file="${dropin_files["$dropin_name"]}"
            write_file dropin_filename dropin_file true
        done
    fi
}

# pull_container_images
# Runs, each through retry: az login with the managed identity, az acr login
# against the registry named by the last path segment of $ACRRESOURCEID, one
# podman pull per requested image, and finally az logout.
# args:
# 1) pull_images - nameref, array; every element is the NAME of a variable that holds an image reference
# 2) registry_conf - nameref, string, optional; when non-empty it is handed to write_file together with
#       REGISTRY_AUTH_FILE (/root/.docker/config.json). It is passed in the position where other callers
#       in this file pass file contents - NOTE(review): confirm against write_file.
#       When omitted, the nameref is bound to the variable empty_str, which is defined outside this function.
pull_container_images() {
    local -n pull_images="$1"
    local -n registry_conf="${2:-empty_str}"
    log "starting"

    # shellcheck disable=SC2034
    local -ri retry_time=30
    # retry receives the command by NAME, so the variable must be called cmd.
    # It is local so that it does not leak into, or clobber, the caller's scope.
    local -a cmd

    # The managed identity that the VM runs as only has a single roleassignment.
    # This role assignment is ACRPull which is not necessarily present in the
    # subscription we're deploying into.  If the identity does not have any
    # role assignments scoped on the subscription we're deploying into, it will
    # not show on az login -i, which is why the below line is commented.
    # az account set -s "$SUBSCRIPTIONID"
    cmd=(
        az
        login
        -i
        --allow-no-subscriptions
    )

    log "Running az login with retries"
    retry cmd retry_time

    # Suppress emulation output for podman instead of docker for az acr compatibility
    mkdir -p /etc/containers/
    mkdir -p /root/.docker
    touch /etc/containers/nodocker

    # This name is used in the case that az acr login searches for this in its environment
    export REGISTRY_AUTH_FILE="/root/.docker/config.json"

    if [ -n "${registry_conf}" ]; then
        write_file REGISTRY_AUTH_FILE registry_conf true
    fi

    log "logging into prod acr"
    cmd=(
        az
        acr
        login
        --name
        # The registry name is everything after the last "/" of the resource ID
        "${ACRRESOURCEID##*/}"
    )

    retry cmd retry_time

    local image_var image
    for image_var in "${pull_images[@]}"; do
        # Each element names the variable that holds the image reference
        image="${!image_var}"
        cmd=(
            podman
            pull
            "$image"
        )

        log "Pulling image $image with retries now"
        retry cmd retry_time
    done

    # shellcheck disable=SC2034
    cmd=(
        az
        logout
    )

    log "Running az logout with retries"
    retry cmd retry_time
}

# configure_certs
# Decodes the base64 encoded certificates of the given role onto disk.
#   "devproxy": proxy.crt, proxy.key and proxy-client.crt below /etc/proxy, then returns.
#   "rp":       admin-ca-bundle.pem (and arm-ca-bundle.pem when ARMAPICABUNDLE is non-empty) below /etc/aro-rp.
# Every role except "devproxy" additionally gets the system CA bundle split into
# single certificates below /usr/lib/ssl/certs and the vsa-nodescan-agent config written.
# args:
# 1) role - nameref, string; "devproxy" and "rp" have role specific certificates
configure_certs() {
    local -n role="$1"
    log "starting"
    log "Configuring certificates for $role"

    verify_role role true

    case "$role" in
        devproxy)
            local -r proxy_dir="/etc/proxy"
            mkdir -p "$proxy_dir"
            base64 -d > "$proxy_dir/proxy.crt" <<<"$PROXYCERT"
            base64 -d > "$proxy_dir/proxy.key" <<<"$PROXYKEY"
            base64 -d > "$proxy_dir/proxy-client.crt" <<<"$PROXYCLIENTCERT"
            chown -R 1000:1000 "$proxy_dir"
            chmod 0600 "$proxy_dir/proxy.key"
            # the dev proxy needs none of the steps below
            return 0
            ;;
        rp)
            local -r rp_dir="/etc/aro-rp"
            mkdir -p "$rp_dir"
            base64 -d > "$rp_dir/admin-ca-bundle.pem" <<<"$ADMINAPICABUNDLE"
            if [[ -n "$ARMAPICABUNDLE" ]]; then
                base64 -d > "$rp_dir/arm-ca-bundle.pem" <<<"$ARMAPICABUNDLE"
            fi
            chown -R 1000:1000 "$rp_dir"
            ;;
    esac

    # setting MONITORING_GCS_AUTH_ID_TYPE=AuthKeyVault seems to have caused mdsd not
    # to honour SSL_CERT_FILE any more, heaven only knows why.
    # Split the bundle at every empty line into cert-NNN.pem files and hash the directory.
    local -r ssl_dir="/usr/lib/ssl/certs"
    mkdir -p "$ssl_dir"
    csplit -f "$ssl_dir/cert-" -b %03d.pem /etc/pki/tls/certs/ca-bundle.crt '/^$/1' '{*}' >/dev/null
    c_rehash "$ssl_dir"

    # we leave clientId blank as long as only 1 managed identity assigned to vmss
    # if we have more than 1, we will need to populate with clientId used for off-node scanning
    # write_file receives both variables by name
    # shellcheck disable=SC2034
    local -r nodescan_agent_filename="/etc/default/vsa-nodescan-agent.config"
    # shellcheck disable=SC2034
    local -r nodescan_agent_file="{
    \"Nice\": 19,
    \"Timeout\": 10800,
    \"ClientId\": \"\",
    \"TenantId\": $AZURESECPACKVSATENANTID,
    \"QualysStoreBaseUrl\": $AZURESECPACKQUALYSURL,
    \"ProcessTimeout\": 300,
    \"CommandDelay\": 0
  }"

    write_file nodescan_agent_filename nodescan_agent_file true
}

# run_azsecd_config_scan
# Runs "azsecd config -s <scan> -d P1D" once for each of the baseline, clamav
# and software scans.
run_azsecd_config_scan() {
    log "starting"

    local -ar configs=(
        "baseline"
        "clamav"
        "software"
    )

    log "Scanning configuration files with azsecd ${configs[*]}"
    # the loop variable is local so that it does not leak into the caller's scope
    local scan
    for scan in "${configs[@]}"; do
        log "Scanning config file $scan now"
        /usr/local/bin/azsecd config -s "$scan" -d P1D
    done
}

# create_required_dirs
# Creates, including missing parents, the fixed list of directories below.
# abort is called for the first directory that cannot be created.
create_required_dirs() {
    local -ra create_dirs=(
        /var/log/journal
        /var/lib/waagent/Microsoft.Azure.KeyVault.Store
        # Does not exist on devProxyVMSS
        /var/opt/microsoft/linuxmonagent
    )

    # both the list and the loop variable are local so nothing leaks into the caller's scope
    local d
    for d in "${create_dirs[@]}"; do
        log "Creating directory $d"
        mkdir -p "$d" || abort "failed to create directory $d"
    done
}

# create_podman_networks
# Runs "podman network create --subnet <subnet> <name>" for every entry.
# args:
# 1) nets - nameref, associative array; Networks to be created
#       Key is the network name, value is the subnet with cidr notation
create_podman_networks() {
    local -n nets="$1"
    log "starting"

    # Keys are expanded quoted so a key is never word-split, and the loop
    # variable is local so it does not leak into the caller's scope.
    local net_name
    for net_name in "${!nets[@]}"; do
        log "Creating podman network \"$net_name\" with subnet \"${nets["$net_name"]}\""
        podman network \
            create \
            --subnet "${nets["$net_name"]}" \
            "$net_name"
    done
}

# firewalld_configure_backend
# Rewrites FirewallBackend=nftables to FirewallBackend=iptables in
# /etc/firewalld/firewalld.conf. Only the file is edited here.
firewalld_configure_backend() {
    log "starting"

    log "Changing firewalld backend to iptables"
    # local so the path does not leak into the caller's scope
    local -r conf_file="/etc/firewalld/firewalld.conf"
    sed -i 's/FirewallBackend=nftables/FirewallBackend=iptables/g' "$conf_file"
}

# firewalld_configure
# Switches the firewalld backend (firewalld_configure_backend), enables the
# firewalld service (enable_services), opens the requested ports in the default
# zone and then persists the resulting runtime configuration.
# args:
# 1) ports - nameref, string array; ports to be enabled.
#       Ports must be postfixed with /tcp or /udp
firewalld_configure() {
    local -n ports="$1"
    log "starting"

    firewalld_configure_backend

    # enable_services receives the array by name
    # shellcheck disable=SC2034
    local -ra service=(
        "firewalld"
    )
    enable_services service

    log "Enabling ports ${ports[*]} on default firewalld zone"
    local port
    for port in "${ports[@]}"; do
        log "Enabling port $port now"
        # The port is added to the RUNTIME configuration on purpose.
        # --runtime-to-permanent below overwrites the permanent configuration
        # with the runtime one, so a port added with --permanent only would be
        # discarded by that call.
        firewall-cmd "--add-port=$port"
    done

    log "Writing runtime config to permanent config"
    firewall-cmd --runtime-to-permanent
}

#Start of rpVMSS.sh
#!/bin/bash

# errexit (-e), nounset (-u) and pipefail: stop on the first failing command,
# on any use of an unset variable, and on a failure anywhere in a pipeline
set -euo pipefail

# main
# Bootstraps an RP VMSS instance. In order it: creates the required directories,
# configures sshd and the rpm repositories, updates and installs packages, sets up
# the dnf cron job and logrotate, pulls the container images, creates the podman
# network, opens the firewall ports, configures and enables the ARO services and
# finally reboots the VM.
# Every helper called here is defined outside this function; role_rp and the
# upper-case variables (RPIMAGE, LOCATION, ...) are likewise read from outside.
main() {
    # transaction attempt retry time in seconds
    # shellcheck disable=SC2034
    local -ri retry_wait_time=30
    # Passed by value to the package helpers below.
    # NOTE(review): presumably the number of attempts; 60 x 30 seconds would match
    # the 30 minute total named in the commit message - confirm in the helpers.
    local -ri pkg_retry_count=60

    create_required_dirs
    configure_sshd
    # retry_wait_time is passed by name, pkg_retry_count by value
    configure_rpm_repos retry_wait_time \
                        "$pkg_retry_count"

    # Each element is a complete "-x <package>" pair
    # shellcheck disable=SC2034
    local -ar exclude_pkgs=(
        "-x WALinuxAgent"
        "-x WALinuxAgent-udev"
    )

    dnf_update_pkgs exclude_pkgs \
                    retry_wait_time \
                    "$pkg_retry_count"

    # shellcheck disable=SC2034
    local -ra install_pkgs=(
        azure-cli
        clamav
        azsec-clamav
        # NOTE(review): azure-cli is already the first entry of this array
        azure-cli
        azure-mdsd
        azure-security
        podman
        podman-docker
        openssl-perl
        # hack - we are installing python3 on hosts due to an issue with Azure Linux Extensions https://github.com/Azure/azure-linux-extensions/pull/1505
        python3
        # required for podman networking
        firewalld
    )

    dnf_install_pkgs install_pkgs \
                     retry_wait_time \
                     "$pkg_retry_count"

    # TODO remove this once MicrosoftCBLMariner:cbl-mariner:cbl-mariner-2-gen2-fips supports automatic updates
    # Reference: https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade#supported-os-images
    configure_dnf_cron_job

    # Called without arguments: only /etc/logrotate.conf is written, no drop-in files
    # shellcheck disable=SC2119
    configure_logrotate

    # mdmimage = everything before the first "/" of RPIMAGE, followed by
    # everything after the first "/" of MDMIMAGE
    # shellcheck disable=SC2153 disable=SC2034
    local -r mdmimage="${RPIMAGE%%/*}/${MDMIMAGE#*/}"
    local -r rpimage="$RPIMAGE"
    # shellcheck disable=SC2034
    local -r fluentbit_image="$FLUENTBITIMAGE"
    # Values are the NAMES of the variables above, not their contents
    # shellcheck disable=SC2034
    local -rA aro_images=(
        ["mdm"]="mdmimage"
        ["rp"]="rpimage"
        ["fluentbit"]="fluentbit_image"
    )

    pull_container_images aro_images

    # Key is the podman network name, value its subnet in cidr notation
    local -r aro_network="aro"
    # shellcheck disable=SC2034
    local -rA networks=(
        ["$aro_network"]="192.168.254.0/24"
    )
    create_podman_networks networks

    # shellcheck disable=SC2034
    local -ra enable_ports=(
        # RP frontend
        "443/tcp"
        # Portal web
        "444/tcp"
        # Portal ssh
        "2222/tcp"
        # JIT ssh
        "22/tcp"
    )

    firewalld_configure enable_ports

    # fluentbit configuration; \$LOGKIND is escaped so that it reaches the file literally
    # shellcheck disable=SC2034
    local -r fluentbit_conf_file="[INPUT]
Name systemd
Tag journald
Systemd_Filter _COMM=aro
DB /var/lib/fluent/journaldb

[FILTER]
	Name modify
	Match journald
	Remove_wildcard _
	Remove TIMESTAMP

[FILTER]
	Name rewrite_tag
	Match journald
	Rule \$LOGKIND asyncqos asyncqos true

[FILTER]
	Name modify
	Match asyncqos
	Remove CLIENT_PRINCIPAL_NAME
	Remove FILE
	Remove COMPONENT

[FILTER]
	Name rewrite_tag
	Match journald
	Rule \$LOGKIND ifxaudit ifxaudit false

[OUTPUT]
	Name forward
	Match *
	Port 29230"


    # shellcheck disable=SC2034
    local -r mdsd_config_version="$RPMDSDCONFIGVERSION"
    # KEY='value' lines for the aro-rp service; every \$VARIABLE is expanded here, when the string is built
    # shellcheck disable=SC2034
    local -r aro_rp_conf_file="ACR_RESOURCE_ID='$ACRRESOURCEID'
ADMIN_API_CLIENT_CERT_COMMON_NAME='$ADMINAPICLIENTCERTCOMMONNAME'
ARM_API_CLIENT_CERT_COMMON_NAME='$ARMAPICLIENTCERTCOMMONNAME'
AZURE_ARM_CLIENT_ID='$ARMCLIENTID'
AZURE_FP_CLIENT_ID='$FPCLIENTID'
AZURE_FP_SERVICE_PRINCIPAL_ID='$FPSERVICEPRINCIPALID'
CLUSTER_MDM_ACCOUNT='$CLUSTERMDMACCOUNT'
CLUSTER_MDM_NAMESPACE=RP
CLUSTER_MDSD_ACCOUNT='$CLUSTERMDSDACCOUNT'
CLUSTER_MDSD_CONFIG_VERSION='$CLUSTERMDSDCONFIGVERSION'
CLUSTER_MDSD_NAMESPACE='$CLUSTERMDSDNAMESPACE'
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
DOMAIN_NAME='$LOCATION.$CLUSTERPARENTDOMAINNAME'
GATEWAY_DOMAINS='$GATEWAYDOMAINS'
GATEWAY_RESOURCEGROUP='$GATEWAYRESOURCEGROUPNAME'
KEYVAULT_PREFIX='$KEYVAULTPREFIX'
MDM_ACCOUNT='$RPMDMACCOUNT'
MDM_NAMESPACE='${role_rp^^}'
MDSD_ENVIRONMENT='$MDSDENVIRONMENT'
RP_FEATURES='$RPFEATURES'
RPIMAGE='$rpimage'
ARO_INSTALL_VIA_HIVE='$CLUSTERSINSTALLVIAHIVE'
ARO_HIVE_DEFAULT_INSTALLER_PULLSPEC='$CLUSTERDEFAULTINSTALLERPULLSPEC'
ARO_ADOPT_BY_HIVE='$CLUSTERSADOPTBYHIVE'
OIDC_AFD_ENDPOINT='$LOCATION.oic.$RPPARENTDOMAINNAME'
OIDC_STORAGE_ACCOUNT_NAME='$OIDCSTORAGEACCOUNTNAME'
"

    # values are references to variables, they should not be dereferenced here
    # shellcheck disable=SC2034
    local -rA aro_configs=(
        ["rp_config"]="aro_rp_conf_file"
        ["fluentbit"]="fluentbit_conf_file"
        ["mdsd"]="mdsd_config_version"
        ["network"]="aro_network"
    )

    # All three arguments are variable names
    configure_vmss_aro_services role_rp \
                                aro_images \
                                aro_configs

    # shellcheck disable=SC2034
    local -ra aro_services=(
        "aro-monitor"
        "aro-portal"
        "aro-rp"
        "azsecd"
        "mdsd"
        "mdm"
        "chronyd"
        "fluentbit"
        "download-mdsd-credentials.timer"
        "download-mdm-credentials.timer"
        "firewalld"
    )

    enable_services aro_services

    # Last step of the bootstrap
    reboot_vm
}

# Abort immediately when AZURECLOUDNAME was not handed over to this script;
# otherwise publish its value to child processes as AZURE_CLOUD_NAME.
AZURE_CLOUD_NAME="${AZURECLOUDNAME:?"Failed to carry over variables"}"
export AZURE_CLOUD_NAME

# Pull in the shared helper functions when util.sh is present in the
# current working directory.
# On VMSS deployments (custom script extension) util.sh is not shipped as a
# separate file: the generator prepends the util-*.sh helper scripts to this
# script, so the helpers are already defined and nothing is sourced here.
util="util.sh"
if [[ -f "$util" ]]; then
    # shellcheck source=util.sh
    source "$util"
fi

# Run the bootstrap, forwarding every command line argument unchanged.
main "$@"
')))]" + } + } + }, + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": true, + "enableAutomaticUpgrade": true, + "settings": { + "GCS_AUTO_CONFIG": true } } } diff --git a/pkg/deploy/generator/resources_dev.go b/pkg/deploy/generator/resources_dev.go index 1bdb461e6da..a7864df136a 100644 --- a/pkg/deploy/generator/resources_dev.go +++ b/pkg/deploy/generator/resources_dev.go @@ -101,7 +101,11 @@ func (g *generator) devProxyVMSS() *arm.Resource { ) } - trailer := base64.StdEncoding.EncodeToString(scriptDevProxyVMSS) + var sb strings.Builder + + sb.WriteString(string(scriptDevProxyVMSS)) + + trailer := base64.StdEncoding.EncodeToString([]byte(sb.String())) parts = append(parts, "'\n'", fmt.Sprintf("base64ToString('%s')", trailer)) diff --git a/pkg/deploy/generator/resources_gateway.go b/pkg/deploy/generator/resources_gateway.go index ab3d1869e27..2fea65b04df 100644 --- a/pkg/deploy/generator/resources_gateway.go +++ b/pkg/deploy/generator/resources_gateway.go @@ -241,7 +241,19 @@ func (g *generator) gatewayVMSS() *arm.Resource { "''')\n'", ) - trailer := base64.StdEncoding.EncodeToString(scriptGatewayVMSS) + var sb strings.Builder + + // VMSS extensions only support one custom script + // Because of this, the util-*.sh scripts are prefixed to the bootstrapping script + // main is called at the end of the bootstrapping script, so appending them will not work + sb.WriteString(string(scriptUtilCommon)) + sb.WriteString(string(scriptUtilPackages)) + sb.WriteString(string(scriptUtilServices)) + sb.WriteString(string(scriptUtilSystem)) + sb.WriteString("\n#Start of gatewayVMSS.sh\n") + sb.WriteString(string(scriptGatewayVMSS)) + + trailer := base64.StdEncoding.EncodeToString([]byte(sb.String())) parts = append(parts, "'\n'", fmt.Sprintf("base64ToString('%s')", trailer)) @@ -340,6 +352,23 @@ func (g *generator) gatewayVMSS() 
*arm.Resource { }, }, }, + { + // az-secmonitor package no longer needs to be manually installed + // References: + // https://eng.ms/docs/products/azure-linux/gettingstarted/aks/monitoring + // https://msazure.visualstudio.com/ASMDocs/_wiki/wikis/ASMDocs.wiki/179541/Linux-AzSecPack-AutoConfig-Onboarding-(manual-for-C-AI)?anchor=3.1.1-using-arm-template-resource-elements + Name: to.StringPtr("AzureMonitorLinuxAgent"), + VirtualMachineScaleSetExtensionProperties: &mgmtcompute.VirtualMachineScaleSetExtensionProperties{ + Publisher: to.StringPtr("Microsoft.Azure.Monitor"), + EnableAutomaticUpgrade: to.BoolPtr(true), + AutoUpgradeMinorVersion: to.BoolPtr(true), + TypeHandlerVersion: to.StringPtr("1.0"), + Type: to.StringPtr("AzureMonitorLinuxAgent"), + Settings: map[string]interface{}{ + "GCS_AUTO_CONFIG": true, + }, + }, + }, }, }, DiagnosticsProfile: &mgmtcompute.DiagnosticsProfile{ diff --git a/pkg/deploy/generator/resources_rp.go b/pkg/deploy/generator/resources_rp.go index 9dcf881a97c..03bb2c8dc56 100644 --- a/pkg/deploy/generator/resources_rp.go +++ b/pkg/deploy/generator/resources_rp.go @@ -441,7 +441,19 @@ func (g *generator) rpVMSS() *arm.Resource { "''')\n'", ) - trailer := base64.StdEncoding.EncodeToString(scriptRpVMSS) + var sb strings.Builder + + // VMSS extensions only support one custom script + // Because of this, the util-*.sh scripts are prefixed to the bootstrapping script + // main is called at the end of the bootstrapping script, so appending them will not work + sb.WriteString(string(scriptUtilCommon)) + sb.WriteString(string(scriptUtilPackages)) + sb.WriteString(string(scriptUtilServices)) + sb.WriteString(string(scriptUtilSystem)) + sb.WriteString("\n#Start of rpVMSS.sh\n") + sb.WriteString(string(scriptRpVMSS)) + + trailer := base64.StdEncoding.EncodeToString([]byte(sb.String())) parts = append(parts, "'\n'", fmt.Sprintf("base64ToString('%s')", trailer)) @@ -476,6 +488,7 @@ func (g *generator) rpVMSS() *arm.Resource { }, }, StorageProfile: 
&mgmtcompute.VirtualMachineScaleSetStorageProfile{ + // https://eng.ms/docs/products/azure-linux/gettingstarted/azurevm/azurevm ImageReference: &mgmtcompute.ImageReference{ Publisher: to.StringPtr("RedHat"), Offer: to.StringPtr("RHEL"), @@ -537,6 +550,23 @@ func (g *generator) rpVMSS() *arm.Resource { }, }, }, + { + // az-secmonitor package no longer needs to be manually installed + // References: + // https://eng.ms/docs/products/azure-linux/gettingstarted/aks/monitoring + // https://msazure.visualstudio.com/ASMDocs/_wiki/wikis/ASMDocs.wiki/179541/Linux-AzSecPack-AutoConfig-Onboarding-(manual-for-C-AI)?anchor=3.1.1-using-arm-template-resource-elements + Name: to.StringPtr("AzureMonitorLinuxAgent"), + VirtualMachineScaleSetExtensionProperties: &mgmtcompute.VirtualMachineScaleSetExtensionProperties{ + Publisher: to.StringPtr("Microsoft.Azure.Monitor"), + EnableAutomaticUpgrade: to.BoolPtr(true), + AutoUpgradeMinorVersion: to.BoolPtr(true), + TypeHandlerVersion: to.StringPtr("1.0"), + Type: to.StringPtr("AzureMonitorLinuxAgent"), + Settings: map[string]interface{}{ + "GCS_AUTO_CONFIG": true, + }, + }, + }, }, }, DiagnosticsProfile: &mgmtcompute.DiagnosticsProfile{ diff --git a/pkg/deploy/generator/scripts.go b/pkg/deploy/generator/scripts.go index a60f9fefb73..c07f7ca9167 100644 --- a/pkg/deploy/generator/scripts.go +++ b/pkg/deploy/generator/scripts.go @@ -15,3 +15,15 @@ var scriptGatewayVMSS []byte //go:embed scripts/rpVMSS.sh var scriptRpVMSS []byte + +//go:embed scripts/util-system.sh +var scriptUtilSystem []byte + +//go:embed scripts/util-services.sh +var scriptUtilServices []byte + +//go:embed scripts/util-packages.sh +var scriptUtilPackages []byte + +//go:embed scripts/util-common.sh +var scriptUtilCommon []byte diff --git a/pkg/deploy/generator/scripts/devProxyVMSS.sh b/pkg/deploy/generator/scripts/devProxyVMSS.sh index f09c9f57f78..659c149bb0e 100644 --- a/pkg/deploy/generator/scripts/devProxyVMSS.sh +++ b/pkg/deploy/generator/scripts/devProxyVMSS.sh @@ -1,3 
+1,4 @@ +#!/bin/bash #Adding retry logic to yum commands in order to avoid stalling out on resource locks echo "installing moby-engine (docker)" for attempt in {1..60}; do diff --git a/pkg/deploy/generator/scripts/gatewayVMSS.sh b/pkg/deploy/generator/scripts/gatewayVMSS.sh index 15faeab8e23..64fd6f88723 100644 --- a/pkg/deploy/generator/scripts/gatewayVMSS.sh +++ b/pkg/deploy/generator/scripts/gatewayVMSS.sh @@ -1,172 +1,100 @@ #!/bin/bash -echo "setting ssh password authentication" -# We need to manually set PasswordAuthentication to true in order for the VMSS Access JIT to work -sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/g' /etc/ssh/sshd_config -systemctl reload sshd.service - -#Adding retry logic to yum commands in order to avoid stalling out on resource locks -echo "running RHUI fix" -for attempt in {1..60}; do - yum update -y --disablerepo='*' --enablerepo='rhui-microsoft-azure*' && break - if [[ ${attempt} -lt 60 ]]; then sleep 30; else exit 1; fi -done - -echo "running yum update" -for attempt in {1..60}; do - yum -y -x WALinuxAgent -x WALinuxAgent-udev update --allowerasing && break - if [[ ${attempt} -lt 60 ]]; then sleep 30; else exit 1; fi -done - -echo "extending partition table" -# Linux block devices are inconsistently named -# it's difficult to tie the lvm pv to the physical disk using /dev/disk files, which is why lvs is used here -physical_disk="$(lvs -o devices -a | head -n2 | tail -n1 | cut -d ' ' -f 3 | cut -d \( -f 1 | tr -d '[:digit:]')" -growpart "$physical_disk" 2 - -echo "extending filesystems" -lvextend -l +20%FREE /dev/rootvg/rootlv -xfs_growfs / - -lvextend -l +100%FREE /dev/rootvg/varlv -xfs_growfs /var - -rpm --import https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-8 -rpm --import https://packages.microsoft.com/keys/microsoft.asc - -for attempt in {1..60}; do - yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm && break - if [[ ${attempt} -lt 60 ]]; then sleep 30; else exit 
1; fi -done - -echo "configuring logrotate" - -# gateway_logdir is a readonly variable that specifies the host path mount point for the gateway container log file -# for the purpose of rotating the gateway logs -declare -r gateway_logdir='/var/log/aro-gateway' - -cat >/etc/logrotate.conf </etc/yum.repos.d/azure.repo <<'EOF' -[azure-cli] -name=azure-cli -baseurl=https://packages.microsoft.com/yumrepos/azure-cli -enabled=yes -gpgcheck=yes - -[azurecore] -name=azurecore -baseurl=https://packages.microsoft.com/yumrepos/azurecore -enabled=yes -gpgcheck=no -EOF - -semanage fcontext -a -t var_log_t "/var/log/journal(/.*)?" -mkdir -p /var/log/journal - -for attempt in {1..60}; do - yum -y install clamav azsec-clamav azsec-monitor azure-cli azure-mdsd azure-security podman-docker openssl-perl python3 && break - # hack - we are installing python3 on hosts due to an issue with Azure Linux Extensions https://github.com/Azure/azure-linux-extensions/pull/1505 - if [[ ${attempt} -lt 60 ]]; then sleep 30; else exit 1; fi -done - -echo "applying firewall rules" -# https://access.redhat.com/security/cve/cve-2020-13401 -cat >/etc/sysctl.d/02-disable-accept-ra.conf <<'EOF' -net.ipv6.conf.all.accept_ra=0 -EOF - -cat >/etc/sysctl.d/01-disable-core.conf <<'EOF' -kernel.core_pattern = |/bin/true -EOF -sysctl --system - -firewall-cmd --add-port=80/tcp --permanent -firewall-cmd --add-port=8081/tcp --permanent -firewall-cmd --add-port=443/tcp --permanent - -echo "logging into prod acr" -export AZURE_CLOUD_NAME=$AZURECLOUDNAME -az login -i --allow-no-subscriptions - -# The managed identity that the VM runs as only has a single roleassignment. -# This role assignment is ACRPull which is not necessarily present in the -# subscription we're deploying into. If the identity does not have any -# role assignments scoped on the subscription we're deploying into, it will -# not show on az login -i, which is why the below line is commented. 
-# az account set -s "$SUBSCRIPTIONID" - -# Suppress emulation output for podman instead of docker for az acr compatability -mkdir -p /etc/containers/ -touch /etc/containers/nodocker - -mkdir -p /root/.docker -REGISTRY_AUTH_FILE=/root/.docker/config.json az acr login --name "$(sed -e 's|.*/||' <<<"$ACRRESOURCEID")" - -MDMIMAGE="${RPIMAGE%%/*}/${MDMIMAGE#*/}" -docker pull "$MDMIMAGE" -docker pull "$RPIMAGE" -docker pull "$FLUENTBITIMAGE" - -az logout - -echo "configuring fluentbit service" -mkdir -p /etc/fluentbit/ -mkdir -p /var/lib/fluent - -cat >/etc/fluentbit/fluentbit.conf <<'EOF' -[INPUT] - Name systemd - Tag journald - Systemd_Filter _COMM=aro - DB /var/lib/fluent/journaldb +set -o errexit \ + -o pipefail \ + -o nounset + +main() { + # transaction attempt retry time in seconds + # shellcheck disable=SC2034 + local -ri retry_wait_time=30 + # shellcheck disable=SC2068 + local -ri pkg_retry_count=60 + + create_required_dirs + configure_sshd + configure_rpm_repos retry_wait_time \ + "$pkg_retry_count" + + # shellcheck disable=SC2034 + local -ar exclude_pkgs=( + "-x WALinuxAgent" + "-x WALinuxAgent-udev" + ) + + dnf_update_pkgs exclude_pkgs \ + retry_wait_time \ + "$pkg_retry_count" + + # shellcheck disable=SC2034 + local -ra install_pkgs=( + azure-cli + clamav + azsec-clamav + azure-cli + azure-mdsd + azure-security + podman + podman-docker + openssl-perl + # hack - we are installing python3 on hosts due to an issue with Azure Linux Extensions https://github.com/Azure/azure-linux-extensions/pull/1505 + python3 + # required for podman networking + firewalld + ) + + dnf_install_pkgs install_pkgs \ + retry_wait_time \ + "$pkg_retry_count" + + # TODO remove this once MicrosoftCBLMariner:cbl-mariner:cbl-mariner-2-gen2-fips supports automatic updates + # Reference: https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade#supported-os-images + configure_dnf_cron_job + + # shellcheck disable=SC2119 + 
configure_logrotate + + # shellcheck disable=SC2034 disable=SC2153 + local -r mdmimage="${RPIMAGE%%/*}/${MDMIMAGE#*/}" + local -r rpimage="$RPIMAGE" + # shellcheck disable=SC2034 + local -r fluentbit_image="$FLUENTBITIMAGE" + # values are references to variables, they should not be dereferenced here + # shellcheck disable=SC2034 + local -rA aro_images=( + ["mdm"]="mdmimage" + ["rp"]="rpimage" + ["fluentbit"]="fluentbit_image" + ) + + pull_container_images aro_images + + local -r aro_network="aro" + # shellcheck disable=SC2034 + local -rA networks=( + ["$aro_network"]="192.168.254.0/24" + ) + create_podman_networks networks + + # shellcheck disable=SC2034 + local -ra enable_ports=( + # RP gateway + "80/tcp" + "8081/tcp" + "443/tcp" + # JIT ssh + "22/tcp" + ) + + firewalld_configure enable_ports + + + # shellcheck disable=SC2034 + local -r fluentbit_conf_file="[INPUT] +Name systemd +Tag journald +Systemd_Filter _COMM=aro +DB /var/lib/fluent/journaldb [FILTER] Name modify @@ -177,324 +105,59 @@ cat >/etc/fluentbit/fluentbit.conf <<'EOF' [OUTPUT] Name forward Match * - Port 29230 -EOF - -echo "FLUENTBITIMAGE=$FLUENTBITIMAGE" >/etc/sysconfig/fluentbit - -cat >/etc/systemd/system/fluentbit.service <<'EOF' -[Unit] -After=network-online.target -Wants=network-online.target -StartLimitIntervalSec=0 - -[Service] -RestartSec=1s -EnvironmentFile=/etc/sysconfig/fluentbit -ExecStartPre=-/usr/bin/docker rm -f %N -ExecStart=/usr/bin/docker run \ - --security-opt label=disable \ - --entrypoint /opt/td-agent-bit/bin/td-agent-bit \ - --net=host \ - --hostname %H \ - --name %N \ - --rm \ - --cap-drop net_raw \ - -v /etc/fluentbit/fluentbit.conf:/etc/fluentbit/fluentbit.conf \ - -v /var/lib/fluent:/var/lib/fluent:z \ - -v /var/log/journal:/var/log/journal:ro \ - -v /etc/machine-id:/etc/machine-id:ro \ - $FLUENTBITIMAGE \ - -c /etc/fluentbit/fluentbit.conf - -ExecStop=/usr/bin/docker stop %N -Restart=always -RestartSec=5 -StartLimitInterval=0 - -[Install] -WantedBy=multi-user.target -EOF 
- -echo "configuring mdm service" -cat >/etc/sysconfig/mdm </etc/systemd/system/mdm.service <<'EOF' -[Unit] -After=network-online.target -Wants=network-online.target - -[Service] -EnvironmentFile=/etc/sysconfig/mdm -ExecStartPre=-/usr/bin/docker rm -f %N -ExecStart=/usr/bin/docker run \ - --entrypoint /usr/sbin/MetricsExtension \ - --hostname %H \ - --name %N \ - --rm \ - --cap-drop net_raw \ - -m 2g \ - -v /etc/mdm.pem:/etc/mdm.pem \ - -v /var/etw:/var/etw:z \ - $MDMIMAGE \ - -CertFile /etc/mdm.pem \ - -FrontEndUrl $MDMFRONTENDURL \ - -Logger Console \ - -LogLevel Warning \ - -PrivateKeyFile /etc/mdm.pem \ - -SourceEnvironment $MDMSOURCEENVIRONMENT \ - -SourceRole $MDMSOURCEROLE \ - -SourceRoleInstance $MDMSOURCEROLEINSTANCE -ExecStop=/usr/bin/docker stop %N -Restart=always -RestartSec=1 -StartLimitInterval=0 - -[Install] -WantedBy=multi-user.target -EOF - -echo "configuring aro-gateway service" -cat >/etc/sysconfig/aro-gateway </etc/systemd/system/aro-gateway.service </etc/systemd/system/download-$var-credentials.service </etc/systemd/system/download-$var-credentials.timer </usr/local/bin/download-credentials.sh </etc/systemd/system/watch-mdm-credentials.service </etc/systemd/system/watch-mdm-credentials.path </etc/systemd/system/mdsd.service.d/override.conf <<'EOF' -[Unit] -After=network-online.target -EOF - -cat >/etc/default/mdsd </dev/null -c_rehash /usr/lib/ssl/certs - -# we leave clientId blank as long as only 1 managed identity assigned to vmss -# if we have more than 1, we will need to populate with clientId used for off-node scanning -cat >/etc/default/vsa-nodescan-agent.config </etc/logrotate.conf <<'EOF' -# see "man logrotate" for details -# rotate log files weekly -weekly - -# keep 2 weeks worth of backlogs -rotate 2 - -# create new (empty) log files after rotating old ones -create - -# use date as a suffix of the rotated file -dateext - -# uncomment this if you want your log files compressed -compress - -# RPM packages drop log rotation information 
into this directory -include /etc/logrotate.d - -# no packages own wtmp and btmp -- we'll rotate them here -/var/log/wtmp { - monthly - create 0664 root utmp - minsize 1M - rotate 1 -} - -/var/log/btmp { - missingok - monthly - create 0600 root utmp - rotate 1 -} -EOF - -echo "configuring yum repository and running yum update" -cat >/etc/yum.repos.d/azure.repo <<'EOF' -[azure-cli] -name=azure-cli -baseurl=https://packages.microsoft.com/yumrepos/azure-cli -enabled=yes -gpgcheck=yes - -[azurecore] -name=azurecore -baseurl=https://packages.microsoft.com/yumrepos/azurecore -enabled=yes -gpgcheck=no -EOF - -semanage fcontext -a -t var_log_t "/var/log/journal(/.*)?" -mkdir -p /var/log/journal - -for attempt in {1..60}; do -yum -y install clamav azsec-clamav azsec-monitor azure-cli azure-mdsd azure-security podman podman-docker openssl-perl python3 && break - # hack - we are installing python3 on hosts due to an issue with Azure Linux Extensions https://github.com/Azure/azure-linux-extensions/pull/1505 - if [[ ${attempt} -lt 60 ]]; then sleep 30; else exit 1; fi -done - -# https://access.redhat.com/security/cve/cve-2020-13401 -echo "applying firewall rules" -cat >/etc/sysctl.d/02-disable-accept-ra.conf <<'EOF' -net.ipv6.conf.all.accept_ra=0 -EOF - -cat >/etc/sysctl.d/01-disable-core.conf <<'EOF' -kernel.core_pattern = |/bin/true -EOF -sysctl --system - -firewall-cmd --add-port=443/tcp --permanent -firewall-cmd --add-port=444/tcp --permanent -firewall-cmd --add-port=2222/tcp --permanent - -export AZURE_CLOUD_NAME=$AZURECLOUDNAME - -echo "logging into prod acr" -az login -i --allow-no-subscriptions - -# Suppress emulation output for podman instead of docker for az acr compatability -mkdir -p /etc/containers/ -touch /etc/containers/nodocker - -mkdir -p /root/.docker -REGISTRY_AUTH_FILE=/root/.docker/config.json az acr login --name "$(sed -e 's|.*/||' <<<"$ACRRESOURCEID")" - -MDMIMAGE="${RPIMAGE%%/*}/${MDMIMAGE#*/}" -docker pull "$MDMIMAGE" -docker pull "$RPIMAGE" -docker 
pull "$FLUENTBITIMAGE" - -az logout - -echo "configuring fluentbit service" -mkdir -p /etc/fluentbit/ -mkdir -p /var/lib/fluent - -cat >/etc/fluentbit/fluentbit.conf <<'EOF' -[INPUT] - Name systemd - Tag journald - Systemd_Filter _COMM=aro - DB /var/lib/fluent/journaldb +set -o errexit \ + -o pipefail \ + -o nounset + +main() { + # transaction attempt retry time in seconds + # shellcheck disable=SC2034 + local -ri retry_wait_time=30 + local -ri pkg_retry_count=60 + + create_required_dirs + configure_sshd + configure_rpm_repos retry_wait_time \ + "$pkg_retry_count" + + # shellcheck disable=SC2034 + local -ar exclude_pkgs=( + "-x WALinuxAgent" + "-x WALinuxAgent-udev" + ) + + dnf_update_pkgs exclude_pkgs \ + retry_wait_time \ + "$pkg_retry_count" + + # shellcheck disable=SC2034 + local -ra install_pkgs=( + azure-cli + clamav + azsec-clamav + azure-cli + azure-mdsd + azure-security + podman + podman-docker + openssl-perl + # hack - we are installing python3 on hosts due to an issue with Azure Linux Extensions https://github.com/Azure/azure-linux-extensions/pull/1505 + python3 + # required for podman networking + firewalld + ) + + dnf_install_pkgs install_pkgs \ + retry_wait_time \ + "$pkg_retry_count" + + # TODO remove this once MicrosoftCBLMariner:cbl-mariner:cbl-mariner-2-gen2-fips supports automatic updates + # Reference: https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade#supported-os-images + configure_dnf_cron_job + + # shellcheck disable=SC2119 + configure_logrotate + + # shellcheck disable=SC2153 disable=SC2034 + local -r mdmimage="${RPIMAGE%%/*}/${MDMIMAGE#*/}" + local -r rpimage="$RPIMAGE" + # shellcheck disable=SC2034 + local -r fluentbit_image="$FLUENTBITIMAGE" + # shellcheck disable=SC2034 + local -rA aro_images=( + ["mdm"]="mdmimage" + ["rp"]="rpimage" + ["fluentbit"]="fluentbit_image" + ) + + pull_container_images aro_images + + local -r aro_network="aro" + # shellcheck disable=SC2034 + local 
-rA networks=( + ["$aro_network"]="192.168.254.0/24" + ) + create_podman_networks networks + + # shellcheck disable=SC2034 + local -ra enable_ports=( + # RP frontend + "443/tcp" + # Portal web + "444/tcp" + # Portal ssh + "2222/tcp" + # JIT ssh + "22/tcp" + ) + + firewalld_configure enable_ports + + # shellcheck disable=SC2034 + local -r fluentbit_conf_file="[INPUT] +Name systemd +Tag journald +Systemd_Filter _COMM=aro +DB /var/lib/fluent/journaldb [FILTER] Name modify @@ -155,7 +104,7 @@ cat >/etc/fluentbit/fluentbit.conf <<'EOF' [FILTER] Name rewrite_tag Match journald - Rule $LOGKIND asyncqos asyncqos true + Rule \$LOGKIND asyncqos asyncqos true [FILTER] Name modify @@ -167,117 +116,18 @@ cat >/etc/fluentbit/fluentbit.conf <<'EOF' [FILTER] Name rewrite_tag Match journald - Rule $LOGKIND ifxaudit ifxaudit false - -[FILTER] - Name rewrite_tag - Match journald - Rule $LOGKIND outboundRequests outboundRequests false - -[FILTER] - Name modify - Match outboundRequests - Remove CLIENT_PRINCIPAL_NAME - Remove FILE - Remove COMPONENT + Rule \$LOGKIND ifxaudit ifxaudit false [OUTPUT] Name forward Match * - Port 29230 -EOF - -echo "FLUENTBITIMAGE=$FLUENTBITIMAGE" >/etc/sysconfig/fluentbit + Port 29230" -cat >/etc/systemd/system/fluentbit.service <<'EOF' -[Unit] -After=network-online.target -Wants=network-online.target -StartLimitIntervalSec=0 -[Service] -RestartSec=1s -EnvironmentFile=/etc/sysconfig/fluentbit -ExecStartPre=-/usr/bin/docker rm -f %N -ExecStart=/usr/bin/docker run \ - --security-opt label=disable \ - --entrypoint /opt/td-agent-bit/bin/td-agent-bit \ - --net=host \ - --hostname %H \ - --name %N \ - --rm \ - --cap-drop net_raw \ - -v /etc/fluentbit/fluentbit.conf:/etc/fluentbit/fluentbit.conf \ - -v /var/lib/fluent:/var/lib/fluent:z \ - -v /var/log/journal:/var/log/journal:ro \ - -v /etc/machine-id:/etc/machine-id:ro \ - $FLUENTBITIMAGE \ - -c /etc/fluentbit/fluentbit.conf - -ExecStop=/usr/bin/docker stop %N -Restart=always -RestartSec=5 -StartLimitInterval=0 
- -[Install] -WantedBy=multi-user.target -EOF - -mkdir /etc/aro-rp -base64 -d <<<"$ADMINAPICABUNDLE" >/etc/aro-rp/admin-ca-bundle.pem -if [[ -n "$ARMAPICABUNDLE" ]]; then - base64 -d <<<"$ARMAPICABUNDLE" >/etc/aro-rp/arm-ca-bundle.pem -fi -chown -R 1000:1000 /etc/aro-rp - -echo "configuring mdm service" -cat >/etc/sysconfig/mdm </etc/systemd/system/mdm.service <<'EOF' -[Unit] -After=network-online.target -Wants=network-online.target - -[Service] -EnvironmentFile=/etc/sysconfig/mdm -ExecStartPre=-/usr/bin/docker rm -f %N -ExecStart=/usr/bin/docker run \ - --entrypoint /usr/sbin/MetricsExtension \ - --hostname %H \ - --name %N \ - --rm \ - --cap-drop net_raw \ - -m 2g \ - -v /etc/mdm.pem:/etc/mdm.pem \ - -v /var/etw:/var/etw:z \ - $MDMIMAGE \ - -CertFile /etc/mdm.pem \ - -FrontEndUrl $MDMFRONTENDURL \ - -Logger Console \ - -LogLevel Warning \ - -PrivateKeyFile /etc/mdm.pem \ - -SourceEnvironment $MDMSOURCEENVIRONMENT \ - -SourceRole $MDMSOURCEROLE \ - -SourceRoleInstance $MDMSOURCEROLEINSTANCE -ExecStop=/usr/bin/docker stop %N -Restart=always -RestartSec=1 -StartLimitInterval=0 - -[Install] -WantedBy=multi-user.target -EOF - -echo "configuring aro-rp service" -cat >/etc/sysconfig/aro-rp </etc/sysconfig/aro-monitor </etc/systemd/system/aro-monitor.service <<'EOF' -[Unit] -After=network-online.target -Wants=network-online.target - -[Service] -EnvironmentFile=/etc/sysconfig/aro-monitor -ExecStartPre=-/usr/bin/docker rm -f %N -ExecStart=/usr/bin/docker run \ - --hostname %H \ - --name %N \ - --rm \ - --cap-drop net_raw \ - -e AZURE_FP_CLIENT_ID \ - -e DOMAIN_NAME \ - -e CLUSTER_MDSD_ACCOUNT \ - -e CLUSTER_MDSD_CONFIG_VERSION \ - -e GATEWAY_DOMAINS \ - -e GATEWAY_RESOURCEGROUP \ - -e MDSD_ENVIRONMENT \ - -e CLUSTER_MDSD_NAMESPACE \ - -e CLUSTER_MDM_ACCOUNT \ - -e CLUSTER_MDM_NAMESPACE \ - -e DATABASE_ACCOUNT_NAME \ - -e KEYVAULT_PREFIX \ - -e MDM_ACCOUNT \ - -e MDM_NAMESPACE \ - -m 2.5g \ - -v /run/systemd/journal:/run/systemd/journal \ - -v /var/etw:/var/etw:z \ - 
$RPIMAGE \ - monitor -Restart=always -RestartSec=1 -StartLimitInterval=0 - -[Install] -WantedBy=multi-user.target -EOF - -echo "configuring aro-portal service" -cat >/etc/sysconfig/aro-portal </etc/systemd/system/aro-portal.service <<'EOF' -[Unit] -After=network-online.target -Wants=network-online.target -StartLimitInterval=0 - -[Service] -EnvironmentFile=/etc/sysconfig/aro-portal -ExecStartPre=-/usr/bin/docker rm -f %N -ExecStart=/usr/bin/docker run \ - --hostname %H \ - --name %N \ - --rm \ - --cap-drop net_raw \ - -e AZURE_PORTAL_ACCESS_GROUP_IDS \ - -e AZURE_PORTAL_CLIENT_ID \ - -e AZURE_PORTAL_ELEVATED_GROUP_IDS \ - -e DATABASE_ACCOUNT_NAME \ - -e KEYVAULT_PREFIX \ - -e MDM_ACCOUNT \ - -e MDM_NAMESPACE \ - -e PORTAL_HOSTNAME \ - -m 2g \ - -p 444:8444 \ - -p 2222:2222 \ - -v /run/systemd/journal:/run/systemd/journal \ - -v /var/etw:/var/etw:z \ - $RPIMAGE \ - portal -Restart=always -RestartSec=1 - -[Install] -WantedBy=multi-user.target -EOF - -echo "configuring mdsd and mdm services" -chcon -R system_u:object_r:var_log_t:s0 /var/opt/microsoft/linuxmonagent - -mkdir -p /var/lib/waagent/Microsoft.Azure.KeyVault.Store - -for var in "mdsd" "mdm"; do -cat >/etc/systemd/system/download-$var-credentials.service </etc/systemd/system/download-$var-credentials.timer </usr/local/bin/download-credentials.sh </etc/systemd/system/watch-mdm-credentials.service </etc/systemd/system/watch-mdm-credentials.path </etc/systemd/system/mdsd.service.d/override.conf <<'EOF' -[Unit] -After=network-online.target -EOF - -cat >/etc/default/mdsd </dev/null -c_rehash /usr/lib/ssl/certs - -# we leave clientId blank as long as only 1 managed identity assigned to vmss -# if we have more than 1, we will need to populate with clientId used for off-node scanning -cat >/etc/default/vsa-nodescan-agent.config < "$filename" + else + log "Appending to $filename" + echo "$file_contents" >> "$filename" + fi +} + +# retry Adding retry logic to yum commands in order to avoid stalling out on resource locks 
+# args: +# 1) cmd_retry - nameref, array; Command and arguement(s) to retry +# 2) wait_time - nameref, integer; Time to wait before retrying command +# 3) retries - integer, optional; Ammount of times to retry command, defaults to 5 +retry() { + local -n cmd_retry="$1" + local -n wait_time="$2" + local -ri retries="${3:-5}" + + for attempt in {1..5}; do + log "attempt #${attempt} - ${FUNCNAME[2]}" + # shellcheck disable=SC2068 + ${cmd_retry[@]} & + + wait $! && break + if [ "${attempt}" -le "$retries" ]; then + sleep "$wait_time" + else + abort "attempt #${attempt} - Failed to update packages" + fi + done +} + +# verify_role +# args: +# 1) test_role - nameref; role being verified +# 2) certs - boolean, optional; defaults to false. Set to true to add devproxy to allowed roles +verify_role() { + local -n test_role="$1" + local -r certs="${2:-false}" + + allowed_roles_glob="($role_rp|$role_gateway)" + if $certs; then + # remove trailing ")" and append additional role + allowed_roles_glob="${allowed_roles_glob%\)*}|devproxy)" + fi + + if [[ "$test_role" =~ $allowed_roles_glob ]]; then + log "Verified role \"$test_role\"" + else + abort "failed to verify role, role \"${test_role}\" not in \"${allowed_roles_glob}\"" + fi +} + +# get_keyvault_suffix +# args: +# 1) rl - nameref, string; role to get short role for +# 2) kv_suffix - nameref, string; short role will be assigned to this nameref +# 3) sec_prefix - nameref, string; keyvault certificate prefix will be assigned to this nameref +get_keyvault_suffix() { + local -n rl="$1" + local -n kv_suffix="$2" + local -n sec_prefix="$3" + + local -r keyvault_suffix_rp="svc" + local -r keyvault_prefix_gateway="gwy" + + case "$rl" in + "$role_gateway") + kv_suffix="$keyvault_prefix_gateway" + sec_prefix="$keyvault_prefix_gateway" + ;; + "$role_rp") + kv_suffix="$keyvault_suffix_rp" + sec_prefix="$role_rp" + ;; + *) + abort "unkown role $rl" + ;; + esac +} + +# reboot_vm restores all selinux file contexts, then schedules a reboot 
for one hour later +# Reboots should scheduled after all VM extensions have had time to complete +# Reference: https://learn.microsoft.com/en-us/azure/virtual-machines/extensions/custom-script-linux#tips +reboot_vm() { + log "starting" + + (shutdown -r now &) +} diff --git a/pkg/deploy/generator/scripts/util-packages.sh b/pkg/deploy/generator/scripts/util-packages.sh new file mode 100644 index 00000000000..7d059697d84 --- /dev/null +++ b/pkg/deploy/generator/scripts/util-packages.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# Repository and package management related functions + +configure_repo_mariner_extended() { + local -r extended_repo_config="https://packages.microsoft.com/cbl-mariner/2.0/prod/extended/x86_64/config.repo" + curl -sSL "$extended_repo_config" -o /etc/yum.repos.d/mariner-extended.repo + + local -r repo_name="cbl-mariner2.0prodextendedx86_64" + + local -ra cmd=( + dnf + update + -y + --enablerepo="$repo_name" + ) + + log "Enabling repo $repo_name" + retry cmd "$1" "${2:-}" +} + +# configure_rpm_repos +# New repositories should be added in their own functions, and called here +# args: +# 1) wait_time - nameref, integer; Time to wait before retrying command +# 2) retries - integer, optional; Amount of times to retry command, defaults to 5 +configure_rpm_repos() { + log "starting" + + configure_repo_mariner_extended "$1" "${2:-1}" +} + +# dnf_install_pkgs +# args: +# 1) pkgs - nameref, string array; Packages to be installed +# 2) wait_time - nameref, integer; Time to wait before retrying command +# 3) retries - integer, optional; Amount of times to retry command, defaults to 5 +dnf_install_pkgs() { + local -n pkgs="$1" + log "starting" + + local -a cmd=( + dnf + -y + install + ) + + # Reference: https://www.shellcheck.net/wiki/SC2206 + # append pkgs array to cmd + mapfile -O $(( ${#cmd[@]} + 1 )) -d ' ' cmd <<< "${pkgs[@]}" + local -r cmd + + log "Attempting to install packages: ${pkgs[*]}" + retry cmd "$2" "${3:-}" +} + + +# dnf_update_pkgs +# args: +# 1) 
excludes - nameref, string array, optional; Packages to exclude from updating +# Each index must be prefixed with -x +# 2) wait_time - nameref, integer; Time to wait before retrying command +# 3) retries - integer, optional; Ammount of times to retry command, defaults to 5 +dnf_update_pkgs() { + local -n excludes="${1:-empty_str}" + log "starting" + + local -a cmd=( + dnf + -y + # Replaced with excludes + "" + update + --allowerasing + ) + + if [ -n "${excludes}" ]; then + # Reference https://www.shellcheck.net/wiki/SC2206 + mapfile -O 2 cmd <<< "${excludes[@]}" + else + # Remove empty string if we aren't replacing them, probably doesn't matter, but why not be safe + unset "cmd[2]" + fi + local -r cmd + + log "Updating all packages excluding \"${excludes[*]:-}\"" + retry cmd "$2" "${3:-}" +} + +# configure_dnf_cron_job +# create cron job to auto update rpm packages +configure_dnf_cron_job() { + log "starting" + local -r cron_weekly_dnf_update_filename='/etc/cron.weekly/dnfupdate' + local -r cron_weekly_dnf_update_file="#!/bin/bash +dnf update -y" + + write_file cron_weekly_dnf_update_filename cron_weekly_dnf_update_file true + chmod u+x "$cron_weekly_dnf_update_filename" +} + +# rpm_import_keys +# args: +# 1) keys - nameref, string array; rpm keys to be imported +# 2) wait_time - nameref, integer; Time to wait before retrying command +rpm_import_keys() { + local -n keys="$1" + log "starting" + + # shellcheck disable=SC2068 + for key in ${keys[@]}; do + if [ ${#keys[@]} -eq 0 ]; then + break + fi + + local -a cmd=( + rpm + --import + -v + "$key" + ) + + log "Importing rpm repository key $key" + retry cmd "$2" "${3:-}" && unset key + done +} diff --git a/pkg/deploy/generator/scripts/util-services.sh b/pkg/deploy/generator/scripts/util-services.sh new file mode 100644 index 00000000000..d4fc2c436c0 --- /dev/null +++ b/pkg/deploy/generator/scripts/util-services.sh @@ -0,0 +1,673 @@ +#!/bin/bash +# ARO service setup functions + +# enable_services enables the systemd 
# services that are passed in
# Reloads the systemd manager configuration once, then enables and starts each
# unit in turn.
# args:
# 1) services - nameref, array; services to be enabled
enable_services() {
    local -n svcs="$1"
    log "starting"

    # Pick up unit files written earlier in the bootstrap.
    systemctl daemon-reload

    log "enabling services ${svcs[*]}"
    # shellcheck disable=SC2068
    for svc in ${svcs[@]}; do
        log "Enabling and starting $svc now"
        systemctl enable --now "$svc"
    done
}

# configure_service_aro_gateway
# Writes the gateway environment file (/etc/sysconfig/aro-gateway) and the
# aro-gateway systemd unit that runs the gateway container with podman.
# args:
# 1) image - nameref, string; container image
# 2) role - nameref, string; VMSS role (lower-cased and used as the container command)
# 3) conf_file - nameref, string; aro gateway environment file
# 4) network - nameref, string; podman network name to be attached
configure_service_aro_gateway() {
    local -n image="$1"
    local -n role="$2"
    local -n conf_file="$3"
    local -n network="$4"
    log "starting"
    log "Configuring aro-gateway service"

    local -r aro_gateway_conf_filename='/etc/sysconfig/aro-gateway'
    # shellcheck disable=SC2034
    local -r aro_gateway_service_filename='/etc/systemd/system/aro-gateway.service'

    # Environment file first; the unit below refers to it via EnvironmentFile.
    write_file aro_gateway_conf_filename conf_file true

    # Backslash-newline inside the double quotes is a shell line continuation,
    # so ExecStart ends up on a single line in the written unit.
    # shellcheck disable=SC2034
    local -r aro_gateway_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=${aro_gateway_conf_filename}
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
    --hostname %H \
    --name %N \
    --rm \
    --cap-drop net_raw \
    -e ACR_RESOURCE_ID \
    -e DATABASE_ACCOUNT_NAME \
    -e GATEWAY_DOMAINS \
    -e GATEWAY_FEATURES \
    -e MDM_ACCOUNT \
    -e MDM_NAMESPACE \
    -m 2g \
    --network=$network \
    -p 80:8080 \
    -p 8081:8081 \
    -p 443:8443 \
    -v /run/systemd/journal:/run/systemd/journal \
    -v /var/etw:/var/etw:z \
    $image \
    ${role,,}
ExecStop=/usr/bin/podman stop -t 3600 %N
TimeoutStopSec=3600
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target
    "

    write_file aro_gateway_service_filename aro_gateway_service_file true
}

# configure_service_aro_rp
# args:
# 1) image - nameref, string;
#    RP container image
# 2) role - nameref, string; VMSS role (lower-cased and used as the container command)
# 3) conf_file - nameref, string; aro rp environment file
# 4) network - nameref, string; podman network name to be attached
# Writes the RP environment file (/etc/sysconfig/aro-rp) and the aro-rp systemd
# unit that runs the RP container with podman.
configure_service_aro_rp() {
    local -n image="$1"
    local -n role="$2"
    local -n conf_file="$3"
    local -n network="$4"
    log "starting"
    log "Configuring aro-rp service"

    local -r aro_rp_conf_filename='/etc/sysconfig/aro-rp'
    # shellcheck disable=SC2034
    local -r aro_rp_service_filename='/etc/systemd/system/aro-rp.service'

    # Environment file first; the unit below refers to it via EnvironmentFile.
    write_file aro_rp_conf_filename conf_file true

    # Backslash-newline inside the double quotes is a shell line continuation,
    # so ExecStart ends up on a single line in the written unit.
    # shellcheck disable=SC2034
    local -r aro_rp_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=${aro_rp_conf_filename}
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
    --hostname %H \
    --name %N \
    --rm \
    --cap-drop net_raw \
    -e ACR_RESOURCE_ID \
    -e ADMIN_API_CLIENT_CERT_COMMON_NAME \
    -e ARM_API_CLIENT_CERT_COMMON_NAME \
    -e AZURE_ARM_CLIENT_ID \
    -e AZURE_FP_CLIENT_ID \
    -e CLUSTER_MDM_ACCOUNT \
    -e CLUSTER_MDM_NAMESPACE \
    -e CLUSTER_MDSD_ACCOUNT \
    -e CLUSTER_MDSD_CONFIG_VERSION \
    -e CLUSTER_MDSD_NAMESPACE \
    -e DATABASE_ACCOUNT_NAME \
    -e DOMAIN_NAME \
    -e GATEWAY_DOMAINS \
    -e GATEWAY_RESOURCEGROUP \
    -e KEYVAULT_PREFIX \
    -e MDM_ACCOUNT \
    -e MDM_NAMESPACE \
    -e MDSD_ENVIRONMENT \
    -e RP_FEATURES \
    -e ARO_INSTALL_VIA_HIVE \
    -e ARO_HIVE_DEFAULT_INSTALLER_PULLSPEC \
    -e ARO_ADOPT_BY_HIVE \
    -e OIDC_AFD_ENDPOINT \
    -e OIDC_STORAGE_ACCOUNT_NAME \
    -m 2g \
    --network=$network \
    -p 443:8443 \
    -v /etc/aro-rp:/etc/aro-rp \
    -v /run/systemd/journal:/run/systemd/journal \
    -v /var/etw:/var/etw:z \
    $image \
    ${role,,}
ExecStop=/usr/bin/podman stop -t 3600 %N
TimeoutStopSec=3600
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file aro_rp_service_filename aro_rp_service_file true
}

# configure_service_aro_monitor
#
# args:
# 1) image - nameref, string; RP container image
# 2) network - nameref, string; podman network name to be attached
# Writes /etc/sysconfig/aro-monitor and the aro-monitor systemd unit, which runs
# the RP image with the `monitor` command.
configure_service_aro_monitor() {
    local -n image="$1"
    local -n network="$2"
    log "starting"
    log "Configuring aro-monitor service"

    # DOMAIN_NAME, CLUSTER_MDSD_ACCOUNT, CLUSTER_MDSD_CONFIG_VERSION, GATEWAY_DOMAINS, GATEWAY_RESOURCEGROUP, MDSD_ENVIRONMENT CLUSTER_MDSD_NAMESPACE
    # are not used, but can't easily be refactored out. Should be revisited in the future.
    # shellcheck disable=SC2034
    local -r aro_monitor_service_conf_filename='/etc/sysconfig/aro-monitor'
    # shellcheck disable=SC2034
    local -r aro_monitor_service_conf_file="AZURE_FP_CLIENT_ID='$FPCLIENTID'
DOMAIN_NAME='$LOCATION.$CLUSTERPARENTDOMAINNAME'
CLUSTER_MDSD_ACCOUNT='$CLUSTERMDSDACCOUNT'
CLUSTER_MDSD_CONFIG_VERSION='$CLUSTERMDSDCONFIGVERSION'
GATEWAY_DOMAINS='$GATEWAYDOMAINS'
GATEWAY_RESOURCEGROUP='$GATEWAYRESOURCEGROUPNAME'
MDSD_ENVIRONMENT='$MDSDENVIRONMENT'
CLUSTER_MDSD_NAMESPACE='$CLUSTERMDSDNAMESPACE'
CLUSTER_MDM_ACCOUNT='$CLUSTERMDMACCOUNT'
CLUSTER_MDM_NAMESPACE=BBM
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
KEYVAULT_PREFIX='$KEYVAULTPREFIX'
MDM_ACCOUNT='$RPMDMACCOUNT'
MDM_NAMESPACE=BBM
RPIMAGE='$image'"

    write_file aro_monitor_service_conf_filename aro_monitor_service_conf_file true

    # shellcheck disable=SC2034
    local -r aro_monitor_service_filename='/etc/systemd/system/aro-monitor.service'
    # shellcheck disable=SC2034
    local -r aro_monitor_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=/etc/sysconfig/aro-monitor
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
    --hostname %H \
    --name %N \
    --rm \
    --cap-drop net_raw \
    --network=$network \
    -e AZURE_FP_CLIENT_ID \
    -e DOMAIN_NAME \
    -e CLUSTER_MDSD_ACCOUNT \
    -e CLUSTER_MDSD_CONFIG_VERSION \
    -e GATEWAY_DOMAINS \
    -e GATEWAY_RESOURCEGROUP \
    -e MDSD_ENVIRONMENT \
    -e CLUSTER_MDSD_NAMESPACE \
    -e CLUSTER_MDM_ACCOUNT \
    -e CLUSTER_MDM_NAMESPACE \
    -e DATABASE_ACCOUNT_NAME \
    -e KEYVAULT_PREFIX \
    -e MDM_ACCOUNT \
    -e MDM_NAMESPACE \
    -m 2.5g \
    -v /run/systemd/journal:/run/systemd/journal \
    -v /var/etw:/var/etw:z \
    $image \
    monitor
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file aro_monitor_service_filename aro_monitor_service_file true
}

# configure_service_aro_portal
# Writes /etc/sysconfig/aro-portal and the aro-portal systemd unit, which runs
# the RP image with the `portal` command.
# args:
# 1) image - nameref, string; RP container image
# 2) network - nameref, string; podman network name to be attached
configure_service_aro_portal() {
    local -n image="$1"
    local -n network="$2"
    log "starting"
    log "Configuring aro portal service"

    # shellcheck disable=SC2034
    local -r aro_portal_service_conf_filename='/etc/sysconfig/aro-portal'
    # shellcheck disable=SC2034
    local -r aro_portal_service_conf_file="AZURE_PORTAL_ACCESS_GROUP_IDS='$PORTALACCESSGROUPIDS'
AZURE_PORTAL_CLIENT_ID='$PORTALCLIENTID'
AZURE_PORTAL_ELEVATED_GROUP_IDS='$PORTALELEVATEDGROUPIDS'
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
KEYVAULT_PREFIX='$KEYVAULTPREFIX'
MDM_ACCOUNT='$RPMDMACCOUNT'
MDM_NAMESPACE=Portal
PORTAL_HOSTNAME='$LOCATION.admin.$RPPARENTDOMAINNAME'
RPIMAGE='$image'"

    write_file aro_portal_service_conf_filename aro_portal_service_conf_file true

    # shellcheck disable=SC2034
    local -r aro_portal_service_filename='/etc/systemd/system/aro-portal.service'
    # shellcheck disable=SC2034
    local -r aro_portal_service_file="[Unit]
After=network-online.target
Wants=network-online.target
StartLimitInterval=0

[Service]
EnvironmentFile=/etc/sysconfig/aro-portal
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
    --hostname %H \
    --name %N \
    --rm \
    --cap-drop net_raw \
    --network=$network \
    -e AZURE_PORTAL_ACCESS_GROUP_IDS \
    -e AZURE_PORTAL_CLIENT_ID \
    -e AZURE_PORTAL_ELEVATED_GROUP_IDS \
    -e DATABASE_ACCOUNT_NAME \
    -e KEYVAULT_PREFIX \
    -e MDM_ACCOUNT \
    -e MDM_NAMESPACE \
    -e PORTAL_HOSTNAME \
    -m 2g \
    -p 444:8444 \
    -p 2222:2222 \
    -v /run/systemd/journal:/run/systemd/journal \
    -v /var/etw:/var/etw:z \
    $image \
    portal
Restart=always
RestartSec=1

[Install]
WantedBy=multi-user.target"

    write_file aro_portal_service_filename aro_portal_service_file true
}

# configure_service_mdsd
# Writes the mdsd systemd drop-in (override.conf) and /etc/default/mdsd. The
# Geneva auth id is the CN read from the mdsd certificate's subject, so that
# certificate must already be in place when this runs.
# args:
# 1) monitoring_role - nameref, string; can be "gateway" or "rp"
# 2) monitor_config_version - nameref, string; mdsd config version
configure_service_mdsd() {
    local -n role="$1"
    local -n monitor_config_version="$2"
    log "starting"
    log "configuring mdsd service"

    verify_role role

    local -r mdsd_service_dir="/etc/systemd/system/mdsd.service.d"
    mkdir -p "$mdsd_service_dir"

    # shellcheck disable=SC2034
    local -r mdsd_override_conf_filename="$mdsd_service_dir/override.conf"

    # Declared separately from the assignment so the command substitution is
    # not hidden behind `local`, and checked explicitly: an empty value would
    # otherwise be written below as MONITORING_GCS_AUTH_ID=''.
    local -r mdsd_certificate="/var/lib/waagent/Microsoft.Azure.KeyVault.Store/mdsd.pem"
    local mdsd_certificate_san
    mdsd_certificate_san="$(openssl x509 -in "$mdsd_certificate" -noout -subject | sed -e 's/.*CN = //')"
    [ -n "$mdsd_certificate_san" ] || abort "failed to read certificate subject from $mdsd_certificate"
    local -r mdsd_certificate_san

    # shellcheck disable=SC2034
    local -r mdsd_override_conf_file="[Unit]
After=network-online.target"

    write_file mdsd_override_conf_filename mdsd_override_conf_file true

    # shellcheck disable=SC2034
    local -r default_mdsd_filename="/etc/default/mdsd"
    # \$MDSD_ROLE_PREFIX is escaped so it is expanded when mdsd's environment
    # file is read, not while generating it.
    # shellcheck disable=SC2034
    local -r default_mdsd_file="MDSD_ROLE_PREFIX=/var/run/mdsd/default
MDSD_OPTIONS=\"-A -d -r \$MDSD_ROLE_PREFIX\"

export MONITORING_GCS_ENVIRONMENT='$MDSDENVIRONMENT'
export MONITORING_GCS_ACCOUNT='$RPMDSDACCOUNT'
export MONITORING_GCS_REGION='$LOCATION'
export MONITORING_GCS_AUTH_ID_TYPE=AuthKeyVault
export MONITORING_GCS_AUTH_ID='$mdsd_certificate_san'
export MONITORING_GCS_NAMESPACE='$RPMDSDNAMESPACE'
export MONITORING_CONFIG_VERSION='$monitor_config_version'
export MONITORING_USE_GENEVA_CONFIG_SERVICE=true

export MONITORING_TENANT='$LOCATION'
export MONITORING_ROLE='$role'
export MONITORING_ROLE_INSTANCE=\"$(hostname)\"

export MDSD_MSGPACK_SORT_COLUMNS=\"1\""

    write_file default_mdsd_filename default_mdsd_file true
}

# configure_service_fluentbit
# Writes the fluentbit configuration, /etc/sysconfig/fluentbit and the
# fluentbit systemd unit.
# args:
# 1) conf_file - nameref, string; fluentbit configuration file
# 2) image - nameref, string; fluentbit container image to run
# 3) network - nameref, string; podman network name to be attached
#    NOTE(review): the unit below uses --net=host, so this argument is bound
#    but not used in the generated unit.
configure_service_fluentbit() {
    # shellcheck disable=SC2034
    local -n conf_file="$1"
    local -n image="$2"
    local -n network="$3"
    log "starting"
    log "Configuring fluentbit service"

    mkdir -p /etc/fluentbit/
    mkdir -p /var/lib/fluent

    # shellcheck disable=SC2034
    local -r conf_filename='/etc/fluentbit/fluentbit.conf'
    write_file conf_filename conf_file true

    # shellcheck disable=SC2034
    local -r sysconfig_filename='/etc/sysconfig/fluentbit'
    # shellcheck disable=SC2034
    local -r sysconfig_file="FLUENTBITIMAGE=$image"

    write_file sysconfig_filename sysconfig_file true

    # NOTE(review): RestartSec appears twice in [Service] (1s and 5) - confirm
    # which value is intended.
    # shellcheck disable=SC2034
    local -r service_filename='/etc/systemd/system/fluentbit.service'
    # shellcheck disable=SC2034
    local -r service_file="[Unit]
After=network-online.target
Wants=network-online.target
StartLimitIntervalSec=0

[Service]
RestartSec=1s
EnvironmentFile=/etc/sysconfig/fluentbit
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
    --security-opt label=disable \
    --entrypoint /opt/td-agent-bit/bin/td-agent-bit \
    --net=host \
    --hostname %H \
    --name %N \
    --rm \
    --cap-drop net_raw \
    -v /etc/fluentbit/fluentbit.conf:/etc/fluentbit/fluentbit.conf \
    -v /var/lib/fluent:/var/lib/fluent:z \
    -v /var/log/journal:/var/log/journal:ro \
    -v /etc/machine-id:/etc/machine-id:ro \
    $image \
    -c /etc/fluentbit/fluentbit.conf

ExecStop=/usr/bin/podman stop %N
Restart=always
RestartSec=5
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file service_filename service_file true
}

# configure_timers_mdm_mdsd
# args:
# 1) role - string; can be "gateway" or "rp"
configure_timers_mdm_mdsd() {
    # Steps performed here:
    #   1. write a oneshot service and a 12-hourly timer per component (mdsd, mdm)
    #   2. write /usr/local/bin/download-credentials.sh and run it once for each
    #      component, waiting for each run to finish
    #   3. write and enable a path unit that restarts mdm.service whenever
    #      /etc/mdm.pem is modified
    local -n role="$1"
    log "starting"

    verify_role role

    # Filled in by get_keyvault_suffix (defined elsewhere) from the role.
    local keyvault_suffix secret_prefix
    get_keyvault_suffix role keyvault_suffix secret_prefix

    for var in "mdsd" "mdm"; do
        # shellcheck disable=SC2034
        local download_creds_service_filename="/etc/systemd/system/download-$var-credentials.service"
        # shellcheck disable=SC2034
        local download_creds_service_file="[Unit]
Description=Periodic $var credentials refresh

[Service]
Type=oneshot
ExecStart=/usr/local/bin/download-credentials.sh $var"

        write_file download_creds_service_filename download_creds_service_file true

        # shellcheck disable=SC2034
        local download_creds_timer_filename="/etc/systemd/system/download-$var-credentials.timer"
        # shellcheck disable=SC2034
        local download_creds_timer_file="[Unit]
Description=Periodic $var credentials refresh
After=network-online.target
Wants=network-online.target

[Timer]
OnBootSec=0min
OnCalendar=0/12:00:00
AccuracySec=5s

[Install]
WantedBy=timers.target"

        write_file download_creds_timer_filename download_creds_timer_file true
    done

    local -r download_creds_script_filename="/usr/local/bin/download-credentials.sh"
    # In the script text below, \$ and \" are escaped so they survive into the
    # generated script; $secret_prefix, $KEYVAULTPREFIX, $keyvault_suffix and
    # $KEYVAULTDNSSUFFIX are expanded now, at generation time.
    # NOTE(review): in the generated script the EXIT trap is installed only
    # after the login loop, and `let RETRIES-=1` returns non-zero when the
    # counter reaches 0 while `set -e` is active - confirm the intended
    # behaviour when every login attempt fails.
    # shellcheck disable=SC2034
    local -r download_creds_script_file="#!/bin/bash
set -eu

COMPONENT=\$1
echo \"Download \$COMPONENT credentials\"

TEMP_DIR=\"\$(mktemp -d)\"
export AZURE_CONFIG_DIR=\"\$(mktemp -d)\"

echo \"Logging into Azure...\"
RETRIES=3
while [[ \$RETRIES -gt 0 ]]; do
    if az login -i --allow-no-subscriptions
    then
        echo \"az login successful\"
        break
    else
        echo \"az login failed. Retrying...\"
        let RETRIES-=1
        sleep 5
    fi
done

trap \"cleanup\" EXIT

cleanup() {
    az logout
    [[ \$TEMP_DIR =~ /tmp/.+ ]] && rm -rf \$TEMP_DIR
    [[ \$AZURE_CONFIG_DIR =~ /tmp/.+ ]] && rm -rf \$AZURE_CONFIG_DIR
}

if [[ \$COMPONENT = \"mdm\" ]]; then
    CURRENT_CERT_FILE=\"/etc/mdm.pem\"
elif [[ \$COMPONENT = \"mdsd\" ]]; then
    CURRENT_CERT_FILE=\"/var/lib/waagent/Microsoft.Azure.KeyVault.Store/mdsd.pem\"
else
    echo Invalid usage && exit 1
fi

SECRET_NAME=\"$secret_prefix-\${COMPONENT}\"
NEW_CERT_FILE=\"\$TEMP_DIR/\$COMPONENT.pem\"
for attempt in {1..5}; do
    az keyvault \
        secret \
        download \
        --file \"\$NEW_CERT_FILE\" \
        --id \"https://$KEYVAULTPREFIX-$keyvault_suffix.$KEYVAULTDNSSUFFIX/secrets/\$SECRET_NAME\" \
        && break
    if [[ \$attempt -lt 5 ]]; then sleep 10; else exit 1; fi
done

if [ -f \$NEW_CERT_FILE ]; then
    if [[ \$COMPONENT = \"mdsd\" ]]; then
        chown syslog:syslog \$NEW_CERT_FILE
    else
        sed -i -ne '1,/END CERTIFICATE/ p' \$NEW_CERT_FILE
    fi

    new_cert_sn=\"\$(openssl x509 -in \"\$NEW_CERT_FILE\" -noout -serial | awk -F= '{print \$2}')\"
    current_cert_sn=\"\$(openssl x509 -in \"\$CURRENT_CERT_FILE\" -noout -serial | awk -F= '{print \$2}')\"
    if [[ ! -z \$new_cert_sn ]] && [[ \$new_cert_sn != \"\$current_cert_sn\" ]]; then
        echo updating certificate for \$COMPONENT
        chmod 0600 \$NEW_CERT_FILE
        mv \$NEW_CERT_FILE \$CURRENT_CERT_FILE
    fi
else
    echo Failed to refresh certificate for \$COMPONENT && exit 1
fi"

    write_file download_creds_script_filename download_creds_script_file true

    chmod u+x "$download_creds_script_filename"

    # Each download runs as a background job that is waited on immediately, so
    # the certificates are in place before later setup steps read them.
    "$download_creds_script_filename" mdsd &
    wait "$!"

    "$download_creds_script_filename" mdm &
    wait "$!"

    # shellcheck disable=SC2034
    local -r watch_mdm_creds_service_filename="/etc/systemd/system/watch-mdm-credentials.service"
    # shellcheck disable=SC2034
    local -r watch_mdm_creds_service_file="[Unit]
Description=Watch for changes in mdm.pem and restarts the mdm service

[Service]
Type=oneshot
ExecStart=/usr/bin/systemctl restart mdm.service

[Install]
WantedBy=multi-user.target"

    write_file watch_mdm_creds_service_filename watch_mdm_creds_service_file true

    # shellcheck disable=SC2034
    local -r watch_mdm_creds_path_filename='/usr/lib/systemd/system/watch-mdm-credentials.path'
    # shellcheck disable=SC2034
    local -r watch_mdm_creds_path_file='[Path]
PathModified=/etc/mdm.pem

[Install]
WantedBy=multi-user.target'

    write_file watch_mdm_creds_path_filename watch_mdm_creds_path_file true

    local -r watch_mdm_creds='watch-mdm-credentials.path'
    systemctl enable --now "$watch_mdm_creds" || abort "failed to enable and start $watch_mdm_creds"
}

# configure_service_mdm
# Writes /etc/sysconfig/mdm, creates /var/etw and writes the mdm systemd unit,
# which runs MetricsExtension from the mdm image with /etc/mdm.pem as both
# certificate and private key file.
# args:
# 1) role - nameref, string; can be "gateway" or "rp"
# 2) image - nameref, string; mdm container image to run
# 3) network - nameref, string; podman network name to be attached
configure_service_mdm() {
    local -n role="$1"
    local -n image="$2"
    local -n network="$3"
    log "starting"
    log "Configuring mdm service"

    verify_role role

    # shellcheck disable=SC2034
    local -r sysconfig_mdm_filename="/etc/sysconfig/mdm"
    # shellcheck disable=SC2034
    local -r sysconfig_mdm_file="MDMFRONTENDURL='$MDMFRONTENDURL'
MDMIMAGE='$image'
MDMSOURCEENVIRONMENT='$LOCATION'
MDMSOURCEROLE='$role'
MDMSOURCEROLEINSTANCE=\"$(hostname)\""

    write_file sysconfig_mdm_filename sysconfig_mdm_file true

    mkdir -p /var/etw

    # $MDMFRONTENDURL, $LOCATION, $role and $HOSTNAME are expanded now, while
    # the unit text is generated.
    # shellcheck disable=SC2034
    local -r mdm_service_filename="/etc/systemd/system/mdm.service"
    # shellcheck disable=SC2034
    local -r mdm_service_file="[Unit]
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=/etc/sysconfig/mdm
ExecStartPre=-/usr/bin/podman rm -f %N
ExecStart=/usr/bin/podman run \
    --entrypoint /usr/sbin/MetricsExtension \
    --hostname %H \
    --name %N \
    --rm \
    --cap-drop net_raw \
    --network=$network \
    -m 2g \
    -v /etc/mdm.pem:/etc/mdm.pem \
    -v /var/etw:/var/etw:z \
    $image \
    -CertFile /etc/mdm.pem \
    -FrontEndUrl $MDMFRONTENDURL \
    -Logger Console \
    -LogLevel Warning \
    -PrivateKeyFile /etc/mdm.pem \
    -SourceEnvironment $LOCATION \
    -SourceRole $role \
    -SourceRoleInstance $HOSTNAME
ExecStop=/usr/bin/podman stop %N
Restart=always
RestartSec=1
StartLimitInterval=0

[Install]
WantedBy=multi-user.target"

    write_file mdm_service_filename mdm_service_file true
}

# configure_vmss_aro_services
# args:
# 1) r - nameref, string; role of VMSS
# 2) images - nameref, associative array; ARO container images
# 3) configs - nameref, associative array; configuration files and versions. The values should be a reference to variables, not dereferenced.
#       This is because the value is used when creating nameref variables by helper functions.
configure_vmss_aro_services() {
    # The values stored in images/configs are variable NAMES; they are handed
    # to the helpers as-is, and each helper binds its own nameref to them. No
    # extra locals are introduced here so none of those names can be shadowed.
    local -n r="$1"
    local -n images="$2"
    local -n configs="$3"
    log "starting"
    verify_role "$1"

    # Role-specific services first: gateway gets the gateway unit, rp gets the
    # rp, monitor and portal units. Any other role gets neither.
    case "$r" in
        "$role_gateway")
            configure_service_aro_gateway "${images["rp"]}" "$1" "${configs["gateway_config"]}" "${configs["network"]}"
            ;;
        "$role_rp")
            configure_service_aro_rp "${images["rp"]}" "$1" "${configs["rp_config"]}" "${configs["network"]}"
            configure_service_aro_monitor "${images["rp"]}" "${configs["network"]}"
            configure_service_aro_portal "${images["rp"]}" "${configs["network"]}"
            ;;
    esac

    # Services common to every role, in this order: the credential timers run
    # before the mdm and mdsd services are configured.
    configure_service_fluentbit "${configs["fluentbit"]}" "${images["fluentbit"]}" "${configs["network"]}"
    configure_timers_mdm_mdsd "$1"
    configure_service_mdm "$1" "${images["mdm"]}" "${configs["network"]}"
    configure_service_mdsd "$1" "${configs["mdsd"]}"
    run_azsecd_config_scan
}

# Source the shared helpers when this file is used on its own and
# util-common.sh is present in the current directory.
util_common="util-common.sh"
if [ -f "$util_common" ]; then
    # shellcheck source=util-common.sh
    source "$util_common"
fi

# -----------------------------------------------------------------------------
# Patch boundary - original diff header, kept verbatim as comments:
# diff --git a/pkg/deploy/generator/scripts/util-system.sh b/pkg/deploy/generator/scripts/util-system.sh
# new file mode 100644
# index 00000000000..551bdefdf00
# --- /dev/null
# +++ b/pkg/deploy/generator/scripts/util-system.sh
# @@ -0,0 +1,300 @@
# -----------------------------------------------------------------------------
#!/bin/bash
# This file is intended to be sourced by bootstrapping scripts for commonly used functions

# configure_sshd
# We need to configure PasswordAuthentication to yes in order for the VMSS Access JIT to work
# Rewrites every "PasswordAuthentication no" in /etc/ssh/sshd_config and
# reloads sshd; aborts when the reload fails.
configure_sshd() {
    log "starting"
    local -r sshd_config="/etc/ssh/sshd_config"

    log "Editing $sshd_config to allow password authentication"
    sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/g' "$sshd_config"

    systemctl reload sshd.service || abort "sshd failed to reload"
}

# configure_logrotate clobbers /etc/logrotate.conf
# args:
# 1) dropin_files - nameref, associative array, optional; logrotate files to write to /etc/logrotate.d
#       Key name dictates filenames written to /etc/logrotate.d.
# Example:
#   Key dictates the filename written in /etc/logrotate.d
#   shellcheck disable=SC2034
#   local -rA logrotate_dropins=(
#       ["gateway"]="$gateway_log_file"
#   )
# Writes /etc/logrotate.conf and then one file per drop-in entry. Any number
# of drop-ins is supported.
configure_logrotate() {
    # empty_str is not defined in this file; it is expected to be provided by
    # the shared utility code and stands in for "no drop-ins".
    local -n dropin_files="${1:-empty_str}"
    log "starting"

    # shellcheck disable=SC2034
    local -r logrotate_conf_filename='/etc/logrotate.conf'
    # shellcheck disable=SC2034
    local -r logrotate_conf_file='# see "man logrotate" for details
# rotate log files weekly
weekly

# keep 2 weeks worth of backlogs
rotate 2

# create new (empty) log files after rotating old ones
create

# use date as a suffix of the rotated file
dateext

# uncomment this if you want your log files compressed
compress

# RPM packages drop log rotation information into this directory
include /etc/logrotate.d

# no packages own wtmp and btmp -- we will rotate them here
/var/log/wtmp {
    monthly
    create 0664 root utmp
    minsize 1M
    rotate 1
}

/var/log/btmp {
    missingok
    monthly
    create 0600 root utmp
    rotate 1
}'

    write_file logrotate_conf_filename logrotate_conf_file true

    if [ -n "${dropin_files[*]}" ]; then
        local -r logrotate_d="/etc/logrotate.d"
        log "Writing logrotate files to $logrotate_d"

        # Declared once and NOT readonly: they are reassigned on every
        # iteration, which a readonly variable would reject from the second
        # drop-in onwards.
        local dropin_name dropin_filename dropin_file
        for dropin_name in "${!dropin_files[@]}"; do
            # shellcheck disable=SC2034
            dropin_filename="$logrotate_d/$dropin_name"
            # shellcheck disable=SC2034
            dropin_file="${dropin_files["$dropin_name"]}"
            write_file dropin_filename dropin_file true
        done
    fi
}

# pull_container_images
# Logs in with the VM's managed identity, logs into the ACR named by the last
# path segment of $ACRRESOURCEID, pulls every image and logs out again. Every
# step goes through retry with a 30 second wait.
# args:
# 1) pull_images - nameref, string array; each element is the NAME of a variable holding an image reference
# 2) registry_conf - nameref, string, optional; path to docker/podman configuration file
pull_container_images() {
    local -n pull_images="$1"
    local -n registry_conf="${2:-empty_str}"
    log "starting"

    # shellcheck disable=SC2034
    local -ri retry_time=30
    # Kept local so the command array does not leak into the caller's scope.
    local -a cmd

    # The managed identity that the VM runs as only has a single roleassignment.
    # This role assignment is ACRPull which is not necessarily present in the
    # subscription we're deploying into. If the identity does not have any
    # role assignments scoped on the subscription we're deploying into, it will
    # not show on az login -i, which is why the below line is commented.
    # az account set -s "$SUBSCRIPTIONID"
    cmd=(
        az
        login
        -i
        --allow-no-subscriptions
    )

    log "Running az login with retries"
    retry cmd retry_time

    # Suppress emulation output for podman instead of docker for az acr compatibility
    mkdir -p /etc/containers/
    mkdir -p /root/.docker
    touch /etc/containers/nodocker

    # This name is used in the case that az acr login searches for this in its environment
    export REGISTRY_AUTH_FILE="/root/.docker/config.json"

    if [ -n "${registry_conf}" ]; then
        write_file REGISTRY_AUTH_FILE registry_conf true
    fi

    log "logging into prod acr"
    cmd=(
        az
        acr
        login
        --name
        # Registry name = everything after the last "/" of the resource id
        "${ACRRESOURCEID##*/}"
    )

    retry cmd retry_time

    local i
    # shellcheck disable=SC2068
    for i in ${pull_images[@]}; do
        # Rebinds the nameref to the variable named by this element.
        local -n image="$i"
        cmd=(
            podman
            pull
            "$image"
        )

        log "Pulling image $image with retries now"
        retry cmd retry_time
    done

    # shellcheck disable=SC2034
    cmd=(
        az
        logout
    )

    log "Running az logout with retries"
    retry cmd retry_time
}

# configure_certs
# devproxy: decodes the proxy certificate, key and client certificate into
# /etc/proxy and returns.
# rp: decodes the admin (and, when set, ARM) CA bundle into /etc/aro-rp.
# For every role other than devproxy it then splits the system CA bundle into
# /usr/lib/ssl/certs, rehashes it and writes the nodescan agent config.
# args:
# 1) role - nameref, string; can be "devproxy" or "rp"
configure_certs() {
    local -n role="$1"
    log "starting"
    log "Configuring certificates for $role"

    verify_role role true

    if [ "$role" == "devproxy" ]; then
        local -r proxy_certs_basedir="/etc/proxy"
        mkdir -p "$proxy_certs_basedir"
        base64 -d <<<"$PROXYCERT" > "$proxy_certs_basedir/proxy.crt"
        base64 -d <<<"$PROXYKEY" > "$proxy_certs_basedir/proxy.key"
        base64 -d <<<"$PROXYCLIENTCERT" > "$proxy_certs_basedir/proxy-client.crt"
        chown -R 1000:1000 /etc/proxy
        chmod 0600 "$proxy_certs_basedir/proxy.key"
        return 0
    fi

    if [ "$role" == "rp" ]; then
        local -r rp_certs_basedir="/etc/aro-rp"
        mkdir -p "$rp_certs_basedir"
        base64 -d <<<"$ADMINAPICABUNDLE" > "$rp_certs_basedir/admin-ca-bundle.pem"
        if [[ -n "$ARMAPICABUNDLE" ]]; then
            base64 -d <<<"$ARMAPICABUNDLE" > "$rp_certs_basedir/arm-ca-bundle.pem"
        fi
        chown -R 1000:1000 "$rp_certs_basedir"
    fi

    # setting MONITORING_GCS_AUTH_ID_TYPE=AuthKeyVault seems to have caused mdsd not
    # to honour SSL_CERT_FILE any more, heaven only knows why.
    local -r ssl_certs_basedir="/usr/lib/ssl/certs"
    mkdir -p "$ssl_certs_basedir"
    csplit -f "$ssl_certs_basedir/cert-" -b %03d.pem /etc/pki/tls/certs/ca-bundle.crt /^$/1 "{*}" 1>/dev/null
    c_rehash "$ssl_certs_basedir"

    # we leave clientId blank as long as only 1 managed identity assigned to vmss
    # if we have more than 1, we will need to populate with clientId used for off-node scanning
    # NOTE(review): TenantId and QualysStoreBaseUrl are interpolated without
    # surrounding quotes - presumably the variables already carry JSON quoting;
    # verify against the caller.
    # shellcheck disable=SC2034
    local -r nodescan_agent_filename="/etc/default/vsa-nodescan-agent.config"
    # shellcheck disable=SC2034
    local -r nodescan_agent_file="{
    \"Nice\": 19,
    \"Timeout\": 10800,
    \"ClientId\": \"\",
    \"TenantId\": $AZURESECPACKVSATENANTID,
    \"QualysStoreBaseUrl\": $AZURESECPACKQUALYSURL,
    \"ProcessTimeout\": 300,
    \"CommandDelay\": 0
  }"

    write_file nodescan_agent_filename nodescan_agent_file true
}

# run_azsecd_config_scan
# Runs `azsecd config -s <scan> -d P1D` for the baseline, clamav and software
# scans.
run_azsecd_config_scan() {
    log "starting"

    local -ar configs=(
        "baseline"
        "clamav"
        "software"
    )

    log "Scanning configuration files with azsecd ${configs[*]}"
    local scan
    for scan in "${configs[@]}"; do
        log "Scanning config file $scan now"
        /usr/local/bin/azsecd config -s "$scan" -d P1D
    done
}

# create_required_dirs
# Creates the directories later setup steps rely on; aborts on the first
# failure.
create_required_dirs() {
    local -ar create_dirs=(
        /var/log/journal
        /var/lib/waagent/Microsoft.Azure.KeyVault.Store
        # Does not exist on devProxyVMSS
        /var/opt/microsoft/linuxmonagent
    )

    local d
    for d in "${create_dirs[@]}"; do
        log "Creating directory $d"
        mkdir -p "$d" || abort "failed to create directory $d"
    done
}

# create_podman_networks()
# args:
# 1) nets - nameref, associative array; Networks to be created
#       Key is the network name, value is the subnet with cidr notation
create_podman_networks() {
    local -n nets="$1"
    log "starting"

    local n
    # shellcheck disable=SC2068
    for n in ${!nets[@]}; do
        log "Creating podman network \"$n\" with subnet \"${nets[$n]}\""
        podman network \
            create \
            --subnet "${nets["$n"]}" \
            "$n"
    done
}

# firewalld_configure_backend
# Switches FirewallBackend from nftables to iptables in firewalld.conf.
firewalld_configure_backend() {
    log "starting"

    log "Changing firewalld backend to iptables"
    local -r conf_file="/etc/firewalld/firewalld.conf"
    sed -i 's/FirewallBackend=nftables/FirewallBackend=iptables/g' "$conf_file"
}

# firewalld_configure
# Switches the backend, enables and starts firewalld, opens the requested
# ports and persists the result.
# args:
# 1) ports - nameref, string array; ports to be enabled.
#       Ports must be postfixed with /tcp or /udp
firewalld_configure() {
    local -n ports="$1"
    log "starting"

    firewalld_configure_backend

    # shellcheck disable=SC2034
    local -ra service=(
        "firewalld"
    )
    enable_services service

    log "Enabling ports ${ports[*]} on default firewalld zone"
    local port
    # shellcheck disable=SC2068
    for port in ${ports[@]}; do
        log "Enabling port $port now"
        # Added to the RUNTIME configuration on purpose: the
        # --runtime-to-permanent call below overwrites the permanent
        # configuration with the runtime one, so a port added with
        # --permanent only would be discarded by it.
        firewall-cmd "--add-port=$port"
    done

    log "Writing runtime config to permanent config"
    firewall-cmd --runtime-to-permanent
}

# -----------------------------------------------------------------------------
# Patch boundary - original diff header, kept verbatim as comments:
# diff --git a/pkg/deploy/generator/scripts/util.sh b/pkg/deploy/generator/scripts/util.sh
# new file mode 100644
# index 00000000000..ca1f36304ea
# --- /dev/null
# +++ b/pkg/deploy/generator/scripts/util.sh
# @@ -0,0 +1,33 @@
# -----------------------------------------------------------------------------
#!/bin/bash
# File to be sourced by *VMSS.sh scripts
# This is only present for the ability to manually run the VMSS setup scripts separate from the deploy process.
# e. g.
#       scp copying the script to a test VM
# During normal deployment operations, the other util-*.sh files are prefixed to the VMSS scripts

# Command tracing is opt-in: only DEBUG=true switches it on.
if [[ "${DEBUG:-false}" == true ]]; then
    set -x
fi

# Each helper file is sourced only when it exists in the current working
# directory, in this order: common, system, services, packages.

util_common="util-common.sh"
if [[ -f "$util_common" ]]; then
    # shellcheck source=util-common.sh
    source "$util_common"
fi

util_system="util-system.sh"
if [[ -f "$util_system" ]]; then
    # shellcheck source=util-system.sh
    source "$util_system"
fi

util_services="util-services.sh"
if [[ -f "$util_services" ]]; then
    # shellcheck source=util-services.sh
    source "$util_services"
fi

util_pkgs="util-packages.sh"
if [[ -f "$util_pkgs" ]]; then
    # shellcheck source=util-packages.sh
    source "$util_pkgs"
fi