chore: 迁出 fnOS K8s 协作文档

This commit is contained in:
sunlei 2026-05-16 16:53:47 +08:00
parent a30b6667b2
commit 3364465267
3 changed files with 0 additions and 441 deletions

View File

@ -1,254 +0,0 @@
#!/usr/bin/env bash
# Bootstrap settings. Every variable in the first group may be overridden from
# the environment; the value after ":=" applies when it is unset or empty.
set -Eeuo pipefail

# Host-side working directory and cluster identity.
: "${KT_K8S_ROOT:=/vol1/docker/kt-k8s}"
: "${CLUSTER_NAME:=kt-nas}"
: "${K8S_NAMESPACE:=kt-prod}"

# Local image registry.
: "${REGISTRY_NAME:=kt-registry.localhost}"
: "${REGISTRY_PORT:=5000}"

# Host port published by the k3d load balancer and the NodePort it maps to.
: "${API_HOST_PORT:=48085}"
: "${API_NODE_PORT:=30085}"

# Jenkins Agent container and the paths used inside it.
: "${AGENT_CONTAINER:=kt-node-agent}"
: "${AGENT_KUBECONFIG:=/home/jenkins/agent/kubeconfig/${CLUSTER_NAME}.jenkins.yaml}"
: "${API_ENV_FILE_ON_AGENT:=/home/jenkins/agent/env/kt-template-online-api/.env.production}"
: "${API_ENV_SECRET:=kt-template-online-api-env}"

# Sandbox image imported into the cluster.
: "${PAUSE_IMAGE:=rancher/mirrored-pause:3.6}"

# Pre-existing Docker API container and whether the script may remove it.
: "${OLD_API_CONTAINER:=kt-template-online-api}"
: "${STOP_OLD_API_CONTAINER:=false}"

# Derived names and paths (not overridable).
REGISTRY_CONTAINER="k3d-$REGISTRY_NAME"
K3D_NETWORK="k3d-$CLUSTER_NAME"
HOST_KUBECONFIG="$KT_K8S_ROOT/kubeconfig/$CLUSTER_NAME.host.yaml"
JENKINS_KUBECONFIG="$KT_K8S_ROOT/kubeconfig/$CLUSTER_NAME.jenkins.yaml"
# Print a timestamped informational message to stdout, preceded by a blank line.
# Arguments: message words, joined with spaces.
log() {
  local stamp
  # "|| true" keeps a failing date from aborting the script under errexit.
  stamp="$(date '+%F %T')" || true
  printf '\n[%s] %s\n' "$stamp" "$*"
}
# Print a timestamped warning to stderr, preceded by a blank line.
# Arguments: message words, joined with spaces.
warn() {
  local stamp
  # "|| true" keeps a failing date from aborting the script under errexit.
  stamp="$(date '+%F %T')" || true
  1>&2 printf '\n[%s] WARN: %s\n' "$stamp" "$*"
}
# Print a timestamped error to stderr and terminate the script with status 1.
# Arguments: message words, joined with spaces.
die() {
  local stamp
  # "|| true" keeps a failing date from pre-empting the error message.
  stamp="$(date '+%F %T')" || true
  1>&2 printf '\n[%s] ERROR: %s\n' "$stamp" "$*"
  exit 1
}
# Abort via die unless the effective user id reported by `id -u` is 0.
require_root() {
  if ! [ "$(id -u)" -eq 0 ]; then
    die "Please run as root."
  fi
}
# Abort via die when the named command cannot be resolved by `command -v`.
# Arguments: $1 - command name to look up.
require_command() {
  if ! command -v "$1" >/dev/null 2>&1; then
    die "Required command not found: $1"
  fi
}
# Succeed when `docker inspect` can resolve both the load-balancer container
# and the first server container of the cluster named by CLUSTER_NAME.
# Output is discarded; only the exit status matters.
cluster_exists() {
  local lb_container="k3d-${CLUSTER_NAME}-serverlb"
  local server_container="k3d-${CLUSTER_NAME}-server-0"
  docker inspect "$lb_container" "$server_container" >/dev/null 2>&1
}
# Install k3d with the upstream installer unless a k3d command already exists.
#
# The installer is downloaded to a temporary file and executed only after the
# download succeeded. Piping curl straight into bash would let bash run a
# truncated script if the transfer failed midway.
# Aborts via die when the download or the installer fails.
install_k3d() {
  if command -v k3d >/dev/null 2>&1; then
    log "k3d already installed: $(k3d version | head -n 1)"
    return
  fi
  require_command curl
  log "Installing k3d from official installer"

  local installer
  installer="$(mktemp)" || die "Failed to create a temporary file for the k3d installer"
  if ! curl -fsSL -o "$installer" https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh; then
    rm -f -- "$installer"
    die "Failed to download the k3d installer"
  fi

  # Capture the status instead of relying on errexit so the temp file is
  # always removed before the script aborts.
  local status=0
  bash "$installer" || status=$?
  rm -f -- "$installer"
  [ "$status" -eq 0 ] || die "k3d installer exited with status ${status}"

  k3d version
}
# Map the machine name reported by `uname -m` to the architecture suffix used
# in kubectl download URLs. Prints amd64, arm64 or arm; aborts via die for any
# other machine name.
kubectl_arch() {
  local machine
  # "|| true": an empty result falls through to the unsupported branch.
  machine="$(uname -m)" || true
  case "$machine" in
    amd64 | x86_64) printf '%s\n' amd64 ;;
    arm64 | aarch64) printf '%s\n' arm64 ;;
    armhf | armv7l) printf '%s\n' arm ;;
    *) die "Unsupported kubectl architecture: ${machine}" ;;
  esac
}
# Install the latest stable kubectl into /usr/local/bin unless a kubectl
# command already exists.
#
# The binary is staged in a temporary file, checked against the SHA-256
# checksum published next to it on dl.k8s.io, and only then installed. A
# failed or tampered download therefore never lands on PATH.
# Aborts via die on any download, checksum or install failure.
install_kubectl() {
  if command -v kubectl >/dev/null 2>&1; then
    log "kubectl already installed: $(kubectl version --client=true --short 2>/dev/null || kubectl version --client=true)"
    return
  fi
  require_command curl
  require_command sha256sum

  local version arch base_url expected actual staged
  version="$(curl -fsSL https://dl.k8s.io/release/stable.txt)" \
    || die "Failed to resolve the latest stable kubectl version"
  [ -n "$version" ] || die "dl.k8s.io returned an empty kubectl version"
  # kubectl_arch already reported the reason on stderr if it failed.
  arch="$(kubectl_arch)" || exit 1
  base_url="https://dl.k8s.io/release/${version}/bin/linux/${arch}"
  log "Installing kubectl ${version} for linux/${arch}"

  staged="$(mktemp)" || die "Failed to create a temporary file for kubectl"
  if ! curl -fsSL -o "$staged" "${base_url}/kubectl"; then
    rm -f -- "$staged"
    die "Failed to download kubectl ${version}"
  fi
  if ! expected="$(curl -fsSL "${base_url}/kubectl.sha256")"; then
    rm -f -- "$staged"
    die "Failed to download the kubectl ${version} checksum"
  fi
  # Keep only the hash, dropping any trailing whitespace or file name.
  expected="${expected%%[[:space:]]*}"
  if ! actual="$(sha256sum "$staged")"; then
    rm -f -- "$staged"
    die "Failed to compute the checksum of the downloaded kubectl"
  fi
  actual="${actual%% *}"
  if [ -z "$expected" ] || [ "$actual" != "$expected" ]; then
    rm -f -- "$staged"
    die "kubectl checksum mismatch: expected '${expected}', got '${actual}'"
  fi

  if ! install -m 0755 "$staged" /usr/local/bin/kubectl; then
    rm -f -- "$staged"
    die "Failed to install kubectl into /usr/local/bin"
  fi
  rm -f -- "$staged"
  kubectl version --client=true
}
# Create the working directories under KT_K8S_ROOT (registry, kubeconfig,
# secrets, manifests, backups) with a single `mkdir -p` call.
prepare_dirs() {
  log "Preparing ${KT_K8S_ROOT}"
  local -a targets=()
  local sub
  for sub in registry kubeconfig secrets manifests backups; do
    targets+=("${KT_K8S_ROOT}/${sub}")
  done
  mkdir -p "${targets[@]}"
}
# Make sure the hosts file maps REGISTRY_CONTAINER to 127.0.0.1.
#
# Arguments: $1 - hosts file to update (optional, defaults to /etc/hosts).
#
# The lookup compares whole hostname fields literally and ignores anything
# after "#". Interpolating the name into a regex would let its dots match any
# character and would count commented-out entries as present.
ensure_registry_host_entry() {
  local hosts_file="${1:-/etc/hosts}"

  if awk -v host="$REGISTRY_CONTAINER" '
      { sub(/#.*/, "") }
      { for (i = 2; i <= NF; i++) if ($i == host) found = 1 }
      END { exit (found ? 0 : 1) }
    ' "$hosts_file"; then
    return 0
  fi

  log "Adding ${REGISTRY_CONTAINER} to ${hosts_file}"
  # Command substitution strips a trailing newline, so a non-empty result
  # means the file ends mid-line; terminate that line before appending.
  if [ -s "$hosts_file" ] && [ -n "$(tail -c 1 -- "$hosts_file")" ]; then
    printf '\n' >> "$hosts_file"
  fi
  printf '127.0.0.1 %s\n' "$REGISTRY_CONTAINER" >> "$hosts_file"
}
# Create the local k3d registry unless a container named REGISTRY_CONTAINER
# can already be inspected. Registry data is bind-mounted from
# ${KT_K8S_ROOT}/registry.
ensure_registry() {
  if ! docker inspect "$REGISTRY_CONTAINER" >/dev/null 2>&1; then
    log "Creating local k3d registry: ${REGISTRY_CONTAINER}:${REGISTRY_PORT}"
    local data_mount="${KT_K8S_ROOT}/registry:/var/lib/registry"
    k3d registry create "$REGISTRY_NAME" --port "$REGISTRY_PORT" -v "$data_mount"
    return
  fi
  log "k3d registry already exists: ${REGISTRY_CONTAINER}"
}
# Print what currently holds host port API_HOST_PORT, or nothing when it is free.
# Output is the matching `docker ps` line(s), or the literal
# "non-docker-process" when only `ss` reports a listener on the port.
_api_host_port_owner() {
  local owner listeners
  owner="$(docker ps --format '{{.Names}} {{.Ports}}' | grep -E "(:|0\.0\.0\.0:|:::|127\.0\.0\.1:)${API_HOST_PORT}->" || true)"
  if [ -z "$owner" ] && command -v ss >/dev/null 2>&1; then
    # Capture first, then match: with pipefail, `ss | grep -q` could report
    # failure when grep exits early and ss receives SIGPIPE.
    listeners="$(ss -ltn "( sport = :${API_HOST_PORT} )" || true)"
    if grep -q ":${API_HOST_PORT}" <<<"$listeners"; then
      owner="non-docker-process"
    fi
  fi
  printf '%s' "$owner"
}

# Before a new cluster is created, make sure host port API_HOST_PORT is free.
#
# Does nothing when the cluster already exists or the port is free. Otherwise:
#   - STOP_OLD_API_CONTAINER=true: force-remove OLD_API_CONTAINER, then check
#     the port again and abort if something else still holds it. Returning
#     without that second check would only defer the failure to cluster
#     creation, with a less specific error.
#   - any other value: abort via die and name the current owner.
assert_api_port_available_for_new_cluster() {
  if cluster_exists; then
    return 0
  fi

  local port_owner
  port_owner="$(_api_host_port_owner)"
  [ -n "$port_owner" ] || return 0

  if [ "$STOP_OLD_API_CONTAINER" = "true" ]; then
    log "Host port ${API_HOST_PORT} is in use. Stopping old API container: ${OLD_API_CONTAINER}"
    # Best effort: the container may not exist; the re-check below decides.
    docker rm -f "$OLD_API_CONTAINER" >/dev/null 2>&1 || true
    port_owner="$(_api_host_port_owner)"
    [ -n "$port_owner" ] || return 0
    die "Host port ${API_HOST_PORT} is still in use after removing ${OLD_API_CONTAINER}: ${port_owner}. Free the port and re-run."
  fi

  die "Host port ${API_HOST_PORT} is in use: ${port_owner}. Re-run with STOP_OLD_API_CONTAINER=true when you are ready to cut over."
}
# Create the k3d cluster (one server, one agent, wired to the local registry,
# with API_HOST_PORT published to API_NODE_PORT on the load balancer) when it
# does not exist yet. In both cases, attach the registry container to the
# cluster network afterwards; a failure of that attach is ignored.
ensure_cluster() {
  if ! cluster_exists; then
    assert_api_port_available_for_new_cluster
    log "Creating k3d cluster: ${CLUSTER_NAME}"
    local -a create_args=(
      --servers 1
      --agents 1
      --registry-use "${REGISTRY_CONTAINER}:${REGISTRY_PORT}"
      -p "${API_HOST_PORT}:${API_NODE_PORT}@loadbalancer"
      --kubeconfig-update-default=false
      --kubeconfig-switch-context=false
    )
    k3d cluster create "$CLUSTER_NAME" "${create_args[@]}"
  else
    log "k3d cluster already exists: ${CLUSTER_NAME}"
  fi
  docker network connect "$K3D_NETWORK" "$REGISTRY_CONTAINER" >/dev/null 2>&1 || true
}
# Import PAUSE_IMAGE into the cluster, pulling it into the local Docker image
# store first when `docker image inspect` cannot find it.
ensure_pause_image() {
  log "Ensuring K3s sandbox image in cluster: ${PAUSE_IMAGE}"
  docker image inspect "$PAUSE_IMAGE" >/dev/null 2>&1 || docker pull "$PAUSE_IMAGE"
  k3d image import "$PAUSE_IMAGE" -c "$CLUSTER_NAME"
}
# Export the cluster kubeconfig twice: HOST_KUBECONFIG as k3d emits it, and
# JENKINS_KUBECONFIG as a copy whose API server address is rewritten for use
# from inside the k3d Docker network. Finishes by listing nodes through the
# host kubeconfig as a smoke test.
#
# Both files hold cluster credentials, so they are created (or tightened) to
# mode 600 before anything is written into them. Restricting them only at the
# end would leave them readable by other users if an intermediate step aborted
# the script, or if an older file with looser permissions was overwritten.
export_kubeconfigs() {
  log "Exporting kubeconfigs"
  touch "$HOST_KUBECONFIG" "$JENKINS_KUBECONFIG"
  chmod 600 "$HOST_KUBECONFIG" "$JENKINS_KUBECONFIG"

  k3d kubeconfig get "$CLUSTER_NAME" > "$HOST_KUBECONFIG"
  # Copying over an existing destination keeps the destination's mode.
  cp "$HOST_KUBECONFIG" "$JENKINS_KUBECONFIG"
  # Jenkins Agent runs inside Docker, so it reaches the API server through the k3d Docker network.
  kubectl config set-cluster "k3d-${CLUSTER_NAME}" \
    --server="https://k3d-${CLUSTER_NAME}-serverlb:6443" \
    --kubeconfig "$JENKINS_KUBECONFIG" >/dev/null
  # Re-assert the mode in case a tool above replaced a file instead of editing it.
  chmod 600 "$HOST_KUBECONFIG" "$JENKINS_KUBECONFIG"
  kubectl --kubeconfig "$HOST_KUBECONFIG" get nodes
}
# Create K8S_NAMESPACE if it is missing: render the namespace manifest with a
# client-side dry run and pipe it into `kubectl apply`, so an existing
# namespace is not an error. Both kubectl calls use HOST_KUBECONFIG.
ensure_namespace() {
  log "Ensuring namespace: ${K8S_NAMESPACE}"
  local -a host_kubectl=(kubectl --kubeconfig "$HOST_KUBECONFIG")
  "${host_kubectl[@]}" create namespace "$K8S_NAMESPACE" --dry-run=client -o yaml \
    | "${host_kubectl[@]}" apply -f -
}
# Give the Jenkins Agent container access to the cluster:
#   1. attach it to the k3d Docker network unless it is already listed there,
#   2. copy JENKINS_KUBECONFIG to AGENT_KUBECONFIG inside the container,
#   3. restrict the copy to mode 600 and read K8S_NAMESPACE with it as a check.
# Only warns and returns when the container is missing or not running.
sync_agent_kubeconfig() {
  docker inspect "$AGENT_CONTAINER" >/dev/null 2>&1 || {
    warn "Jenkins Agent container not found: ${AGENT_CONTAINER}. Copy ${JENKINS_KUBECONFIG} into ${AGENT_KUBECONFIG} after Agent is created."
    return
  }

  case "$(docker inspect -f '{{.State.Running}}' "$AGENT_CONTAINER")" in
    true) ;;
    *)
      warn "Jenkins Agent container exists but is not running: ${AGENT_CONTAINER}. Start it before syncing kubeconfig."
      return
      ;;
  esac

  log "Connecting Jenkins Agent to ${K3D_NETWORK}"
  docker inspect -f '{{json .NetworkSettings.Networks}}' "$AGENT_CONTAINER" \
    | grep -q "\"${K3D_NETWORK}\"" \
    || docker network connect "$K3D_NETWORK" "$AGENT_CONTAINER"

  log "Copying kubeconfig into Jenkins Agent: ${AGENT_KUBECONFIG}"
  # These command strings are interpreted by the login shell in the container.
  local mkdir_cmd verify_cmd
  printf -v mkdir_cmd "mkdir -p '%s'" "$(dirname "$AGENT_KUBECONFIG")"
  verify_cmd="chmod 600 '${AGENT_KUBECONFIG}' && kubectl --kubeconfig '${AGENT_KUBECONFIG}' get namespace '${K8S_NAMESPACE}'"
  docker exec "$AGENT_CONTAINER" sh -lc "$mkdir_cmd"
  docker cp "$JENKINS_KUBECONFIG" "${AGENT_CONTAINER}:${AGENT_KUBECONFIG}"
  docker exec "$AGENT_CONTAINER" sh -lc "$verify_cmd"
}
# Create or update the API env Secret from the env file stored inside the
# Jenkins Agent container, running kubectl inside that container.
# Returns silently when the container is missing or not running, and only
# warns when the env file does not exist in the container.
sync_api_secret_if_present() {
  docker inspect "$AGENT_CONTAINER" >/dev/null 2>&1 || return 0
  [ "$(docker inspect -f '{{.State.Running}}' "$AGENT_CONTAINER")" = "true" ] || return 0

  docker exec "$AGENT_CONTAINER" sh -lc "test -f '${API_ENV_FILE_ON_AGENT}'" || {
    warn "API env file not found in Agent: ${API_ENV_FILE_ON_AGENT}. Jenkins will fail K8s deploy until this file exists."
    return
  }

  log "Creating/updating API env Secret from Agent env file"
  # Render the Secret with a client-side dry run and apply it, so an existing
  # Secret is updated rather than rejected.
  local kubeconfig_flag="--kubeconfig '${AGENT_KUBECONFIG}'"
  local remote_cmd="kubectl ${kubeconfig_flag} -n '${K8S_NAMESPACE}' create secret generic '${API_ENV_SECRET}' --from-env-file='${API_ENV_FILE_ON_AGENT}' --dry-run=client -o yaml | kubectl ${kubeconfig_flag} apply -f -"
  docker exec "$AGENT_CONTAINER" sh -lc "$remote_cmd"
}
# Run the whole bootstrap in order, then print a summary of the resulting
# cluster settings and the matching Jenkins parameters to stdout.
# Positional arguments are accepted but not used.
main() {
  require_root
  require_command docker
  # Fail early when the Docker daemon is not reachable.
  docker info >/dev/null

  local step
  for step in \
    install_k3d \
    install_kubectl \
    prepare_dirs \
    ensure_registry_host_entry \
    ensure_registry \
    ensure_cluster \
    ensure_pause_image \
    export_kubeconfigs \
    ensure_namespace \
    sync_agent_kubeconfig \
    sync_api_secret_if_present; do
    "$step"
  done

  log "Bootstrap completed"
  printf '%s\n' \
    "Cluster: ${CLUSTER_NAME}" \
    "Namespace: ${K8S_NAMESPACE}" \
    "Registry: ${REGISTRY_CONTAINER}:${REGISTRY_PORT}" \
    "Host kubeconfig:${HOST_KUBECONFIG}" \
    "Agent kubeconf: ${AGENT_KUBECONFIG}" \
    "API route: NAS ${API_HOST_PORT} -> k3d NodePort ${API_NODE_PORT}" \
    "Jenkins defaults:" \
    "DEPLOY_TARGET=k8s" \
    "DOCKER_REGISTRY=${REGISTRY_CONTAINER}:${REGISTRY_PORT}" \
    "KUBE_CONFIG_FILE=${AGENT_KUBECONFIG}" \
    "CONTAINER_ENV_FILE=${API_ENV_FILE_ON_AGENT}"
}
# Script entry point: run the bootstrap, forwarding the command-line arguments.
main "$@"

View File

@ -1,26 +0,0 @@
<#
.SYNOPSIS
Uploads bootstrap.sh (located next to this script) to the NAS with scp and runs it there with ssh.

.PARAMETER SshTarget
user@host passed to ssh and scp.

.PARAMETER SshPort
SSH port of the target.

.PARAMETER Cutover
Runs the remote script with STOP_OLD_API_CONTAINER=true.
#>
param(
    [string]$SshTarget = "root@yd.frp-bag.com",
    [int]$SshPort = 45122,
    [switch]$Cutover
)

$ErrorActionPreference = "Stop"

$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
$localScript = Join-Path $scriptDir "bootstrap.sh"
$remoteScript = "/tmp/kt-fnos-k8s-bootstrap.sh"

if (-not (Test-Path -LiteralPath $localScript -PathType Leaf)) {
    throw "Bootstrap script not found: $localScript"
}

$sshOptions = @("-o", "StrictHostKeyChecking=accept-new")
$sshArgs = $sshOptions + @("-p", $SshPort.ToString(), $SshTarget)
$scpArgs = $sshOptions + @("-P", $SshPort.ToString(), $localScript, "${SshTarget}:$remoteScript")

$remoteEnv = ""
if ($Cutover) {
    # Cutover allows the bootstrap script to stop the old Docker API container if 48085 is occupied.
    $remoteEnv = "STOP_OLD_API_CONTAINER=true "
}

Write-Host "Uploading $localScript to ${SshTarget}:$remoteScript"
& scp @scpArgs
# $ErrorActionPreference does not react to a native command's exit code, so
# check it explicitly: after a failed upload, ssh would otherwise execute
# whatever copy of the script an earlier run left on the target.
if ($LASTEXITCODE -ne 0) {
    throw "scp failed with exit code $LASTEXITCODE; not running the remote bootstrap because $remoteScript may be missing or stale."
}

Write-Host "Running fnOS k3d bootstrap on $SshTarget"
& ssh @sshArgs "chmod +x '$remoteScript' && ${remoteEnv}bash '$remoteScript'"
if ($LASTEXITCODE -ne 0) {
    throw "Remote bootstrap failed with exit code $LASTEXITCODE."
}

View File

@ -1,161 +0,0 @@
# fnOS Docker + Jenkins + k3d/K8s 标准发布流程
这套流程把飞牛 NAS 上的 Docker 保留为基础控制面，把业务运行逐步迁到 k3d/K3s：
- Jenkins Controller、Jenkins Agent、本地 Registry 仍由 Docker 管理。
- 后端 API 进入 k3d/K8s，由 Jenkins 构建镜像、推送本地 Registry、滚动更新 Deployment。
- Web 和 Playground 继续走现有 Nginx 静态发布,等后端链路稳定后再决定是否容器化。
## 固定命名
| 对象 | 名称 |
| --- | --- |
| Jenkins Agent | `kt-node-agent` |
| k3d 集群 | `kt-nas` |
| K8s namespace | `kt-prod` |
| 本地 Registry | `k3d-kt-registry.localhost:5000` |
| API Deployment | `kt-template-online-api` |
| API Service | `kt-template-online-api` |
| API K8s 容器名 | `api` |
| API 容器端口 | `48085` |
| API NodePort | `30085` |
| NAS 对外端口 | `48085` |
## 一次性初始化
先确保本机 SSH key 已授权到 NAS 的 root 用户,然后从仓库根目录执行:
```powershell
.\ci\fnos-k8s\run-remote-bootstrap.ps1
```
如果 NAS 上旧的 Docker API 容器已经占用 `48085`,第一次真正切换时再执行:
```powershell
.\ci\fnos-k8s\run-remote-bootstrap.ps1 -Cutover
```
`-Cutover` 会允许脚本停止旧的 `kt-template-online-api` Docker 容器，把 `48085` 交给 k3d loadbalancer 映射到 K8s `NodePort 30085`。
脚本会在 NAS 上完成:
- 创建 `/vol1/docker/kt-k8s/{registry,kubeconfig,secrets,manifests,backups}`。
- 安装缺失的 `k3d`、`kubectl`。
- 创建本地 Registry。
- 创建 `kt-nas` 集群。
- 拉取并导入 `rancher/mirrored-pause:3.6`，避免 K3s 节点因 Docker Hub 超时卡在 `ContainerCreating`。
- 导出 host kubeconfig 和 Jenkins Agent kubeconfig。
- 创建 `kt-prod` namespace。
- 将 `kt-node-agent` 接入 k3d Docker 网络。
- 将 kubeconfig 复制到 Agent 内的 `/home/jenkins/agent/kubeconfig/kt-nas.jenkins.yaml`。
- 如果 Agent 内已有 `/home/jenkins/agent/env/kt-template-online-api/.env.production`,同步创建 `kt-template-online-api-env` Secret。
## Jenkins Agent 镜像
Agent 镜像位于:
```text
ci/jenkins-agent/Dockerfile
```
镜像内置:
- Node.js 22
- pnpm 9
- Docker CLI / Buildx / Compose
- kubectl
- Git / OpenSSH
NAS 上重新构建并重启 Agent：
```bash
docker build -t kt-jenkins-agent:node22 -f ci/jenkins-agent/Dockerfile ci/jenkins-agent
docker rm -f kt-node-agent
```
然后按 `ci/jenkins-agent/README.md` 中的 `docker run` 命令重新启动。Agent 启动后再跑一次:
```powershell
.\ci\fnos-k8s\run-remote-bootstrap.ps1
```
这样脚本会把 kubeconfig 重新复制进 Agent，并把 Agent 接到 `k3d-kt-nas` 网络。
## Jenkins 发布参数
后端 Jenkinsfile 的标准参数:
```text
DEPLOY_TARGET=k8s
BUILD_DOCKER_IMAGE=true
PUSH_DOCKER_IMAGE=true
DOCKER_REGISTRY=k3d-kt-registry.localhost:5000
IMAGE_NAME=kt-template-online-api
CONTAINER_ENV_FILE=/home/jenkins/agent/env/kt-template-online-api/.env.production
KUBE_CONFIG_FILE=/home/jenkins/agent/kubeconfig/kt-nas.jenkins.yaml
K8S_MANIFEST_FILE=k8s/prod/api.yaml
K8S_NAMESPACE=kt-prod
K8S_DEPLOYMENT=kt-template-online-api
K8S_CONTAINER=api
K8S_ENV_SECRET=kt-template-online-api-env
```
发布阶段会做四件事:
1. 构建后端 `dist`。
2. 用仓库根目录 `dockerfile` 构建业务镜像。
3. 推送到 NAS 本地 Registry，同时更新 `latest` 标签。
4. 从 Agent 私有 `.env.production` 重建 K8s Secret，并滚动更新 Deployment 镜像。
## 验证
NAS 上验证集群:
```bash
kubectl --kubeconfig /vol1/docker/kt-k8s/kubeconfig/kt-nas.host.yaml get nodes
kubectl --kubeconfig /vol1/docker/kt-k8s/kubeconfig/kt-nas.host.yaml -n kt-prod get pod,svc
```
Agent 内验证:
```bash
docker exec kt-node-agent sh -lc 'kubectl --kubeconfig /home/jenkins/agent/kubeconfig/kt-nas.jenkins.yaml -n kt-prod get pod,svc'
```
API 验证:
```bash
curl -I http://127.0.0.1:48085
```
如果公网入口仍由腾讯云 WireGuard/Caddy 转发到 NAS `10.66.66.2:48085`,切换到 K8s 后公网侧不需要改端口。
## 回滚
查看发布历史:
```bash
kubectl --kubeconfig /vol1/docker/kt-k8s/kubeconfig/kt-nas.host.yaml -n kt-prod rollout history deployment/kt-template-online-api
```
回滚上一个版本:
```bash
kubectl --kubeconfig /vol1/docker/kt-k8s/kubeconfig/kt-nas.host.yaml -n kt-prod rollout undo deployment/kt-template-online-api
kubectl --kubeconfig /vol1/docker/kt-k8s/kubeconfig/kt-nas.host.yaml -n kt-prod rollout status deployment/kt-template-online-api --timeout=180s
```
查看日志:
```bash
kubectl --kubeconfig /vol1/docker/kt-k8s/kubeconfig/kt-nas.host.yaml -n kt-prod logs -l app=kt-template-online-api --tail=200
```
如果需要临时退回旧 Docker 容器,先删除或停止 k3d loadbalancer 对 `48085` 的占用,再按旧 Jenkins Docker 参数重启 `kt-template-online-api` 容器。
## 参考
- k3d: <https://k3d.io/stable/>
- k3d Registry: <https://k3d.io/stable/usage/registries/>
- kubectl Linux 安装: <https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/>
- Kubernetes Deployment: <https://kubernetes.io/docs/concepts/workloads/controllers/deployment/>