{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"sns:Subscribe",
"sns:GetTopicAttributes",
"sns:ListSubscriptionsByTopic"
],
"Resource": "arn:aws:sns:*:*:CloudWatch-Alerts"
},
{
"Effect": "Allow",
"Action": [
"cloudwatch:DescribeAlarms",
"cloudwatch:GetMetricData"
],
"Resource": "*"
}
]
} |
import json
import logging
import os
import urllib.request
import datetime
import uuid
import re
# Configure logging: set the root logger's threshold to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Read the Feishu webhook URL from the environment. A missing variable raises
# KeyError as soon as this module is loaded.
FEISHU_WEBHOOK_URL = os.environ['FEISHU_WEBHOOK_URL']
# Upper bound, in seconds, on the webhook call so that a stalled connection
# cannot hold the invocation open until the Lambda timeout.
_FEISHU_TIMEOUT_SECONDS = 10


def _extract_service(message):
    """Return an upper-cased service label for the alarm, or "UNKNOWN_SERVICE"."""
    alarm_arn = message.get('AlarmArn', '')
    if alarm_arn:
        service_match = re.search(r'alarm:([^:]+):', alarm_arn)
        if service_match:
            return service_match.group(1).upper()
    # The pattern above needs a ':' after the alarm name, so fall back to the
    # metric namespace when it does not match.
    # NOTE(review): assumes metric alarms carry it under Trigger.Namespace
    # (e.g. "AWS/EC2") - confirm against the notification format.
    trigger = message.get('Trigger')
    if isinstance(trigger, dict):
        namespace = trigger.get('Namespace')
        if isinstance(namespace, str) and namespace:
            return namespace.upper()
    return "UNKNOWN_SERVICE"


def _build_feishu_message(message):
    """Build the Feishu "post" payload (one text paragraph per field)."""
    new_state = message.get('NewStateValue', 'N/A')
    is_alarm = new_state == "ALARM"
    event_type = (
        "AWS_SERVICE_OPERATIONAL_ISSUE" if is_alarm
        else "AWS_SERVICE_OPERATIONAL_NORMAL"
    )
    start_time = format_time(message.get('StateChangeTime', 'N/A'))
    # Timezone-aware "now"; datetime.utcnow() is deprecated.
    end_time = format_time(
        datetime.datetime.now(datetime.timezone.utc).isoformat()
    )
    lines = [
        f"关联账号: {message.get('AWSAccountId', 'N/A')}",
        f"服务: {_extract_service(message)}",
        f"区域: {message.get('Region', 'N/A')}",
        f"事件代码: {event_type}",
        f"事件类型: {'issue' if is_alarm else 'normal'}",
        f"受影响资源: {extract_resources(message)}",
        f"开始时间: {start_time}",
        # An end time is only shown once the alarm is back to OK.
        f"结束时间: {end_time if new_state == 'OK' else '持续中'}",
        f"事件说明: {message.get('NewStateReason', 'N/A')}",
        f"当前状态: {get_status_description(new_state)}",
        f"详细信息: {message.get('AlarmDescription', 'N/A')}",
    ]
    return {
        "msg_type": "post",
        "content": {
            "post": {
                "zh_cn": {
                    "title": f"AWS告警: {message.get('AlarmName', 'N/A')}",
                    "content": [
                        [{"tag": "text", "text": f"{line}\n"}]
                        for line in lines
                    ],
                }
            }
        },
    }


def _feishu_error(response_body):
    """Return an error description if the webhook body reports failure, else None."""
    try:
        parsed = json.loads(response_body)
    except ValueError:
        # Not JSON: nothing to inspect, so rely on the HTTP status alone.
        return None
    if not isinstance(parsed, dict):
        return None
    # NOTE(review): assumes the webhook signals rejection with a non-zero
    # "code" (or legacy "StatusCode") in an HTTP 200 body - confirm.
    code = parsed.get('code')
    if code is None:
        code = parsed.get('StatusCode')
    if code in (None, 0):
        return None
    return f"code={code}, msg={parsed.get('msg')}"


def lambda_handler(event, context):
    """Forward a CloudWatch alarm delivered through SNS to a Feishu webhook.

    Args:
        event: Lambda event; the alarm is read as JSON from
            ``event['Records'][0]['Sns']['Message']``.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` 200 and a success body when the message
        was posted, or ``statusCode`` 500 and the failure reason otherwise.
        Exceptions raised while processing are logged and reported through
        the 500 response rather than propagated.
    """
    logger.info("收到的事件: " + json.dumps(event))
    try:
        # Parse the CloudWatch alarm payload carried by the SNS record.
        message = json.loads(event['Records'][0]['Sns']['Message'])
        feishu_message = _build_feishu_message(message)

        # Send to Feishu; the context manager closes the HTTP response.
        req = urllib.request.Request(
            url=FEISHU_WEBHOOK_URL,
            data=json.dumps(feishu_message).encode('utf-8'),
            headers={'Content-Type': 'application/json'}
        )
        with urllib.request.urlopen(
            req, timeout=_FEISHU_TIMEOUT_SECONDS
        ) as response:
            response_body = response.read().decode('utf-8')
        logger.info(f"飞书响应: {response_body}")

        error = _feishu_error(response_body)
        if error is not None:
            logger.error(f"处理失败: {error}")
            return {
                'statusCode': 500,
                'body': json.dumps(f'处理失败: {error}')
            }
        return {
            'statusCode': 200,
            'body': json.dumps('消息已成功发送到飞书!')
        }
    except Exception as e:
        # Top-level boundary: keep the traceback in the log and report the
        # failure through the return value, as before.
        logger.exception(f"处理失败: {str(e)}")
        return {
            'statusCode': 500,
            'body': json.dumps(f'处理失败: {str(e)}')
        }
def format_time(iso_time):
    """Format an ISO 8601 timestamp as ``"Mon, 15 Jan 2024 10:30:00 GMT"``.

    Timestamps that carry a UTC offset are converted to UTC before
    formatting so the "GMT" label is accurate; timestamps without an offset
    are formatted as given.

    Args:
        iso_time: ISO 8601 string; a trailing ``Z`` and colon-less offsets
            such as ``+0000`` are accepted.

    Returns:
        The formatted string, or ``iso_time`` unchanged when it cannot be
        parsed (including non-string values such as ``None``).
    """
    try:
        normalized = iso_time.replace('Z', '+00:00')
        # fromisoformat() before Python 3.11 rejects "+0000"; insert the colon.
        normalized = re.sub(r'([+-]\d{2})(\d{2})$', r'\1:\2', normalized)
        dt = datetime.datetime.fromisoformat(normalized)
        if dt.tzinfo is not None:
            dt = dt.astimezone(datetime.timezone.utc)
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Best effort: show the raw value rather than fail the notification.
        return iso_time
    return dt.strftime("%a, %d %b %Y %H:%M:%S GMT")
def extract_resources(message):
    """Extract the affected resources from an alarm message.

    Dimensions are looked up under ``message['Trigger']['Dimensions']``
    first and under ``message['Dimensions']`` second; the first non-empty
    list wins. A missing, null or non-dict ``Trigger`` no longer prevents
    the second lookup.

    Args:
        message: Parsed alarm message (a dict).

    Returns:
        A list of ``"name: value"`` strings, or the string ``"[]"`` when no
        dimensions are found or the message is malformed.
    """
    try:
        trigger = message.get('Trigger')
        candidates = (
            trigger.get('Dimensions') if isinstance(trigger, dict) else None,
            message.get('Dimensions'),
        )
        for dimensions in candidates:
            if dimensions:
                return [
                    f"{dim.get('name', '')}: {dim.get('value', '')}"
                    for dim in dimensions
                ]
    except (AttributeError, TypeError):
        # Malformed payload (non-dict message or dimension entries): report
        # "no resources" instead of failing the whole notification.
        return "[]"
    return "[]"
def get_status_description(state):
    """Map an alarm state to its human-readable severity line.

    "ALARM" and "OK" have dedicated descriptions; any other value is
    reported as insufficient data.
    """
    known_states = (
        ("ALARM", "Current severity level: Critical - Service impacted"),
        ("OK", "Current severity level: Operating normally"),
    )
    for candidate, description in known_states:
        if state == candidate:
            return description
    return "Current severity level: Insufficient data"
|
key=FEISHU_WEBHOOK_URL value=飞书 Webhook URL |
aws cloudwatch put-metric-alarm --alarm-name <告警名称> --alarm-description <告警描述> --metric-name <指标名称> --namespace <命名空间> --statistic <统计方法> --period <周期秒数> --threshold <阈值> --comparison-operator <比较运算符> --dimensions <维度> --evaluation-periods <评估周期数> --alarm-actions <告警动作ARN> --ok-actions <恢复动作ARN> |
| # | 判定描述 | 自我判定(是/否) |
|---|---|---|
| 1 | 在各搜索引擎中是否能找到知识信息(包括但不限于Google、百度、Bing) | 是 |
| 2 | 是否需要代码集成开发 | 否 |