Skip to content

求救!!Nacos 服务启动后短时间内一切正常,一段时间后failed to connect server #248

@Dawien

Description

@Dawien
在 Python 端创建 client 服务后,启动后的短时间内服务正常,监听也能正常获取数据。但服务运行一段时间后会出现异常日志 `error = Error [-401]: failed to connect nacos server`,随后服务关闭。

demo代码:

async def _init_client():
    """Create a Nacos config client, subscribe to one config and keep running.

    Builds the client configuration, creates the ``NacosConfigService``,
    registers ``config_listener`` for the configured data id / group, fetches
    and prints the current config once, then sleeps in an endless loop so the
    process stays alive for listener callbacks.

    Raises:
        Exception: whatever ``create_config_service`` or ``get_config`` raised
            (re-raised unchanged after being reported), or a plain
            ``Exception`` when ``create_config_service`` returns a falsy value.
    """
    # Constants do not need to live inside the ``try`` below.
    data_id = "xxxx"
    group = "xxxxx"

    client_config = (
        ClientConfigBuilder()
        .username("xxxx")
        .password("xxxxx")
        .namespace_id("xxxx")
        .heart_beat_interval(5 * 1000)  # presumably milliseconds -- confirm
        .log_dir('./')
        .server_address("xxxxx")
        .log_level('DEBUG')  # use 'INFO' for less verbose output
        .grpc_config(GRPCConfig(grpc_timeout=5000))
        .build()
    )

    try:
        client = await NacosConfigService.create_config_service(client_config)
        if not client:
            raise Exception('NacosConfigService.create_config_service failed')
    except Exception as e:
        # The original f-string contained an unmatched '}' (a SyntaxError);
        # the stray brace is removed and the message is passed lazily.
        logging.error("Failed to create NacosConfigService:URL:xxxx:%s", e)
        raise  # bare raise keeps the original traceback intact

    await client.add_listener(data_id=data_id, group=group, listener=config_listener)

    try:
        config = await client.get_config(
            ConfigParam(
                data_id=data_id,
                group=group,
            )
        )
        print("获取远程配置成功")
        print(f"获取配置:\n-----------------------\n{config}\n-----------------------\n")
    except Exception as e:
        print(f"获取配置失败:DataID:{data_id}|Group:{group}|{e}")
        raise

    # Keep the coroutine (and therefore the event loop) alive indefinitely.
    while True:
        await asyncio.sleep(1)



async def config_listener(tenant, data_id, group, content):
    """Report a config change by printing its group, data id and content length.

    Nothing is printed when ``content`` is empty or ``None``. ``tenant`` is
    accepted but not used.
    """
    if content:
        print(f"监听到配置更新-{group}-{data_id}:{len(content)}")

if __name__ == "__main__":
    # Script entry point: drive the demo coroutine on a fresh event loop.
    asyncio.run(_init_client())


从源码中追溯异常日志到下面的代码:
class GrpcClient(RpcClient):
    """RPC client that connects to a Nacos server through a gRPC asyncio channel."""

    def __init__(self, logger, name: str, client_config: ClientConfig, nacos_server: NacosServerConnector):
        """Initialise the base client and keep the settings taken from ``client_config``.

        Stores the logger plus the TLS config, gRPC config and namespace id
        (as ``tenant``) found on ``client_config``.
        """
        super().__init__(logger=logger, name=name, nacos_server=nacos_server)
        self.logger = logger
        self.tenant = client_config.namespace_id
        self.grpc_config = client_config.grpc_config
        self.tls_config = client_config.tls_config

    async def _create_new_managed_channel(self, server_ip, grpc_port):
        """Open a channel to ``server_ip:grpc_port`` and wait until it is ready.

        A secure channel is used when a TLS config is present and enabled;
        otherwise an insecure channel is created.

        Returns:
            The ready gRPC asyncio channel.

        Raises:
            NacosException: with code ``CLIENT_DISCONNECT`` when the channel is
                not ready within ``grpc_config.grpc_timeout`` milliseconds. The
                channel is closed before the exception is raised.
        """

        def _read_bytes(path):
            # Load one PEM file as raw bytes.
            with open(path, 'rb') as fh:
                return fh.read()

        channel_options = [
            ('grpc.max_call_recv_msg_size', self.grpc_config.max_receive_message_length),
            ('grpc.keepalive_time_ms', self.grpc_config.max_keep_alive_ms),
            ('grpc.use_local_subchannel_pool', 1),  # disable the global connection pool
            ('grpc.so_reuseport', 0)  # forbid port reuse
        ]

        if not (self.tls_config and self.tls_config.enabled):
            channel = grpc.aio.insecure_channel(f'{server_ip}:{grpc_port}',
                                                options=channel_options)
        else:
            ca_pem = _read_bytes(self.tls_config.ca_file)
            cert_pem = _read_bytes(self.tls_config.cert_file)
            key_pem = _read_bytes(self.tls_config.key_file)

            tls_credentials = grpc.ssl_channel_credentials(
                root_certificates=ca_pem,
                private_key=key_pem,
                certificate_chain=cert_pem,
            )
            channel = grpc.aio.secure_channel(f'{server_ip}:{grpc_port}',
                                              credentials=tls_credentials,
                                              options=channel_options)

        try:
            # grpc_timeout is divided by 1000 because wait_for expects seconds.
            await asyncio.wait_for(
                channel.channel_ready(),
                self.grpc_config.grpc_timeout / 1000,
            )
        except asyncio.TimeoutError as timeout_err:
            # Do not leave a half-open channel behind when the server is unreachable.
            await channel.close()
            raise NacosException(CLIENT_DISCONNECT, 'failed to connect nacos server') from timeout_err

        return channel

请问下,这个具体原因是gRPC的参数设置不对,还是Nacos集群的gRPC配置存在问题?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions