Fuse moe lora #3801
base: develop
Changes from 3 commits
```diff
@@ -108,7 +108,7 @@ def get_lora_layers():
                 XPURowSequenceParallelLoRALinear as RowSequenceParallelLoRALinear,
             )
 
-            from .lora_layers import LoRAConv2D
+            from .lora_layers import LoRAConv2D, LoRAExperts
         else:
             raise ImportError  # Force to use the fallback if not XPU
     except ImportError:
```
```diff
@@ -121,6 +121,7 @@ def get_lora_layers():
         FleetRowParallelLoRALinear,
         FleetRowSequenceParallelLoRALinear,
         LoRAConv2D,
+        LoRAExperts,
         LoRALinear,
         RowParallelLoRALinear,
         RowSequenceParallelLoRALinear,
```
```diff
@@ -131,6 +132,7 @@ def get_lora_layers():
         "ColumnSequenceParallelLoRALinear": ColumnSequenceParallelLoRALinear,
         "LoRAConv2D": LoRAConv2D,
         "LoRALinear": LoRALinear,
+        "LoRAExperts": LoRAExperts,
         "RowParallelLoRALinear": RowParallelLoRALinear,
         "RowSequenceParallelLoRALinear": RowSequenceParallelLoRALinear,
         "FleetLoRALinear": FleetLoRALinear,
```
```diff
@@ -145,6 +147,7 @@ def get_lora_layers():
     ColumnParallelLoRALinear = lora_layers["ColumnParallelLoRALinear"]
     ColumnSequenceParallelLoRALinear = lora_layers["ColumnSequenceParallelLoRALinear"]
     LoRAConv2D = lora_layers["LoRAConv2D"]
+    LoRAExperts = lora_layers["LoRAExperts"]
     LoRALinear = lora_layers["LoRALinear"]
     RowParallelLoRALinear = lora_layers["RowParallelLoRALinear"]
     RowSequenceParallelLoRALinear = lora_layers["RowSequenceParallelLoRALinear"]
```
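The four hunks above follow a single pattern: `LoRAExperts` is imported next to the existing LoRA layers, registered in the `lora_layers` dict that `get_lora_layers()` returns, and then rebound as a module-level name. A minimal, self-contained sketch of that selection-and-registry pattern, with placeholder classes (`LoRALinearXPU`, `LoRAExpertsFallback`, etc. are stand-ins, not the real PaddleFormers classes):

```python
# Hypothetical stand-ins for the device-specific and fallback implementations.
class LoRALinearXPU: ...
class LoRAExpertsXPU: ...
class LoRALinearFallback: ...
class LoRAExpertsFallback: ...


def get_adapter_layers(running_on_xpu: bool) -> dict:
    """Pick one implementation per layer type and expose all of them in one dict."""
    try:
        if running_on_xpu:
            LoRALinear, LoRAExperts = LoRALinearXPU, LoRAExpertsXPU
        else:
            raise ImportError  # force the fallback branch, mirroring the diff
    except ImportError:
        LoRALinear, LoRAExperts = LoRALinearFallback, LoRAExpertsFallback
    return {"LoRALinear": LoRALinear, "LoRAExperts": LoRAExperts}


# Callers then rebind module-level names from the dict, as in the last hunk above.
lora_layers = get_adapter_layers(running_on_xpu=False)
LoRAExperts = lora_layers["LoRAExperts"]
```

Keeping every layer class behind one registry means the XPU-versus-fallback decision is made in exactly one place, which is why adding `LoRAExperts` touches all four spots.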
```diff
@@ -904,6 +907,15 @@ def _find_and_replace_module(self, model, module_name, lora_config):
             lora_module = RowParallelQuantizationLoRALinear(module, lora_config)
             # Lora row parallel will split lora A matrix
             self.add_lora_split_mapping(module_name + ".lora_A", is_column=False)
+        elif attribute_chain[-1] == "experts":
+            lora_module = LoRAExperts(
+                module,
+                r=lora_config.r,
+                lora_alpha=lora_config.lora_alpha,
+                lora_dropout=lora_config.lora_dropout,
+                rslora=lora_config.rslora,
+                lora_plus_scale=lora_config.lora_plus_scale,
+            )
         if lora_module is None:
             raise ValueError(
                 f"LoRA strategy only supports paddle.nn.Linear or paddle.distributed.fleet.meta_parallel.ColumnParallelLinear or paddleformers.transformers.sequence_utils. {module}({module_name} {type(module).__name__}) is not supported。"
```
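The `LoRAExperts` class itself is not shown in this diff; the hunk only establishes that a sublayer whose attribute name is `experts` is wrapped with the config's `r`, `lora_alpha`, `lora_dropout`, `rslora`, and `lora_plus_scale`. Below is a rough, assumed sketch of what a per-expert LoRA wrapper could look like, for orientation only: the forward signature, the routing by `expert_id`, and the omission of `lora_plus_scale` are inventions of this sketch, not the class added by the PR.

```python
import paddle
import paddle.nn as nn


class SimpleLoRAExperts(nn.Layer):
    """Illustrative wrapper: each expert keeps its frozen weight and gains a
    rank-r correction x @ lora_A @ lora_B scaled by lora_alpha / r (or
    lora_alpha / sqrt(r) when rslora is enabled)."""

    def __init__(self, experts, r, lora_alpha, lora_dropout=0.0, rslora=False):
        super().__init__()
        self.experts = experts  # e.g. a LayerList of nn.Linear experts
        self.scaling = lora_alpha / (r ** 0.5 if rslora else r)
        self.dropout = nn.Dropout(lora_dropout)
        self.lora_A = nn.ParameterList()
        self.lora_B = nn.ParameterList()
        for expert in experts:
            in_features, out_features = expert.weight.shape  # paddle Linear stores [in, out]
            self.lora_A.append(self.create_parameter(shape=[in_features, r]))
            # lora_B starts at zero so the wrapped module is initially unchanged.
            self.lora_B.append(
                self.create_parameter(
                    shape=[r, out_features],
                    default_initializer=nn.initializer.Constant(0.0),
                )
            )

    def forward(self, x, expert_id):
        base = self.experts[expert_id](x)
        delta = self.dropout(x) @ self.lora_A[expert_id] @ self.lora_B[expert_id]
        return base + delta * self.scaling


# Usage sketch.
experts = nn.LayerList([nn.Linear(16, 16) for _ in range(4)])
wrapped = SimpleLoRAExperts(experts, r=8, lora_alpha=16)
out = wrapped(paddle.randn([2, 16]), expert_id=0)
```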
```diff
@@ -968,6 +980,7 @@ def mark_only_lora_as_trainable(self) -> None:
                 or isinstance(layer, FleetColumnSequenceParallelLoRALinear)
                 or isinstance(layer, RowSequenceParallelLoRALinear)
                 or isinstance(layer, FleetRowSequenceParallelLoRALinear)
+                or isinstance(layer, LoRAExperts)
                 or (QuantizationLoRALinear is not None and isinstance(layer, QuantizationLoRALinear))
                 or (
                     ColumnParallelQuantizationLoRALinear is not None
```
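`mark_only_lora_as_trainable` is where every non-adapter parameter gets frozen, so `LoRAExperts` must appear in this chain of `isinstance` checks for its adapter weights to stay trainable. A simplified, name-based illustration of the idea (the real method dispatches on the layer types listed above rather than on parameter names):

```python
def freeze_all_but_lora(model):
    """Illustrative only: freeze every parameter that does not look like a LoRA
    adapter weight (lora_A / lora_B), leaving just the adapters trainable."""
    for name, param in model.named_parameters():
        param.stop_gradient = "lora" not in name
```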
```diff
@@ -1004,8 +1017,7 @@ def get_lora_model(self, model: Union[PretrainedModel, nn.Layer], lora_config: L
             return model
         if isinstance(lora_config.target_modules, str):
             lora_config.target_modules = [lora_config.target_modules]
-        for i in model.named_sublayers():
-            module_name = i[0]
+        for module_name, module in model.named_sublayers():
             for target_module in lora_config.target_modules:
                 if re.fullmatch(target_module, module_name):
                     self._find_and_replace_module(model, module_name, lora_config)
```

Review discussion on this hunk:

Collaborator: Related unit tests need to be added.
Contributor (author): Related unit tests have been added.

Collaborator: The `get_merge_state_dict` function needs to be adapted as well.
Contributor (author): Adapted.

Collaborator: LoRA merge support needs to be considered and implemented.
Contributor (author): `merge_model` has been adapted.
Reviewer:
1. Is this matching rule general enough? Could it end up replacing modules in other existing models and cause problems?
2. If a model implements its experts in an unusual way, an interface should be kept so a custom LoRA expert class can be adapted.
3. Can it match PaddleFleet's experts?

Author: The matching rule has been updated, and an interface is kept for adapting a custom LoRA expert.
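One way to keep the attribute-name rule from touching unrelated models is to let `target_modules` carry a model-specific pattern. A hedged example of such a config, built only from fields that this diff reads off `lora_config`; the import path and the regex are assumptions, since the right pattern depends on the target model's actual sublayer names:

```python
# Import path assumed for illustration; in PaddleNLP the equivalent class is
# available as `from paddlenlp.peft import LoRAConfig`.
from paddleformers.peft import LoRAConfig

lora_config = LoRAConfig(
    # Hypothetical pattern: only expert containers under an `mlp` sublayer are
    # wrapped, rather than any module that happens to be named `experts`.
    target_modules=[r".*mlp\.experts"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    rslora=False,
    lora_plus_scale=1.0,
)
```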