From d6ffcee109ed1385507acf595101e8e9aea2c717 Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Sun, 20 Jul 2025 00:12:50 -0400 Subject: [PATCH] Add support for ModernBERT Decoder --- README.md | 1 + docs/snippets/6_supported-models.snippet | 1 + src/configs.js | 1 + src/models.js | 8 ++++++++ 4 files changed, 11 insertions(+) diff --git a/README.md b/README.md index ad2861bbe..1456b4529 100644 --- a/README.md +++ b/README.md @@ -382,6 +382,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **[MobileViT](https://huggingface.co/docs/transformers/model_doc/mobilevit)** (from Apple) released with the paper [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://huggingface.co/papers/2110.02178) by Sachin Mehta and Mohammad Rastegari. 1. **[MobileViTV2](https://huggingface.co/docs/transformers/model_doc/mobilevitv2)** (from Apple) released with the paper [Separable Self-attention for Mobile Vision Transformers](https://huggingface.co/papers/2206.02680) by Sachin Mehta and Mohammad Rastegari. 1. **[ModernBERT](https://huggingface.co/docs/transformers/model_doc/modernbert)** (from Answer.AI and LightOn) released with the paper [Smarter, Better, Faster, Longer: A Modern Bidirectional Encoder for Fast, Memory Efficient, and Long Context Finetuning and Inference](https://huggingface.co/papers/2412.13663) by Benjamin Warner, Antoine Chaffin, Benjamin Clavié, Orion Weller, Oskar Hallström, Said Taghadouini, Alexis Gallagher, Raja Biswas, Faisal Ladhak, Tom Aarsen, Nathan Cooper, Griffin Adams, Jeremy Howard, Iacopo Poli. +1. **[ModernBERT Decoder](https://huggingface.co/docs/transformers/model_doc/modernbert-decoder)** (from Johns Hopkins University and LightOn) released with the paper [Seq vs Seq: An Open Suite of Paired Encoders and Decoders](https://huggingface.co/papers/2507.11412) by Orion Weller, Kathryn Ricci, Marc Marone, Antoine Chaffin, Dawn Lawrie, Benjamin Van Durme. 1. **Moondream1** released in the repository [moondream](https://github.com/vikhyat/moondream) by vikhyat. 1. **[Moonshine](https://huggingface.co/docs/transformers/model_doc/moonshine)** (from Useful Sensors) released with the paper [Moonshine: Speech Recognition for Live Transcription and Voice Commands](https://huggingface.co/papers/2410.15608) by Nat Jeffries, Evan King, Manjunath Kudlur, Guy Nicholson, James Wang, Pete Warden. 1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://huggingface.co/papers/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu. diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet index 4bed9132c..2cd02568d 100644 --- a/docs/snippets/6_supported-models.snippet +++ b/docs/snippets/6_supported-models.snippet @@ -96,6 +96,7 @@ 1. **[MobileViT](https://huggingface.co/docs/transformers/model_doc/mobilevit)** (from Apple) released with the paper [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://huggingface.co/papers/2110.02178) by Sachin Mehta and Mohammad Rastegari. 1. **[MobileViTV2](https://huggingface.co/docs/transformers/model_doc/mobilevitv2)** (from Apple) released with the paper [Separable Self-attention for Mobile Vision Transformers](https://huggingface.co/papers/2206.02680) by Sachin Mehta and Mohammad Rastegari. 1. **[ModernBERT](https://huggingface.co/docs/transformers/model_doc/modernbert)** (from Answer.AI and LightOn) released with the paper [Smarter, Better, Faster, Longer: A Modern Bidirectional Encoder for Fast, Memory Efficient, and Long Context Finetuning and Inference](https://huggingface.co/papers/2412.13663) by Benjamin Warner, Antoine Chaffin, Benjamin Clavié, Orion Weller, Oskar Hallström, Said Taghadouini, Alexis Gallagher, Raja Biswas, Faisal Ladhak, Tom Aarsen, Nathan Cooper, Griffin Adams, Jeremy Howard, Iacopo Poli. +1. **[ModernBERT Decoder](https://huggingface.co/docs/transformers/model_doc/modernbert-decoder)** (from Johns Hopkins University and LightOn) released with the paper [Seq vs Seq: An Open Suite of Paired Encoders and Decoders](https://huggingface.co/papers/2507.11412) by Orion Weller, Kathryn Ricci, Marc Marone, Antoine Chaffin, Dawn Lawrie, Benjamin Van Durme. 1. **Moondream1** released in the repository [moondream](https://github.com/vikhyat/moondream) by vikhyat. 1. **[Moonshine](https://huggingface.co/docs/transformers/model_doc/moonshine)** (from Useful Sensors) released with the paper [Moonshine: Speech Recognition for Live Transcription and Voice Commands](https://huggingface.co/papers/2410.15608) by Nat Jeffries, Evan King, Manjunath Kudlur, Guy Nicholson, James Wang, Pete Warden. 1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://huggingface.co/papers/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu. diff --git a/src/configs.js b/src/configs.js index 0862f7adf..23f6b7c33 100644 --- a/src/configs.js +++ b/src/configs.js @@ -104,6 +104,7 @@ function getNormalizedConfig(config) { case 'stablelm': case 'opt': case 'falcon': + case 'modernbert-decoder': mapping['num_heads'] = 'num_attention_heads'; mapping['num_layers'] = 'num_hidden_layers'; mapping['hidden_size'] = 'hidden_size'; diff --git a/src/models.js b/src/models.js index 743d61648..88463ce03 100644 --- a/src/models.js +++ b/src/models.js @@ -2230,6 +2230,12 @@ export class ModernBertForTokenClassification extends ModernBertPreTrainedModel } ////////////////////////////////////////////////// +////////////////////////////////////////////////// +// ModernBERT Decoder models +export class ModernBertDecoderPreTrainedModel extends PreTrainedModel { } +export class ModernBertDecoderModel extends ModernBertDecoderPreTrainedModel { } +export class ModernBertDecoderForCausalLM extends ModernBertDecoderPreTrainedModel { } +////////////////////////////////////////////////// ////////////////////////////////////////////////// // NomicBert models @@ -7837,6 +7843,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([ ['starcoder2', ['Starcoder2Model', Starcoder2Model]], ['falcon', ['FalconModel', FalconModel]], ['stablelm', ['StableLmModel', StableLmModel]], + ['modernbert-decoder', ['ModernBertDecoderModel', ModernBertDecoderModel]], ]); const MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES = new Map([ @@ -7945,6 +7952,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([ ['falcon', ['FalconForCausalLM', FalconForCausalLM]], ['trocr', ['TrOCRForCausalLM', TrOCRForCausalLM]], ['stablelm', ['StableLmForCausalLM', StableLmForCausalLM]], + ['modernbert-decoder', ['ModernBertDecoderForCausalLM', ModernBertDecoderForCausalLM]], // Also image-text-to-text ['phi3_v', ['Phi3VForCausalLM', Phi3VForCausalLM]],