Fix all the lint issues.

wang2yn84 · wang2yn84 · commit d14e7f5a9a9e · 2024-08-06T00:20:21.000Z
diff --git a/benchmarks/run_offline.py b/benchmarks/run_offline.py
@@ -109,7 +109,7 @@ def main(argv):
 
   profiling_output = FLAGS.profiling_output
   print("======= decode starting ===")
-  
+
   dec_times = []
   for i in range(10):
     if profiling_output and i == 7 and not profiler_started:
diff --git a/jetstream_pt/layers.py b/jetstream_pt/layers.py
@@ -456,7 +456,7 @@ def attend(xq, keys, values, local_mask=None):
       # When GQA is enabled, it not necessary to expand
       if not (self.env.ragged_mha and n_rep > 1) and seqlen == 1:
         true_len = 2
-        #xq = torch.broadcast_to(xq, (xq.shape[0], xq.shape[1], 2, xq.shape[3]))
+        # xq = torch.broadcast_to(xq, (xq.shape[0], xq.shape[1], 2, xq.shape[3]))
         xq = torch.nn.functional.pad(
             xq, (0, 0, 0, true_len - seqlen), "constant", 0
         )
@@ -714,6 +714,7 @@ def attend(xq, keys, values, k_scaler, v_scaler, local_mask=None):
 
       return attn_out
 
+
 class Attention(ModuleBase):
   """Attention module."""
 
diff --git a/run_interactive.py b/run_interactive.py
@@ -18,13 +18,10 @@
 from typing import List
 
 # import torch_xla2 first!
-import torch_xla2  # pylint: disable
 import jax
 import numpy as np
-from absl import app, flags
-from colorama import Fore, Style
+from absl import app
 from jetstream.engine import token_utils
-from jetstream_pt import engine as je
 from jetstream_pt.config import FLAGS, create_engine_from_config_flags
 
 
@@ -54,10 +51,15 @@ def main(argv):
   if profiling_prefill:
     jax.profiler.stop_trace()
   prompts: List[str] = [
+      # pylint: disable-next=all
       "I believe the meaning of life is",
+      # pylint: disable-next=all
       "To add an element to an ArrayList of a specific class type in Java, you can follow the following steps:\n\n1. Create an instance of the class to be added.\n2. Get a reference to the ArrayList.\n3. Call the `add()` method on the ArrayList, passing the instance of the class as the argument.\n\nHere's an example of how to add an object of type `Person` to an ArrayList of type `ArrayList<Person>`:\n```csharp\n// Create a new instance of the Person class\nPerson person = new Person(\"John\", 25);\n\n// Get a reference to the ArrayList\nArrayList<Person> peopleList = new ArrayList<>();\n\n// Add the person object to the ArrayList\npeopleList.add(person);\n```\nIn this example, the `Person` class is assumed to have a constructor that takes two arguments: a String for the person's name, and an int for their age. You can substitute your own class and constructor as necessary.",
+      # pylint: disable-next=all
       "<s>[INST] <<SYS>>\nYou are an AI assistant. User will you give you a task. Your goal is to complete the task as faithfully as you can. While performing the task think step-by-step and justify your steps.\n<</SYS>>\n\nQuestion 1: What is commercial real estate finance?\nQuestion 2: What are Commercial Real Estate services?\nOptions are:\n[a]. no.\n[b]. yes.\nWould the answer to these two questions be the same? [/INST]",
+      # pylint: disable-next=all
       "<s>[INST] <<SYS>>\nYou are an AI assistant that helps people find information. Provide a detailed answer so user don\u2019t need to search outside to understand the answer.\n<</SYS>>\n\nUse reasoning to lead to the answer of the following question:\nWhere are you likely to find water underneath?\nOptions:\n- toilet\n- sink\n- jar\n- bridge\n- house\n Reasoning process: [/INST",
+      # pylint: disable-next=all
       "<s>[INST] <<SYS>>\nYou are an AI assistant. You will be given a task. You must generate a detailed and long answer.\n<</SYS>>\n\nContinue the following story.\n\nKay didn't have shoes that fit her feet properly. She only wore sneakers, because the \nChoose from: [I] shoes  fitted badly. [II] sneakers  fitted badly. [/INST]",
   ]
   for prompt in prompts:
diff --git a/run_interactive_disaggregated.py b/run_interactive_disaggregated.py
@@ -19,9 +19,7 @@
 from typing import List
 from absl import app
 from absl import flags
-from colorama import Fore, Style
 
-import numpy as np
 import jax
 
 from jetstream.engine import token_utils
@@ -129,7 +127,6 @@ def main(argv):
   print("Load params ", time.perf_counter() - start)
 
   metadata = prefill_engine.get_tokenizer()
-  tokenizer = prefill_engine.build_tokenizer(metadata)
   vocab = token_utils.load_vocab(metadata.path, metadata.extra_ids)
   stop_tokens = [vocab.eos_id, vocab.pad_id]
   max_output_length = 1024
@@ -157,19 +154,21 @@ def main(argv):
     print(f"---- Input prompts are: {prompt}")
     print(f"---- Encoded tokens are: {tokens}")
 
-    # pylint: disable-next=all
     print(
+        # pylint: disable-next=all
         f"---- Do prefill in prefill engine pod_slice_name: {prefill_engine.pod_slice_name}"
     )
     prefill_result, _ = prefill_engine.prefill(
         params=None, padded_tokens=tokens, true_length=true_length
     )
     print(
+        # pylint: disable-next=all
         f"---- Transfer prefill result to decode engine pod_slice_name: {decode_engine.pod_slice_name}"
     )
     decode_engine.transfer(prefill_result)
-    # pylint: disable-next=all
+
     print(
+        # pylint: disable-next=all
         f"---- Do insert in decode engine pod_slice_name: {decode_engine.pod_slice_name}"
     )
     decode_state = decode_engine.insert(prefill_result, None, slot=slot)
diff --git a/run_interactive_multiple_host.py b/run_interactive_multiple_host.py
@@ -19,7 +19,6 @@
 
 import jax
 from absl import app, flags
-from colorama import Fore, Style
 from jetstream.engine import token_utils
 from jetstream_pt import ray_engine
 from jetstream_pt.config import FLAGS
diff --git a/run_ray_serve_interleave.py b/run_ray_serve_interleave.py
@@ -40,6 +40,7 @@
 
 
 def create_head_resource_name(generation, tpu_chips):
+  """Create head resource name."""
   return f"TPU-{generation}-{tpu_chips}-head"
 
 
@@ -73,6 +74,7 @@ def create_engine(**kwargs):
 
 @serve.deployment
 class JetStreamDeployment:
+  """JetStream deployment."""
 
   def __init__(self, **kwargs):
     os.environ["XLA_FLAGS"] = (
@@ -111,18 +113,24 @@ def __init__(self, **kwargs):
 
     print("Started jetstream driver....")
 
+  # pylint: disable-next=all
   async def Decode(
-      self, request: jetstream_pb2.DecodeRequest
+      self,
+      # pylint: disable-next=all
+      request: jetstream_pb2.DecodeRequest,
+      # pylint: disable-next=all
   ) -> AsyncIterator[jetstream_pb2.DecodeResponse]:
-
+    """Async decode function."""
     return self.orchestrator.Decode(request)
 
 
 def main(_argv):
+  """Main function"""
   resource_name = create_head_resource_name(
       FLAGS.tpu_generation, FLAGS.tpu_chips
   )
   print(f"Using head resource {resource_name}")
+  # pylint: disable-next=all
   deployment = JetStreamDeployment.options(
       ray_actor_options={"resources": {resource_name: 1}}
   ).bind(
diff --git a/run_server.py b/run_server.py
@@ -17,7 +17,6 @@
 from typing import Sequence
 
 # import torch_xla2 first!
-import torch_xla2  # pylint: disable
 import jax
 from absl import app, flags
 from jetstream.core import server_lib
diff --git a/run_server_with_ray.py b/run_server_with_ray.py
@@ -19,7 +19,6 @@
 from absl import app, flags
 
 # import torch_xla2 first!
-import torch_xla2  # pylint: disable
 import jax
 from jetstream.core import server_lib
 from jetstream.core.config_lib import ServerConfig
diff --git a/tests/helpers.py b/tests/helpers.py
@@ -6,6 +6,7 @@
 from jetstream_pt import environment
 
 
+# pylint: disable-next=all
 def make_env_tiny(bf16_enable=True, env_data_update_fn=lambda _: None):
   torch_dtype = torch.bfloat16 if bf16_enable else torch.float32
   torch.set_default_dtype(torch_dtype)
@@ -33,6 +34,7 @@ def make_env_tiny(bf16_enable=True, env_data_update_fn=lambda _: None):
   return env, config
 
 
+# pylint: disable-next=all
 def make_mixtral_env(bf16_enable=True):
   torch_dtype = torch.bfloat16 if bf16_enable else torch.float32
   torch.set_default_dtype(torch_dtype)
@@ -57,14 +59,16 @@ def make_mixtral_env(bf16_enable=True):
   return env, config
 
 
+# pylint: disable-next=all
 def to_xla_tensor(tree):
   return torch_xla2.default_env().to_xla(tree)
 
 
+# pylint: disable-next=all
 def call_xla_model(model, weights, args):
   with jax.default_device(jax.devices("cpu")[0]):
     xla_weights, xla_inputs = to_xla_tensor((weights, args))
     with torch_xla2.default_env():
       result = torch.func.functional_call(model, xla_weights, xla_inputs)
-    result_torch = torch_xla2.tensor.j2t(result._elem)
+    result_torch = torch_xla2.tensor.j2t(result.jax())
     return result_torch
diff --git a/tests/test_hf_names.py b/tests/test_hf_names.py
@@ -4,10 +4,13 @@
 
 
 class TestModuleBase(unittest.TestCase):
+  """Test module base."""
 
   def test_get_hf_names_to_real_name(self):
+    """Test get huggingface names to real name."""
 
     class MyModule(ModuleBase):
+      """My module."""
 
       def __init__(self):
         super().__init__()
@@ -18,6 +21,9 @@ def __init__(self):
         self.param = torch.nn.Parameter(torch.randn(10))
         self.hf_name("param", "model.param")
 
+      def forward(self):
+        """Forward function."""
+
     module = MyModule()
     expected_mapping = {
         "model.my_linear1.weight": "linear1.weight",
@@ -30,20 +36,30 @@ def __init__(self):
     self.assertEqual(module.get_hf_names_to_real_name(), expected_mapping)
 
   def test_get_sharding_annotations(self):
+    """Test get sharding annotations."""
+
     class MyModule(ModuleBase):
+      """MyModule."""
 
       def __init__(self):
         super().__init__()
         self.linear = torch.nn.Linear(10, 20)
         self.embedding = torch.nn.Embedding(100, 50)
         self.inner = InnerModule()
 
+      def forward(self):
+        """Forward function."""
+
     class InnerModule(ModuleBase):
+      """Inner module."""
 
       def __init__(self):
         super().__init__()
         self.fc = torch.nn.Linear(50, 100)
 
+      def forward(self):
+        """Forward function."""
+
     module = MyModule()
     module.annotate_sharding("linear.weight", 0)
     module.annotate_sharding("embedding.weight", 1)
diff --git a/tests/test_llama_e2e.py b/tests/test_llama_e2e.py
@@ -22,14 +22,13 @@
 import torch
 import torch_xla2
 from torch.utils import _pytree as pytree
+from absl.testing import parameterized
 
 from jetstream_pt.engine import PyTorchEngine
 from jetstream_pt.third_party.llama import model_exportable, model_args
 from jetstream_pt.third_party.llama.generation_original import LlamaOriginal
 from jetstream_pt import environment
 from tests import helpers
-from jetstream_pt import torchjax
-from absl.testing import parameterized
 
 
 class LlamaE2ETest(parameterized.TestCase):
@@ -43,6 +42,7 @@ def _make_env(self, bf16_enable=True):
     torch.set_default_dtype(torch_dtype)
     jax.config.update("jax_dynamic_shapes", False)
     jax.config.update("jax_traceback_filtering", "off")
+    # pylint: disable-next=all
     config = model_args.get_model_args("tiny", 128, 1, 32000, True)
     environment_data = environment.JetEngineEnvironmentData()
     environment_data.max_input_sequence_length = 128
diff --git a/tests/test_model_impl.py b/tests/test_model_impl.py
@@ -17,7 +17,6 @@
 import jax.numpy as jnp
 import torch
 import torch_xla2
-from . import helpers
 
 from jetstream_pt.third_party.llama import model_exportable
 from jetstream_pt.third_party.llama import model_original
@@ -30,6 +29,8 @@
 from jetstream_pt import layers
 from jetstream_pt import cache_manager
 
+from . import helpers
+
 
 class ModelComponentTest(unittest.TestCase):
   """Test diff between original model and xla model for transformer,
@@ -77,7 +78,7 @@ def _generate_mask(self, cache_length, pos, seqlen, ring_buffer=True):
 
   def _compare_cache(self, cache_torch, cache_jax):
     _, seq, _, _ = cache_torch.shape
-    cache_j = torch_xla2.tensor.j2t(cache_jax._elem)
+    cache_j = torch_xla2.tensor.j2t(cache_jax.jax())
     for s in range(seq):
       print("diff ", (cache_torch[0, s] - cache_j[0, :, s]).norm())
 
@@ -141,13 +142,14 @@ def test_attention(self):
     cache_decode = self._make_one_cache_for_generate(env, pos)
 
     # insert prefilled cache entry
-    cache_decode.cache_k._elem = cache_decode.cache_k._elem.at[
-        ..., :pos, :
-    ].set(cache.cache_k._elem)
-
-    cache_decode.cache_v._elem = cache_decode.cache_v._elem.at[
-        ..., :pos, :
-    ].set(cache.cache_v._elem)
+    # pylint: disable-next=all
+    cache_decode.cache_k._elem = (
+        cache_decode.cache_k.jax().at[..., :pos, :].set(cache.cache_k.jax())
+    )
+    # pylint: disable-next=all
+    cache_decode.cache_v._elem = (
+        cache_decode.cache_v.jax().at[..., :pos, :].set(cache.cache_v.jax())
+    )
 
     # self._compare_cache(attention_orig.cache_k, cache_decode.cache_k)
     # Now do one with decode
@@ -176,6 +178,7 @@ def test_attention(self):
     self.assertTrue(torch.allclose(result_torch, expected_out, atol=1e-4))
 
   def test_gemma_attention(self):
+    """Test gemma attention."""
     with jax.default_matmul_precision("float32"):
       env, model_arg = helpers.make_env_tiny(False)
 
@@ -306,12 +309,14 @@ def test_transformer_block(self):
     cache_decode = self._make_one_cache_for_generate(env, pos)
 
     # insert prefilled cache entry
-    cache_decode.cache_k._elem = cache_decode.cache_k._elem.at[
-        ..., :pos, :
-    ].set(cache.cache_k._elem)
-    cache_decode.cache_v._elem = cache_decode.cache_v._elem.at[
-        ..., :pos, :
-    ].set(cache.cache_v._elem)
+    # pylint: disable-next=all
+    cache_decode.cache_k._elem = (
+        cache_decode.cache_k.jax().at[..., :pos, :].set(cache.cache_k.jax())
+    )
+    # pylint: disable-next=all
+    cache_decode.cache_v._elem = (
+        cache_decode.cache_v.jax().at[..., :pos, :].set(cache.cache_v.jax())
+    )
 
     # Now do one with decode
     x2 = torch.randn((1, 1, model_arg.dim))
@@ -433,14 +438,16 @@ def test_mixtral_transformer(self):
     self.assertTrue(torch.allclose(result_torch, expected_out, atol=1e-4))
 
   def test_mixtral_moe(self):
+    """Test mixtral moe module."""
     config = mixtral_config.ModelArgs()
     config.intermediate_size = 16
     config.dim = 16
     m = mixtral.ConditionalFeedForward(config)
     # random init
     states = m.state_dict()
-    for k, v in states.items():
-      states[k].normal_()
+    for _, v in states.items():
+      # pylint: disable-next=all
+      v.normal_()
     m.load_state_dict(states, assign=True)
 
     seqlen = 3
diff --git a/tests/test_quantization.py b/tests/test_quantization.py
diff --git a/tests/test_run_server.py b/tests/test_run_server.py