From 926aeb1590b805c3dfb5cd2589a551c8707b17e9 Mon Sep 17 00:00:00 2001 From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:26:38 +0000 Subject: [PATCH 1/8] initial example script exploration (only pirate script tested) --- examples/compile_inference.py | 32 -------- examples/compile_inference.py.tmp | 59 ++++++++++++++ examples/compile_pirate_qlora.py | 108 ++++++++++++++++++++++++ examples/compile_qlora.py.tmp | 131 ++++++++++++++++++++++++++++++ 4 files changed, 298 insertions(+), 32 deletions(-) delete mode 100644 examples/compile_inference.py create mode 100644 examples/compile_inference.py.tmp create mode 100644 examples/compile_pirate_qlora.py create mode 100644 examples/compile_qlora.py.tmp diff --git a/examples/compile_inference.py b/examples/compile_inference.py deleted file mode 100644 index f1900a4fb..000000000 --- a/examples/compile_inference.py +++ /dev/null @@ -1,32 +0,0 @@ -import torch -import torch._dynamo -from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig - -# torch._dynamo.config.suppress_errors = True - -torch.set_float32_matmul_precision("high") - -quantization_config = BitsAndBytesConfig(load_in_8bit=True) - -# torch._dynamo.config.capture_dynamic_output_shape_ops = True - -model_id = "google/gemma-2-2b-it" -# model_id = "Qwen/Qwen2.5-7B" - -tokenizer = AutoTokenizer.from_pretrained(model_id) -model = AutoModelForCausalLM.from_pretrained( - model_id, - quantization_config=quantization_config, - device_map="auto", - torch_dtype=torch.bfloat16, -) - -input_text = "Write me a poem about Machine Learning." -input_ids = tokenizer(input_text, return_tensors="pt").to(model.device) - -# model.forward = torch.compile(model.forward, fullgraph=True) - -model = torch.compile(model) - -outputs = model.generate(**input_ids, max_new_tokens=32) -print(tokenizer.decode(outputs[0])) diff --git a/examples/compile_inference.py.tmp b/examples/compile_inference.py.tmp new file mode 100644 index 000000000..1f8b49f54 --- /dev/null +++ b/examples/compile_inference.py.tmp @@ -0,0 +1,59 @@ +import logging + +import torch +import torch._dynamo + +# Enable verbose logging using PyTorch's new logging system +from torch._logging import set_logs +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig + +set_logs(dynamo=logging.DEBUG, graph_breaks=True, recompiles=True) # Enable specific artifacts + +torch.set_float32_matmul_precision("high") + +quantization_config = BitsAndBytesConfig(load_in_8bit=True) + +model_id = "google/gemma-2-2b-it" +tokenizer = AutoTokenizer.from_pretrained(model_id) +model = AutoModelForCausalLM.from_pretrained( + model_id, + quantization_config=quantization_config, + device_map="auto", + torch_dtype=torch.bfloat16, +) + +input_text = "Write me a poem about Machine Learning." 
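+
+# Optional knob: the compile logs under examples/tmp_compile_logs show recurring
+# graph breaks from `Tensor.item()` in the HF attention-mask path, and PyTorch's
+# own suggestion printed in those logs is to capture scalar outputs in the graph.
+# Uncomment to experiment:
+# torch._dynamo.config.capture_scalar_outputs = True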
+input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)
+
+# Inspect graph breaks up front: torch._dynamo.explain wraps the callable and
+# returns an ExplainOutput recording every break hit during tracing.
+from torch._dynamo import explain
+
+explanation = explain(model)(**input_ids)
+print(f"Graph breaks: {explanation.graph_break_count}\nDetails:")
+for break_reason in explanation.break_reasons:
+    print(f"- {break_reason}")
+
+
+# Enhanced AOTAutograd tracing: hook the forward compiler so every traced
+# graph is printed before execution falls back to eager kernels.
+from torch._dynamo.backends.common import aot_autograd
+
+
+def trace_handler(gm: torch.fx.GraphModule, example_inputs):
+    print(f"AOT traced graph with {len(gm.graph.nodes)} nodes")
+    gm.graph.print_tabular()  # Show full graph structure
+    return gm.forward
+
+
+model = torch.compile(model, backend=aot_autograd(fw_compiler=trace_handler))
+
+# Generate with compilation diagnostics
+outputs = model.generate(**input_ids, max_new_tokens=32)
+print(tokenizer.decode(outputs[0]))
diff --git a/examples/compile_pirate_qlora.py b/examples/compile_pirate_qlora.py
new file mode 100644
index 000000000..2ab9feef8
--- /dev/null
+++ b/examples/compile_pirate_qlora.py
@@ -0,0 +1,108 @@
+# for torch.compile trace run:
+#
+# TORCH_TRACE="./tracedir" TORCH_LOGS="graph_breaks" CUDA_VISIBLE_DEVICES=0 python examples/compile_pirate_qlora.py

+# šŸ“ā˜ ļøā›µ Pirate Coder's Delight: Fine-Tune Qwen1.5-1.8B to Speak Like a Buccaneer Hacker
+# Using bitsandbytes 4-bit + torch.compile - 100% Original Dataset
+
+from datasets import Dataset
+from peft import LoraConfig
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from trl import SFTConfig, SFTTrainer
+
+# 1. Load Model with Pirate-Optimized Quantization šŸ“ā˜ ļø
+model_id = "Qwen/Qwen1.5-1.8B"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+tokenizer.pad_token = tokenizer.eos_token
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    quantization_config={
+        "load_in_4bit": True,
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16,
+        "bnb_4bit_use_double_quant": True,
+    },
+    device_map="auto",
+    attn_implementation="flash_attention_2",
+    torch_dtype=torch.bfloat16,
+)
+
+
+# 2. Original Pirate Programmer Dataset 🦜
+def pirate_formatting_func(example):
+    return {"text": f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['response']}"}
+
+
+train_dataset = Dataset.from_list(
+    [
+        {
+            "instruction": "Explain quantum computing using pirate slang",
+            "response": "Arrr, matey! 'Tis like sailin' parallel seas...",
+        },
+        {
+            "instruction": "Write Python code to find buried treasure",
+            "response": "def find_booty():\n    return (sum(coordinates) / len(coordinates))",
+        },
+        {
+            "instruction": "Why do pirates hate distributed systems?",
+            "response": "Too many captains sink the ship, ye scallywag!",
+        },
+    ]
+).map(pirate_formatting_func)
+
+
+# 3. Prepare Pirate Dataset (datasets stores plain lists, so no return_tensors here)
+def tokenize_pirate_data(examples):
+    return tokenizer(examples["text"], padding="max_length", max_length=256, truncation=True)
+
+
+train_dataset = train_dataset.map(
+    tokenize_pirate_data,
+    batched=True,
+    remove_columns=["instruction", "response"],  # Keep only tokenized fields
+)
+
+# 4. Configure QLoRA āš™ļø
+peft_config = LoraConfig(
+    r=32,
+    lora_alpha=64,
+    target_modules=["q_proj", "v_proj"],
+    lora_dropout=0.05,
+    bias="none",
+    task_type="CAUSAL_LM",
+)
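+
+# Optional sanity check (illustration only -- SFTTrainer applies the adapter
+# itself via peft_config, so don't wrap the model twice for the real run):
+#
+#   from peft import get_peft_model
+#   get_peft_model(model, peft_config).print_trainable_parameters()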
+
+# 5. Training Configuration
+training_args = SFTConfig(
+    per_device_train_batch_size=2,
+    gradient_accumulation_steps=1,
+    max_steps=5,
+    learning_rate=2e-5,
+    max_seq_length=256,
+    remove_unused_columns=False,
+    output_dir="./pirate_coder",
+    optim="paged_adamw_8bit",
+    dataset_text_field="text",
+    packing=True,
+    # TrainingArguments exposes torch.compile as a flag plus mode/backend strings;
+    # fullgraph/dynamic are not configurable through this API.
+    torch_compile=True,
+    torch_compile_mode="reduce-overhead",
+    report_to="none",
+    logging_steps=1,
+)
+
+# 6. Launch Training with Pirate Flair! šŸš€
+trainer = SFTTrainer(
+    model=model,
+    train_dataset=train_dataset,
+    args=training_args,
+    peft_config=peft_config,
+    formatting_func=pirate_formatting_func,
+)
+
+print("⚔ Batten down the hatches - training with torch.compile!")
+trainer.train()
diff --git a/examples/compile_qlora.py.tmp b/examples/compile_qlora.py.tmp
new file mode 100644
index 000000000..c45286115
--- /dev/null
+++ b/examples/compile_qlora.py.tmp
@@ -0,0 +1,131 @@
+# šŸ¦„ Fine-Tune Llama-3-8B to Write Pirate Jokes & Shakespearean Sonnets
+# Using bitsandbytes 4-bit + torch.compile šŸ“ā˜ ļø
+
+# !pip install -qU "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git" \
+#     transformers==4.40.0 accelerate==0.30.0 bitsandbytes==0.43.0
+
+import random
+
+from datasets import load_dataset
+import torch
+from transformers import TrainerCallback, TrainingArguments
+from trl import SFTTrainer
+from unsloth import FastLanguageModel
+
+# 1. Load Pre-Quantized Model with bitsandbytes šŸŽÆ
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="unsloth/llama-3-8B-bnb-4bit",
+    max_seq_length=2048,
+    dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
+    load_in_4bit=True,
+    quantization_config={
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16,
+        "bnb_4bit_use_double_quant": True,
+    },
+)
+
+# 2. Prepare Creative Dataset šŸŽ­
+pirate_dataset = load_dataset(
+    "json",
+    data_files={"train": "https://huggingface.co/datasets/jondurbin/pirate-jokes/resolve/main/pirate_jokes.json"},
+)
+
+
+def format_creative_prompt(sample):
+    # datasets.map expects a dict, so wrap the formatted prompt in a "text" column
+    return {
+        "text": f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+You are a swashbuckling pirate poet. Respond ONLY in pirate speak or Shakespearean verse.<|eot_id|>
+<|start_header_id|>user<|end_header_id|>
+{sample['prompt']}<|eot_id|>
+<|start_header_id|>assistant<|end_header_id|>
+{sample['response']}<|eot_id|>"""
+    }
+
+
+dataset = pirate_dataset.map(format_creative_prompt)
+
+# 3. Configure QLoRA with torch.compile Diagnostics šŸ”
+# (torch.compile itself is applied to the training step further below;
+# unsloth's get_peft_model takes no compile options)
+model = FastLanguageModel.get_peft_model(
+    model,
+    r=32,
+    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
+    lora_alpha=64,
+    lora_dropout=0.1,
+    bias="none",
+    use_gradient_checkpointing=True,
+    random_state=3407,  # For reproducibility of creative outputs
+)
+
+
+# 4. Custom Training Loop with Graph Break Analysis šŸ•µļøā€ā™‚ļø
+def detect_graph_breaks():
+    import torch._dynamo
+
+    original_verbose = torch._dynamo.config.verbose
+    torch._dynamo.config.verbose = True
+
+    # Trigger compilation with sample input
+    sample_input = tokenizer("Arrr! Tell me about yer treasure...", return_tensors="pt").to("cuda")
+    compiled_model = torch.compile(model)
+    _ = compiled_model(**sample_input)
+
+    torch._dynamo.config.verbose = original_verbose
+
+
+detect_graph_breaks()  # Initial graph break detection
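+
+# Optionally clear dynamo's caches: detect_graph_breaks() above compiled the
+# model once just to surface breaks, and resetting keeps that throwaway
+# compilation from interfering with the training-step compile below.
+import torch._dynamo
+
+torch._dynamo.reset()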
+
+
+# 5. Creative Generation During Training šŸŽ©
+class PirateStreamer(TrainerCallback):
+    def __init__(self, tokenizer):
+        self.tokenizer = tokenizer
+        self.prompt = "Yarrr! Why did the pirate's chicken cross the road?"
+
+    def on_step_end(self, args, state, control, model=None, **kwargs):
+        if random.random() < 0.1:  # 10% chance to generate during training
+            with torch.no_grad():
+                outputs = model.generate(
+                    input_ids=self.tokenizer(self.prompt, return_tensors="pt").to("cuda").input_ids,
+                    max_new_tokens=50,
+                    do_sample=True,
+                    temperature=0.7,
+                    repetition_penalty=1.1,
+                )
+            print("\nšŸ“ā˜ ļø Crew's Update:", self.tokenizer.decode(outputs[0]))
+
+
+# 6. Launch Training with Progressive Compilation šŸš€
+trainer = SFTTrainer(
+    model=model,
+    train_dataset=dataset["train"],
+    dataset_text_field="text",
+    max_seq_length=1024,
+    packing=True,
+    callbacks=[PirateStreamer(tokenizer)],
+    args=TrainingArguments(
+        per_device_train_batch_size=2,
+        gradient_accumulation_steps=4,
+        warmup_steps=10,
+        max_steps=100,
+        learning_rate=3e-5,
+        fp16=not torch.cuda.is_bf16_supported(),
+        bf16=torch.cuda.is_bf16_supported(),
+        logging_steps=1,
+        optim="paged_adamw_8bit",
+        weight_decay=0.01,
+        lr_scheduler_type="cosine",
+        output_dir="pirate-poet",
+        report_to="none",
+    ),
+)
+
+# Progressive compilation strategy: Trainer's hook is named training_step
+trainer.training_step = torch.compile(
+    trainer.training_step,
+    mode="reduce-overhead",
+    fullgraph=False,  # Start with partial graphs
+    dynamic=True,
+)
+
+print("šŸ Starting training - watch for graph breaks and pirate wisdom!")
+trainer.train()

From e7eeb3f3b5853603f5ffe338a6c0ee4c35296b78 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Thu, 27 Mar 2025 19:29:15 +0000
Subject: [PATCH 2/8] tmp commit with compile logs

---
 .../bnb-specific-graph-breaks.log    |  45 ++
 examples/tmp_compile_logs/pirate.log | 445 ++++++++++++++++++
 2 files changed, 490 insertions(+)
 create mode 100644 examples/tmp_compile_logs/bnb-specific-graph-breaks.log
 create mode 100644 examples/tmp_compile_logs/pirate.log

diff --git a/examples/tmp_compile_logs/bnb-specific-graph-breaks.log b/examples/tmp_compile_logs/bnb-specific-graph-breaks.log
new file mode 100644
index 000000000..0f5a77a05
--- /dev/null
+++ b/examples/tmp_compile_logs/bnb-specific-graph-breaks.log
@@ -0,0 +1,45 @@
+WARNING: BNB_CUDA_VERSION=124 environment variable detected; loading libbitsandbytes_cuda124.so.
+This can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.
+V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] Graph break in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] File "/home/ubuntu/src/bnb/bitsandbytes/nn/modules.py", line 483, in forward +V0327 19:01:19.984000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:20.125000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:20.169000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:20.252000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:21.002000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:21.106000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:22.058000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [27/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:22.133000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:22.528000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:22.703000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [30/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:22.752000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.107000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [31/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.237000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [32/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.359000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.786000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.951000 565954 
site-packages/torch/_dynamo/symbolic_convert.py:442] [12/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.990000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:24.036000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/4] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:24.558000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:24.649000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:25.457000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:29.511000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:29.605000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:29.635000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:29.669000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/5] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.078000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.179000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.268000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.396000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [30/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.419000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/6] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.661000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [31/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.750000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [32/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:31.075000 565954 
site-packages/torch/_dynamo/symbolic_convert.py:442] [11/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:31.199000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:31.230000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:31.264000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/7] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +W0327 19:01:31.314000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [15/8] function: 'torch_dynamo_resume_in_forward_at_483' (/home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483) +V0327 19:01:31.522000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:31.623000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:31.711000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +W0327 19:01:32.668000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [14/8] function: 'forward' (/home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:466) diff --git a/examples/tmp_compile_logs/pirate.log b/examples/tmp_compile_logs/pirate.log new file mode 100644 index 000000000..f428ed9d6 --- /dev/null +++ b/examples/tmp_compile_logs/pirate.log @@ -0,0 +1,445 @@ +WARNING: BNB_CUDA_VERSION=124 environment variable detected; loading libbitsandbytes_cuda124.so. +This can be used to load a bitsandbytes version that is different from the PyTorch CUDA version. 
+If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION= +If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH +For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH: +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] kwargs = {k: v for k, v in kwargs.items() if k not in self.special_peft_forward_args} +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] Traceback (most recent call last): +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 1164, in __call__ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] result = self._inner_convert( +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 547, in __call__ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] return _compile( +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] ^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 986, in _compile +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] guarded_code = compile_inner(code, one_graph, hooks, transform) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 715, in compile_inner +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] return _compile_inner(code, one_graph, hooks, transform) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_utils_internal.py", line 95, in wrapper_function +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] return function(*args, **kwargs) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 750, in _compile_inner +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] out_code = transform_code_object(code, 
transform) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/bytecode_transformation.py", line 1361, in transform_code_object +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] transformations(instructions, code_options) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 231, in _fn +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] return fn(*args, **kwargs) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 662, in transform +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] tracer.run() +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 2868, in run +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] super().run() +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1052, in run +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] while self.step(): +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] ^^^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 962, in step +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] self.dispatch_table[inst.opcode](self, inst) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1401, in FOR_ITER +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] val = it.next_variable(self) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/variables/user_defined.py", line 859, in next_variable +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] return self.call_method(tx, "__next__", [], {}) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] 
[__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/variables/user_defined.py", line 823, in call_method +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] return super().call_method(tx, name, args, kwargs) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/variables/base.py", line 414, in call_method +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] unimplemented(f"call_method {self} {name} {args} {kwargs}") +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/exc.py", line 317, in unimplemented +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] raise Unsupported(msg, case_name=case_name) +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] torch._dynamo.exc.Unsupported: call_method UserDefinedObjectVariable(dict_itemiterator) __next__ [] {} +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] from user code: +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/peft/peft_model.py", line 1755, in +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] kwargs = {k: v for k, v in kwargs.items() if k not in self.special_peft_forward_args} +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] You can suppress this exception and fall back to eager by setting: +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] import torch._dynamo +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] torch._dynamo.config.suppress_errors = True +V0327 19:01:18.330000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [2/0] [__graph_breaks] +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] Graph break from `Tensor.item()`, consider setting: +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] torch._dynamo.config.capture_scalar_outputs = True +W0327 19:01:18.598000 565954 
site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] or: +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] env TORCHDYNAMO_CAPTURE_SCALAR_OUTPUTS=1 +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] to include these operations in the captured graph. +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] Graph break: from user code at: +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/peft/tuners/tuners_utils.py", line 193, in forward +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] return self.model.forward(*args, **kwargs) +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] return func(*args, **kwargs) +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 855, in forward +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] outputs = self.model( +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 549, in forward +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] causal_mask = self._update_causal_mask( +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 620, in _update_causal_mask +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] is_padding_right = attention_mask[:, -1].sum().item() != input_tensor.size()[0] +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] +W0327 19:01:18.598000 565954 site-packages/torch/_dynamo/variables/tensor.py:869] [3/0] +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] Graph break in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:620 +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] Reason: Unsupported: Tensor.item +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] User code traceback: +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/peft/tuners/tuners_utils.py", line 193, in forward +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] return self.model.forward(*args, **kwargs) +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] File 
"/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] return func(*args, **kwargs) +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 855, in forward +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] outputs = self.model( +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 549, in forward +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] causal_mask = self._update_causal_mask( +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 620, in _update_causal_mask +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] is_padding_right = attention_mask[:, -1].sum().item() != input_tensor.size()[0] +V0327 19:01:18.601000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [3/0] [__graph_breaks] +V0327 19:01:18.654000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [4/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:620 +V0327 19:01:18.654000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [4/0] [__graph_breaks] Reason: Unsupported: Tensor.item +V0327 19:01:18.698000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [5/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:620 +V0327 19:01:18.698000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [5/0] [__graph_breaks] Reason: Unsupported: Tensor.item +V0327 19:01:18.741000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [6/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:620 +V0327 19:01:18.741000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [6/0] [__graph_breaks] Reason: Unsupported: Tensor.item +V0327 19:01:19.492000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [7/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:620 +V0327 19:01:19.492000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [7/0] [__graph_breaks] Reason: Unsupported: Tensor.item +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] Graph break: skip: from user code at: +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File 
"/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 627, in torch_dynamo_resume_in__update_causal_mask_at_620 +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] if attention_mask is not None and 0.0 in attention_mask: +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] Traceback (most recent call last): +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 1164, in __call__ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] result = self._inner_convert( +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 547, in __call__ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] return _compile( +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 986, in _compile +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] guarded_code = compile_inner(code, one_graph, hooks, transform) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 715, in compile_inner +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] return _compile_inner(code, one_graph, hooks, transform) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_utils_internal.py", line 95, in wrapper_function +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] return function(*args, **kwargs) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 750, in _compile_inner +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] out_code = transform_code_object(code, transform) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/bytecode_transformation.py", line 1361, in transform_code_object +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] transformations(instructions, code_options) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 231, in _fn +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] return fn(*args, **kwargs) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/convert_frame.py", line 662, in transform +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] tracer.run() +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 2868, in run +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] super().run() +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1052, in run +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] while self.step(): +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 962, in step +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] self.dispatch_table[inst.opcode](self, inst) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 2164, in CONTAINS_OP +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] self.push(right.call_method(self, "__contains__", [left], {})) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/variables/tensor.py", line 583, in call_method +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] result = handler_method(*args, **kwargs) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 
19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/variables/tensor.py", line 983, in method___contains__ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] return result.call_method(tx, "item", [], {}) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/variables/tensor.py", line 583, in call_method +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] result = handler_method(*args, **kwargs) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/variables/tensor.py", line 838, in method_item +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] unimplemented("Tensor.item") +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/_dynamo/exc.py", line 317, in unimplemented +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] raise Unsupported(msg, case_name=case_name) +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] torch._dynamo.exc.Unsupported: Tensor.item +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] from user code: +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 627, in torch_dynamo_resume_in__update_causal_mask_at_620 +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] if attention_mask is not None and 0.0 in attention_mask: +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] You can suppress this exception and fall back to eager by setting: +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] import torch._dynamo +V0327 19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] torch._dynamo.config.suppress_errors = True +V0327 
19:01:19.569000 565954 site-packages/torch/_dynamo/convert_frame.py:1214] [8/0] [__graph_breaks] +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] Graph break in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] User code traceback: +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 579, in torch_dynamo_resume_in_forward_at_549 +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] layer_outputs = decoder_layer( +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 260, in forward +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] hidden_states, self_attn_weights = self.self_attn( +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 162, in forward +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2) +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/peft/tuners/lora/bnb.py", line 494, in forward +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] result = self.base_layer(x, *args, **kwargs) +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] File "/home/ubuntu/src/bnb/bitsandbytes/nn/modules.py", line 483, in forward +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] return bnb.matmul_4bit(x, self.weight.t(), bias=bias, quant_state=self.weight.quant_state).to(inp_dtype) +V0327 19:01:19.771000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [9/0] [__graph_breaks] +V0327 19:01:19.984000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:19.984000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:20.125000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:20.125000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:20.169000 565954 
site-packages/torch/_dynamo/symbolic_convert.py:442] [13/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:20.169000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:20.252000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:20.252000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:21.002000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:21.002000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:21.106000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:21.106000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] Graph break in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. 
To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] User code traceback: +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 192, in torch_dynamo_resume_in_forward_at_164 +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] attn_output, attn_weights = attention_interface( +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/integrations/flash_attention.py", line 50, in flash_attention_forward +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] attn_output = _flash_attention_forward( +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py", line 297, in _flash_attention_forward +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] query_states, key_states, value_states = fa_peft_integration_check( +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py", line 213, in fa_peft_integration_check +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] logger.warning_once( +V0327 19:01:21.234000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [19/0] [__graph_breaks] +V0327 19:01:21.495000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [20/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:21.495000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [20/0] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:21.575000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [21/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:21.575000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [21/0] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. 
To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:21.616000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [22/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:21.616000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [22/0] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16. +V0327 19:01:21.762000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [24/0] [__graph_breaks] Graph break in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:329 +V0327 19:01:21.762000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [24/0] [__graph_breaks] Reason: Data-dependent jump +V0327 19:01:21.762000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [24/0] [__graph_breaks] User code traceback: +V0327 19:01:21.762000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [24/0] [__graph_breaks] File "/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py", line 329, in torch_dynamo_resume_in__flash_attention_forward_at_297 +V0327 19:01:21.762000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [24/0] [__graph_breaks] max_length_q is not None or (query_length != 1 and not (torch.diff(position_ids, dim=-1) >= 0).all()) +V0327 19:01:21.762000 565954 site-packages/torch/_dynamo/symbolic_convert.py:435] [24/0] [__graph_breaks] +V0327 19:01:22.058000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [27/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:22.058000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [27/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:22.133000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:22.133000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/1] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:22.528000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:22.528000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:22.703000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [30/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:22.703000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] 
[30/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:22.752000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:22.752000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/2] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:23.107000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [31/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.107000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [31/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:23.237000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [32/0] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.237000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [32/0] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:23.359000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.359000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/3] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:23.786000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.786000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/1] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:23.951000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.951000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/1] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:23.990000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:23.990000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/1] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:24.036000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/4] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:24.036000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/4] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:24.558000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:24.558000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/1] 
[__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:24.649000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:24.649000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/1] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:24.756000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:24.756000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/1] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:24.994000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [20/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:24.994000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [20/1] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:25.064000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [21/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:25.064000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [21/1] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:25.217000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [24/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:329 +V0327 19:01:25.217000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [24/1] [__graph_breaks] Reason: Data-dependent jump +V0327 19:01:25.457000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:25.457000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/1] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:25.665000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:25.665000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/2] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. 
To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:25.946000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:25.946000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/3] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:26.226000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/4] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:26.226000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/4] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:26.505000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/5] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:26.505000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/5] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:26.786000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/6] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:26.786000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/6] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods +V0327 19:01:27.069000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/7] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/modeling_flash_attention_utils.py:213 +V0327 19:01:27.069000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [19/7] [__graph_breaks] Reason: Unsupported: Logger not supported for non-export cases. 
To avoid graph breaks caused by logger in compile-mode, it is recommended to disable logging by adding logging methods to config.ignore_logger_methods
+W0327 19:01:27.292000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [19/8] torch._dynamo hit config.cache_size_limit (8)
+W0327 19:01:27.292000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [19/8] function: 'torch_dynamo_resume_in_forward_at_164' (/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:164)
+W0327 19:01:27.292000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [19/8] last reason: 19/0: L['self'].layer_idx == 0
+W0327 19:01:27.292000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [19/8] To log all recompilation reasons, use TORCH_LOGS="recompiles".
+W0327 19:01:27.292000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [19/8] To diagnose recompilation issues, see https://pytorch.org/docs/main/torch.compiler_troubleshooting.html.
+/home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/torch/utils/_config_module.py:342: UserWarning: Skipping serialization of skipfiles_inline_module_allowlist value {}
+  warnings.warn(
+
+ 20%|ā–ˆā–ˆ        | 1/5 [00:10<00:43, 10.86s/it]
+V0327 19:01:29.067000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [3/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:620
+V0327 19:01:29.067000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [3/1] [__graph_breaks] Reason: Unsupported: Tensor.item
+V0327 19:01:29.120000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [4/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:620
+V0327 19:01:29.120000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [4/1] [__graph_breaks] Reason: Unsupported: Tensor.item
+V0327 19:01:29.165000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [5/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:620
+V0327 19:01:29.165000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [5/1] [__graph_breaks] Reason: Unsupported: Tensor.item
+V0327 19:01:29.205000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [6/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:620
+V0327 19:01:29.205000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [6/1] [__graph_breaks] Reason: Unsupported: Tensor.item
+V0327 19:01:29.316000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [7/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/.condax/mamba/envs/bnb/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py:620
+V0327 19:01:29.316000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [7/1] [__graph_breaks] Reason: Unsupported: Tensor.item
+V0327
19:01:29.511000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:29.511000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/2] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:29.605000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:29.605000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/2] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:29.635000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:29.635000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/2] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:29.669000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/5] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:29.669000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/5] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:30.078000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.078000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/2] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:30.179000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.179000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/2] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:30.268000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/2] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.268000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/2] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:30.396000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [30/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.396000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [30/1] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:30.419000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/6] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.419000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/6] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 
19:01:30.661000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [31/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.661000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [31/1] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:30.750000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [32/1] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:30.750000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [32/1] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:31.075000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:31.075000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [11/3] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:31.199000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:31.199000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [12/3] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:31.230000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:31.230000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [13/3] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +V0327 19:01:31.264000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/7] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483 +V0327 19:01:31.264000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [14/7] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {} +W0327 19:01:31.314000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [15/8] torch._dynamo hit config.cache_size_limit (8) +W0327 19:01:31.314000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [15/8] function: 'torch_dynamo_resume_in_forward_at_483' (/home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483) +W0327 19:01:31.314000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [15/8] last reason: 15/0: tensor 'L['___stack1']' requires_grad mismatch. expected requires_grad=0 +W0327 19:01:31.314000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [15/8] To log all recompilation reasons, use TORCH_LOGS="recompiles". +W0327 19:01:31.314000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [15/8] To diagnose recompilation issues, see https://pytorch.org/docs/main/torch.compiler_troubleshooting.html. 
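
[Note: a cache_size_limit warning like the two above means Dynamo stops recompiling that frame and falls back to eager. The "last reason" lines name the guard churn: the resumed Qwen2 forward frame specializes on self.layer_idx (so it recompiles once per decoder layer), and ___stack1 flips requires_grad between calls. While experimenting, the cache can be widened; a minimal sketch, where 64 is an arbitrary value and not something taken from these patches:

    import torch

    # Dynamo keeps at most cache_size_limit specialized variants of a frame
    # (default 8, as the warnings report) before giving up on compilation.
    torch._dynamo.config.cache_size_limit = 64

Re-running with TORCH_LOGS="recompiles" then prints every guard failure that forced a new variant, per the warnings' own suggestion.]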
+V0327 19:01:31.522000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483
+V0327 19:01:31.522000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [17/3] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {}
+V0327 19:01:31.623000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483
+V0327 19:01:31.623000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [18/3] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {}
+V0327 19:01:31.711000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/3] [__graph_breaks] Graph break (details suppressed) in user code at /home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:483
+V0327 19:01:31.711000 565954 site-packages/torch/_dynamo/symbolic_convert.py:442] [29/3] [__graph_breaks] Reason: Unsupported: call_method UserDefinedObjectVariable(Params4bit) t [] {}
+
+ 40%|ā–ˆā–ˆā–ˆā–ˆ      | 2/5 [00:14<00:19, 6.62s/it]
+W0327 19:01:32.668000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [14/8] torch._dynamo hit config.cache_size_limit (8)
+W0327 19:01:32.668000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [14/8] function: 'forward' (/home/ubuntu/src/bnb/bitsandbytes/nn/modules.py:466)
+W0327 19:01:32.668000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [14/8] last reason: 14/0: ___check_obj_id(L['self'].compute_type_is_set, 109972630010656)
+W0327 19:01:32.668000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [14/8] To log all recompilation reasons, use TORCH_LOGS="recompiles".
+W0327 19:01:32.668000 565954 site-packages/torch/_dynamo/convert_frame.py:906] [14/8] To diagnose recompilation issues, see https://pytorch.org/docs/main/torch.compiler_troubleshooting.html.
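
[Note: the recurring break in this trace is Dynamo failing to trace the .t() method call on the Params4bit tensor subclass, splitting the graph at bitsandbytes/nn/modules.py:483; the later patches in this series go after exactly that. A minimal repro sketch, assuming a CUDA machine with bitsandbytes installed; fullgraph=True turns the silent graph split into a hard error:

    import torch
    import bitsandbytes as bnb

    # Linear4bit stores its weight as a Params4bit subclass; the .t() call on
    # it inside forward() is what the log reports as
    # "call_method UserDefinedObjectVariable(Params4bit) t [] {}".
    layer = bnb.nn.Linear4bit(64, 64, compute_dtype=torch.bfloat16).cuda()
    x = torch.randn(1, 64, dtype=torch.bfloat16, device="cuda")
    torch.compile(layer, fullgraph=True)(x)  # expected to raise torch._dynamo.exc.Unsupported
]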
+
+ 60%|ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆ    | 3/5 [00:14<00:07, 3.70s/it]
+ 80%|ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆ  | 4/5 [00:14<00:02, 2.31s/it]
+100%|ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆ| 5/5 [00:15<00:00, 1.55s/it]
+100%|ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆ| 5/5 [00:15<00:00, 3.10s/it]

From a2543dc9bf96fa1d4b24d50d48f15c2db91b3c77 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Mon, 7 Apr 2025 09:56:59 -0400
Subject: [PATCH 3/8] Add compile debug example script

---
 examples/compile_debug.py | 61 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 examples/compile_debug.py

diff --git a/examples/compile_debug.py b/examples/compile_debug.py
new file mode 100644
index 000000000..1b6bfe949
--- /dev/null
+++ b/examples/compile_debug.py
@@ -0,0 +1,61 @@
+import logging
+
+import torch
+import torch._dynamo
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+torch._logging.set_logs(
+    dynamo=logging.INFO,
+    graph_breaks=True,
+    recompiles=True,
+    recompiles_verbose=True,
+    compiled_autograd_verbose=True,
+)
+
+torch._dynamo.config.suppress_errors = False
+
+
+torch.set_float32_matmul_precision("high")
+
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+)
+
+# torch._dynamo.config.capture_dynamic_output_shape_ops = True
+
+# model_id = "google/gemma-2-2b-it"
+model_id = "Qwen/Qwen2.5-7B"
+
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    quantization_config=quantization_config,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+)
+
+input_text = "Write me a poem about Machine Learning."
+input_ids = tokenizer(input_text, return_tensors="pt").to(model.device) + +compile_options = { + # "epilogue_fusion": True, + # "shape_padding": True, + # "trace.enabled" : True, + # "triton.cudagraphs" : False, +} + +# warmup +outputs = model.generate(**input_ids, max_new_tokens=32) +print(tokenizer.decode(outputs[0])) + +# compile + +model.forward = torch.compile(model.forward, dynamic=True, fullgraph=True, options=compile_options) + +# model = torch.compile(model, dynamic=True, fullgraph=True, options=compile_options) + +outputs = model.generate(**input_ids, max_new_tokens=32) +print(tokenizer.decode(outputs[0])) From 4e41a4bfca78881ddc61ff204c01557f784849a0 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 7 Apr 2025 10:27:16 -0400 Subject: [PATCH 4/8] Update example --- examples/compile_debug.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/compile_debug.py b/examples/compile_debug.py index 1b6bfe949..3fb8b18c1 100644 --- a/examples/compile_debug.py +++ b/examples/compile_debug.py @@ -7,13 +7,15 @@ torch._logging.set_logs( dynamo=logging.INFO, graph_breaks=True, - recompiles=True, - recompiles_verbose=True, - compiled_autograd_verbose=True, + recompiles=False, + recompiles_verbose=False, + compiled_autograd_verbose=False, ) torch._dynamo.config.suppress_errors = False +torch._dynamo.config.capture_scalar_outputs = True +torch._dynamo.config.capture_dynamic_output_shape_ops = True torch.set_float32_matmul_precision("high") From b4370b828be2c23f3b58cc76cc9fba06a0a16b01 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 7 Apr 2025 11:06:41 -0400 Subject: [PATCH 5/8] Try removing extra transpose ops in 4bit --- bitsandbytes/autograd/_functions.py | 4 ++-- bitsandbytes/nn/modules.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bitsandbytes/autograd/_functions.py b/bitsandbytes/autograd/_functions.py index 5df8a0979..dc7b4ee9e 100644 --- a/bitsandbytes/autograd/_functions.py +++ b/bitsandbytes/autograd/_functions.py @@ -327,7 +327,7 @@ def forward(ctx, A, B, out=None, bias=None, quant_state: Optional[F.QuantState] # 1. Dequantize # 2. MatmulnN - output = torch.nn.functional.linear(A, F.dequantize_4bit(B, quant_state).to(A.dtype).t(), bias) + output = torch.nn.functional.linear(A, F.dequantize_4bit(B, quant_state).to(A.dtype), bias) # 3. 
Save state ctx.state = quant_state @@ -393,7 +393,7 @@ def matmul_4bit( ) return MatMul4Bit.apply(A, B, out, bias, quant_state) else: - out = F.gemv_4bit(A, B.t(), out, state=quant_state) + out = F.gemv_4bit(A, B, out, state=quant_state) if bias is not None: out += bias return out diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py index dfa688abb..3df5da396 100644 --- a/bitsandbytes/nn/modules.py +++ b/bitsandbytes/nn/modules.py @@ -480,7 +480,7 @@ def forward(self, x: torch.Tensor): bias = None if self.bias is None else self.bias.to(self.compute_dtype) - return bnb.matmul_4bit(x, self.weight.t(), bias=bias, quant_state=self.weight.quant_state).to(inp_dtype) + return bnb.matmul_4bit(x, self.weight.data, bias=bias, quant_state=self.weight.quant_state).to(inp_dtype) class LinearFP4(Linear4bit): From f09812ddeaab2ad4c0f9e017f56116ad822aa59f Mon Sep 17 00:00:00 2001 From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com> Date: Tue, 8 Apr 2025 11:36:00 +0000 Subject: [PATCH 6/8] attempt at conforming to flatten/unflatten protocol --- bitsandbytes/nn/modules.py | 45 +++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py index 3df5da396..b666309a9 100644 --- a/bitsandbytes/nn/modules.py +++ b/bitsandbytes/nn/modules.py @@ -344,6 +344,49 @@ def to(self, *args, **kwargs): return new_param + def __tensor_flatten__(self): + """Return data tensor and non-tensor context""" + ctx = { + "quant_state": self.quant_state, + "blocksize": self.blocksize, + "compress_statistics": self.compress_statistics, + "quant_type": self.quant_type, + "quant_storage": self.quant_storage, + "module": self.module, + "bnb_quantized": self.bnb_quantized, + } + return ["data"], ctx + + @staticmethod + def __tensor_unflatten__(inner_tensors, ctx, outer_size, outer_stride): + """Reconstruct Params4bit from components""" + data = inner_tensors["data"] + return Params4bit( + data, + requires_grad=data.requires_grad, + quant_state=ctx["quant_state"], + blocksize=ctx["blocksize"], + compress_statistics=ctx["compress_statistics"], + quant_type=ctx["quant_type"], + quant_storage=ctx["quant_storage"], + module=ctx["module"], + bnb_quantized=ctx["bnb_quantized"], + ) + + def detach(self): + """Create new instance preserving quantization state""" + return type(self)( + self.data.detach(), + requires_grad=self.requires_grad, + quant_state=self.quant_state, + blocksize=self.blocksize, + compress_statistics=self.compress_statistics, + quant_type=self.quant_type, + quant_storage=self.quant_storage, + module=self.module, + bnb_quantized=self.bnb_quantized, + ) + def fix_4bit_weight_quant_state_from_module(module: Union["Embedding4bit", "Linear4bit"]): if getattr(module.weight, "quant_state", None) is not None: @@ -480,7 +523,7 @@ def forward(self, x: torch.Tensor): bias = None if self.bias is None else self.bias.to(self.compute_dtype) - return bnb.matmul_4bit(x, self.weight.data, bias=bias, quant_state=self.weight.quant_state).to(inp_dtype) + return bnb.matmul_4bit(x, self.weight, bias=bias, quant_state=self.weight.quant_state).to(inp_dtype) class LinearFP4(Linear4bit): From e25e0ab9c746cf1647a3f4f8154d1294664222c2 Mon Sep 17 00:00:00 2001 From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com> Date: Mon, 14 Apr 2025 13:51:13 +0000 Subject: [PATCH 7/8] wip on torch flatten unflatten, etc --- bitsandbytes/nn/modules.py | 78 ++++++++++++++++++++++++++++++++------ 1 file changed, 67 
insertions(+), 11 deletions(-) diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py index b666309a9..362b820ce 100644 --- a/bitsandbytes/nn/modules.py +++ b/bitsandbytes/nn/modules.py @@ -222,6 +222,21 @@ def __new__( if data is None: data = torch.empty(0) + # Handle FakeTensor creation during dynamo tracing + if torch._dynamo.is_compiling() and not isinstance(data, cls): + if isinstance(data, torch._subclasses.FakeTensor): + param = data.as_subclass(cls) + param.requires_grad = requires_grad + param.quant_state = quant_state + param.blocksize = blocksize + param.compress_statistics = compress_statistics + param.quant_type = quant_type + param.quant_storage = quant_storage + param.module = module + param.bnb_quantized = bnb_quantized + return param + + # Standard initialization for real tensors self = torch.Tensor._make_subclass(cls, data, requires_grad) self.blocksize = blocksize self.compress_statistics = compress_statistics @@ -324,26 +339,23 @@ def to(self: T, dtype: Union[dtype, str], non_blocking: bool = ...) -> T: ... def to(self: T, tensor: Tensor, non_blocking: bool = ...) -> T: ... def to(self, *args, **kwargs): - device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs) + device, dtype, non_blocking, _ = torch._C._nn._parse_to(*args, **kwargs) if device is not None and device.type == "cuda" and not self.bnb_quantized: return self._quantize(device) else: - if self.quant_state is not None: - self.quant_state.to(device) - - new_param = Params4bit( + return Params4bit( super().to(device=device, dtype=dtype, non_blocking=non_blocking), requires_grad=self.requires_grad, - quant_state=self.quant_state, + quant_state=self.quant_state.to(device) if self.quant_state else None, blocksize=self.blocksize, compress_statistics=self.compress_statistics, quant_type=self.quant_type, quant_storage=self.quant_storage, + module=self.module, + bnb_quantized=self.bnb_quantized, ) - return new_param - def __tensor_flatten__(self): """Return data tensor and non-tensor context""" ctx = { @@ -361,6 +373,20 @@ def __tensor_flatten__(self): def __tensor_unflatten__(inner_tensors, ctx, outer_size, outer_stride): """Reconstruct Params4bit from components""" data = inner_tensors["data"] + + # Special handling for FakeTensor reconstruction + if isinstance(data, torch._subclasses.FakeTensor): + param = data.as_subclass(Params4bit) + param.blocksize = ctx["blocksize"] + param.compress_statistics = ctx["compress_statistics"] + param.quant_type = ctx["quant_type"] + param.quant_state = ctx["quant_state"] + param.quant_storage = ctx["quant_storage"] + param.module = ctx["module"] + param.bnb_quantized = ctx["bnb_quantized"] + return param + + # Standard reconstruction for real tensors return Params4bit( data, requires_grad=data.requires_grad, @@ -373,6 +399,21 @@ def __tensor_unflatten__(inner_tensors, ctx, outer_size, outer_stride): bnb_quantized=ctx["bnb_quantized"], ) + @classmethod + def __torch_function__(cls, func, types, args=(), kwargs=None): + # Type preservation through ops + result = super().__torch_function__(func, types, args, kwargs or {}) + if isinstance(result, torch.Tensor) and not isinstance(result, cls): + return result.as_subclass(cls) + return result + + @classmethod + def __torch_dispatch__(cls, func, types, args=(), kwargs=None): + # Delegate to FakeTensor implementation when needed + if any(isinstance(x, torch._subclasses.FakeTensor) for x in args): + return torch._C.DispatchKey.Fake(func(*args, **(kwargs or {}))) + return 
super().__torch_dispatch__(func, types, args, kwargs) + def detach(self): """Create new instance preserving quantization state""" return type(self)( @@ -460,21 +501,36 @@ def __init__( bias (`bool`, defaults to `True`): Whether the linear class uses the bias term as well. """ - super().__init__(input_features, output_features, bias, device) + # Bypass nn.Linear's parameter initialization + super(nn.Linear, self).__init__() + self.in_features = input_features + self.out_features = output_features + + # Manually register parameters self.weight = Params4bit( - self.weight.data, + torch.empty((output_features, input_features), dtype=quant_storage), requires_grad=False, compress_statistics=compress_statistics, quant_type=quant_type, quant_storage=quant_storage, module=self, ) - # self.persistent_buffers = [] # TODO consider as way to save quant state + + if bias: + self.bias = nn.Parameter(torch.empty(output_features)) + else: + self.register_parameter("bias", None) + + self.reset_parameters() self.compute_dtype = compute_dtype self.compute_type_is_set = False self.quant_state = None self.quant_storage = quant_storage + def reset_parameters(self): + # Disable standard initialization + pass + def set_compute_type(self, x): if x.dtype in [torch.float32, torch.bfloat16]: # the input is in a dtype that is safe to compute in, we switch From da96ddef0944020d3395ed85625e2dec170e8aab Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 14 Apr 2025 10:11:18 -0400 Subject: [PATCH 8/8] Update params4bit __torch_function__ --- bitsandbytes/nn/modules.py | 139 +++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 66 deletions(-) diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py index 362b820ce..f9eb8af56 100644 --- a/bitsandbytes/nn/modules.py +++ b/bitsandbytes/nn/modules.py @@ -223,18 +223,18 @@ def __new__( data = torch.empty(0) # Handle FakeTensor creation during dynamo tracing - if torch._dynamo.is_compiling() and not isinstance(data, cls): - if isinstance(data, torch._subclasses.FakeTensor): - param = data.as_subclass(cls) - param.requires_grad = requires_grad - param.quant_state = quant_state - param.blocksize = blocksize - param.compress_statistics = compress_statistics - param.quant_type = quant_type - param.quant_storage = quant_storage - param.module = module - param.bnb_quantized = bnb_quantized - return param + # if torch._dynamo.is_compiling() and not isinstance(data, cls): + # if isinstance(data, torch._subclasses.FakeTensor): + # param = data.as_subclass(cls) + # param.requires_grad = requires_grad + # param.quant_state = quant_state + # param.blocksize = blocksize + # param.compress_statistics = compress_statistics + # param.quant_type = quant_type + # param.quant_storage = quant_storage + # param.module = module + # param.bnb_quantized = bnb_quantized + # return param # Standard initialization for real tensors self = torch.Tensor._make_subclass(cls, data, requires_grad) @@ -356,63 +356,70 @@ def to(self, *args, **kwargs): bnb_quantized=self.bnb_quantized, ) - def __tensor_flatten__(self): - """Return data tensor and non-tensor context""" - ctx = { - "quant_state": self.quant_state, - "blocksize": self.blocksize, - "compress_statistics": self.compress_statistics, - "quant_type": self.quant_type, - "quant_storage": self.quant_storage, - "module": self.module, - "bnb_quantized": self.bnb_quantized, - } - return ["data"], ctx - - @staticmethod - def __tensor_unflatten__(inner_tensors, ctx, outer_size, 
outer_stride): - """Reconstruct Params4bit from components""" - data = inner_tensors["data"] - - # Special handling for FakeTensor reconstruction - if isinstance(data, torch._subclasses.FakeTensor): - param = data.as_subclass(Params4bit) - param.blocksize = ctx["blocksize"] - param.compress_statistics = ctx["compress_statistics"] - param.quant_type = ctx["quant_type"] - param.quant_state = ctx["quant_state"] - param.quant_storage = ctx["quant_storage"] - param.module = ctx["module"] - param.bnb_quantized = ctx["bnb_quantized"] - return param - - # Standard reconstruction for real tensors - return Params4bit( - data, - requires_grad=data.requires_grad, - quant_state=ctx["quant_state"], - blocksize=ctx["blocksize"], - compress_statistics=ctx["compress_statistics"], - quant_type=ctx["quant_type"], - quant_storage=ctx["quant_storage"], - module=ctx["module"], - bnb_quantized=ctx["bnb_quantized"], - ) + # def __tensor_flatten__(self): + # """Return data tensor and non-tensor context""" + # ctx = { + # "quant_state": self.quant_state, + # "blocksize": self.blocksize, + # "compress_statistics": self.compress_statistics, + # "quant_type": self.quant_type, + # "quant_storage": self.quant_storage, + # "module": self.module, + # "bnb_quantized": self.bnb_quantized, + # } + # return ["data"], ctx + + # @staticmethod + # def __tensor_unflatten__(inner_tensors, ctx, outer_size, outer_stride): + # """Reconstruct Params4bit from components""" + # data = inner_tensors["data"] + + # # Special handling for FakeTensor reconstruction + # if isinstance(data, torch._subclasses.FakeTensor): + # param = data.as_subclass(Params4bit) + # param.blocksize = ctx["blocksize"] + # param.compress_statistics = ctx["compress_statistics"] + # param.quant_type = ctx["quant_type"] + # param.quant_state = ctx["quant_state"] + # param.quant_storage = ctx["quant_storage"] + # param.module = ctx["module"] + # param.bnb_quantized = ctx["bnb_quantized"] + # return param + + # # Standard reconstruction for real tensors + # return Params4bit( + # data, + # requires_grad=data.requires_grad, + # quant_state=ctx["quant_state"], + # blocksize=ctx["blocksize"], + # compress_statistics=ctx["compress_statistics"], + # quant_type=ctx["quant_type"], + # quant_storage=ctx["quant_storage"], + # module=ctx["module"], + # bnb_quantized=ctx["bnb_quantized"], + # ) @classmethod def __torch_function__(cls, func, types, args=(), kwargs=None): - # Type preservation through ops - result = super().__torch_function__(func, types, args, kwargs or {}) - if isinstance(result, torch.Tensor) and not isinstance(result, cls): - return result.as_subclass(cls) - return result - - @classmethod - def __torch_dispatch__(cls, func, types, args=(), kwargs=None): - # Delegate to FakeTensor implementation when needed - if any(isinstance(x, torch._subclasses.FakeTensor) for x in args): - return torch._C.DispatchKey.Fake(func(*args, **(kwargs or {}))) - return super().__torch_dispatch__(func, types, args, kwargs) + if kwargs is None: + kwargs = {} + with torch._C.DisableTorchFunctionSubclass(): + return func(*args, **kwargs) + + # @classmethod + # def __torch_function__(cls, func, types, args=(), kwargs=None): + # # Type preservation through ops + # result = super().__torch_function__(func, types, args, kwargs or {}) + # if isinstance(result, torch.Tensor) and not isinstance(result, cls): + # return result.as_subclass(cls) + # return result + + # @classmethod + # def __torch_dispatch__(cls, func, types, args=(), kwargs=None): + # # Delegate to FakeTensor implementation 
when needed + # if any(isinstance(x, torch._subclasses.FakeTensor) for x in args): + # return torch._C.DispatchKey.Fake(func(*args, **(kwargs or {}))) + # return super().__torch_dispatch__(func, types, args, kwargs) def detach(self): """Create new instance preserving quantization state"""
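
[Note: the __torch_function__ override that the series settles on above is the standard "plain tensor passthrough" pattern: with DisableTorchFunctionSubclass active, every op on Params4bit runs with ordinary tensor semantics, so Dynamo sees plain ATen calls instead of an opaque subclass method. A self-contained toy sketch of the same pattern; PassthroughParam is hypothetical and not part of bitsandbytes:

    import torch

    class PassthroughParam(torch.nn.Parameter):
        @classmethod
        def __torch_function__(cls, func, types, args=(), kwargs=None):
            if kwargs is None:
                kwargs = {}
            # Run the op with subclass dispatch disabled; results come back
            # as plain torch.Tensor rather than PassthroughParam.
            with torch._C.DisableTorchFunctionSubclass():
                return func(*args, **kwargs)

    p = PassthroughParam(torch.randn(4, 4), requires_grad=False)
    print(type(p.t()))  # <class 'torch.Tensor'>: the subclass is not preserved

The trade-off is that ops no longer return Params4bit, so anything relying on type preservation has to be handled explicitly; that is why detach() is overridden by hand to rebuild the instance together with its quant_state.]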