diff --git a/vllm_npu/attention/attention_v1.py b/vllm_npu/attention/attention_v1.py
index 39c2d87..da9e7b3 100644
--- a/vllm_npu/attention/attention_v1.py
+++ b/vllm_npu/attention/attention_v1.py
@@ -351,13 +351,15 @@ class AscendAttentionBackendImpl(AttentionImpl):
         self._key_cache, self._value_cache = kv_cache.unbind(0)
         slots = attn_metadata.slot_mapping
-        torch_npu._npu_reshape_and_cache(
-            key=key[:num_actual_tokens],
-            value=value[:num_actual_tokens],
-            key_cache=self._key_cache,
-            value_cache=self._value_cache,
-            slot_indices=slots,
-        )
+        # Pure PyTorch reshape_and_cache (avoids ATB dependency)
+        key_to_cache = key[:num_actual_tokens]
+        val_to_cache = value[:num_actual_tokens]
+        block_size = self._key_cache.shape[1]
+        block_idx = slots // block_size
+        block_offset = slots % block_size
+        self._key_cache[block_idx, block_offset] = key_to_cache
+        self._value_cache[block_idx, block_offset] = val_to_cache
+
         # ----------------------------------------------------------
         # Step 2: Compute attention
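
For reference, a minimal standalone sketch of the slot-to-block scatter the new code performs, assuming a paged KV cache of shape [num_blocks, block_size, num_kv_heads, head_size] and a flat per-token slot_mapping (slot = block_idx * block_size + block_offset); the shapes and values below are illustrative, not taken from the actual model config:

```python
import torch

# Hypothetical sizes, for illustration only.
num_blocks, block_size, num_kv_heads, head_size = 4, 8, 2, 16
num_tokens = 5

key_cache = torch.zeros(num_blocks, block_size, num_kv_heads, head_size)
key = torch.randn(num_tokens, num_kv_heads, head_size)

# Flat slot index per token, as produced by the block manager.
slot_mapping = torch.tensor([3, 9, 10, 17, 31])

block_idx = slot_mapping // block_size    # which cache block each token lands in
block_offset = slot_mapping % block_size  # position inside that block

# Advanced indexing scatters one token per (block, offset) pair,
# which is what torch_npu._npu_reshape_and_cache did in fused form.
key_cache[block_idx, block_offset] = key

assert torch.equal(key_cache[0, 3], key[0])  # slot 3  -> block 0, offset 3
assert torch.equal(key_cache[1, 1], key[1])  # slot 9  -> block 1, offset 1
```

The same scatter is applied to the value cache; the trade-off is losing the fused ATB kernel in exchange for removing the dependency.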