╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /srv/home/rwkv_quant/rwkv_quantizer.py:70 in <module> │
│ │
│ 67 │
│ 68 print("run forward pass") │
│ 69 ctx = tokenizer.tokenizer.encode("\nIn a shocking finding, scientist discovered a herd o │
│ ❱ 70 out = model.forward(th.tensor(ctx).unsqueeze(0).cuda())[0].detach().cpu().numpy() │
│ 71 print(out) │
│ 72 │
│ │
│ /srv/home/rwkv_quant/rwkv_quantizer.py:31 in forward │
│ │
│ 28 │ │ assert T <= self.ctx_len, "Cannot forward, because len(input) > model ctx_len." │
│ 29 │ │ │
│ 30 │ │ x = self.emb(idx) │
│ ❱ 31 │ │ x = self.blocks(x) │
│ 32 │ │ x = self.ln_out(x) │
│ 33 │ │ │
│ 34 │ │ if RWKV_HEAD_QK_DIM > 0: │
│ │
│ /usr/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /usr/lib/python3.10/site-packages/torch/nn/modules/container.py:204 in forward │
│ │
│ 201 │ # with Any as TorchScript expects a more precise type │
│ 202 │ def forward(self, input): │
│ 203 │ │ for module in self: │
│ ❱ 204 │ │ │ input = module(input) │
│ 205 │ │ return input │
│ 206 │ │
│ 207 │ def append(self, module: Module) -> 'Sequential': │
│ │
│ /usr/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /srv/home/rwkv_quant/RWKV-LM/RWKV-v4/./src/model.py:312 in forward │
│ │
│ 309 │ │ if self.layer_id == 0 and self.config.model_type == 'RWKV-ffnPre': │
│ 310 │ │ │ x = x + self.ffnPre(self.ln1(x)) # better in some cases │
│ 311 │ │ else: │
│ ❱ 312 │ │ │ x = x + self.att(self.ln1(x)) │
│ 313 │ │ x = x + self.ffn(self.ln2(x)) │
│ 314 │ │ return x │
│ 315 │
│ │
│ /usr/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /srv/home/rwkv_quant/RWKV-LM/RWKV-v4/./src/model.py:231 in forward │
│ │
│ 228 │ │ │
│ 229 │ │ sr, k, v = self.jit_func(x) │
│ 230 │ │ │
│ ❱ 231 │ │ rwkv = sr * RUN_CUDA(B, T, C, self.time_decay, self.time_first, k, v) │
│ 232 │ │ rwkv = self.output(rwkv) │
│ 233 │ │ return rwkv │
│ 234 │
│ │
│ /srv/home/rwkv_quant/RWKV-LM/RWKV-v4/./src/model.py:102 in RUN_CUDA │
│ │
│ 99 │ │ │ return (None, None, None, gw.bfloat16(), gu.bfloat16(), gk.bfloat16(), gv.bf │
│ 100 │
│ 101 def RUN_CUDA(B, T, C, w, u, k, v): │
│ ❱ 102 │ return WKV.apply(B, T, C, w.cuda(), u.cuda(), k.cuda(), v.cuda()) │
│ 103 │
│ 104 ######################################################################################## │
│ 105 # RWKV: RWKV Time-mix + RWKV Channel-mix │
│ │
│ /srv/home/rwkv_quant/RWKV-LM/RWKV-v4/./src/model.py:55 in forward │
│ │
│ 52 │ │ ctx.C = C │
│ 53 │ │ assert T <= T_MAX │
│ 54 │ │ print(B, C) │
│ ❱ 55 │ │ assert B * C % min(C, 1024) == 0 │
│ 56 │ │ if '32' in os.environ['RWKV_FLOAT_MODE']: │
│ 57 │ │ │ w = -torch.exp(w.contiguous()) │
│ 58 │ │ │ u = u.contiguous() │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AssertionError
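
Note on the failure: the innermost frame is WKV.forward in RWKV-LM/RWKV-v4/src/model.py, which guards the custom CUDA kernel with "assert B * C % min(C, 1024) == 0". This presumably mirrors the kernel's launch configuration, i.e. min(C, 1024) threads per block, so the B * C work items must split into whole blocks; the kernel source is not part of this paste, so treat that reading as an assumption. The output of the print(B, C) on line 54 is also not included, so the concrete values in the sketch below are illustrative guesses, not the real ones:

# Minimal, self-contained sketch of the guard from model.py:55.
# check_wkv_launch is a hypothetical helper; B and C are batch size and
# channel (embedding) width. The values passed below are assumptions.
def check_wkv_launch(B: int, C: int) -> None:
    threads_per_block = min(C, 1024)  # presumed CUDA thread-block size
    # B * C elements must divide evenly into whole thread blocks
    assert B * C % threads_per_block == 0, (
        f"B*C={B * C} is not a multiple of {threads_per_block}"
    )

check_wkv_launch(B=1, C=2048)  # passes: 2048 % 1024 == 0
check_wkv_launch(B=1, C=2560)  # raises AssertionError: 2560 % 1024 == 512

For C <= 1024 the check always passes (B * C is trivially a multiple of C), so a failure like the one above implies C > 1024 with B * C not a multiple of 1024; under that assumption, choosing B so that B * C is a multiple of 1024 would satisfy the guard.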