kopyl

without torch.distributed.run

Jul 21st, 2023
Not using distributed mode
2023-07-21 12:01:49,218 [INFO]
===== Running Parameters =====
2023-07-21 12:01:49,218 [INFO] {
    "amp": true,
    "batch_size_eval": 1,
    "batch_size_train": 3,
    "device": "cuda",
    "dist_url": "env://",
    "distributed": false,
    "evaluate": false,
    "init_lr": 5e-06,
    "iters_per_inner_epoch": 40,
    "lr_sched": "constant_lr",
    "max_iters": 40,
    "min_lr": 0,
    "num_workers": 4,
    "output_dir": "train_output",
    "resume_ckpt_path": null,
    "runner": "runner_iter",
    "seed": 42,
    "task": "text-to-image-generation",
    "train_splits": [
        "train"
    ],
    "weight_decay": 0.01,
    "world_size": 1
}
2023-07-21 12:01:49,218 [INFO]
====== Dataset Attributes ======
2023-07-21 12:01:49,218 [INFO]
======== blip_diffusion_finetune =======
2023-07-21 12:01:49,219 [INFO] {
    "build_info": {
        "images": {
            "storage": "train_images"
        },
        "subject_text": "feigin"
    },
    "data_type": "images",
    "kw_processor": {
        "inp_vis_processor": {
            "name": "blip_diffusion_inp_image_train"
        },
        "tgt_vis_processor": {
            "name": "blip_diffusion_tgt_image_train"
        }
    },
    "text_processor": {
        "eval": {
            "name": "blip_caption"
        },
        "train": {
            "name": "blip_caption"
        }
    }
}
2023-07-21 12:01:49,219 [INFO]
====== Model Attributes ======
2023-07-21 12:01:49,219 [INFO] {
    "arch": "blip_diffusion",
    "load_finetuned": false,
    "load_pretrained": true,
    "model_type": "base",
    "pretrained": "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP-Diffusion/blip-diffusion.tar.gz",
    "qformer_cross_attention_freq": 1,
    "qformer_num_query_token": 16,
    "qformer_train": false,
    "sd_pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5",
    "sd_train_text_encoder": false,
    "vae_half_precision": true,
    "vit_model": "clip_L"
}
/workspace/LAVIS/lavis/datasets/builders/base_dataset_builder.py:164: UserWarning:
The specified path /export/home/.cache/lavis/train_images for visual inputs does not exist.
Please provide a correct path to the visual inputs or
refer to datasets/download_scripts/README.md for downloading instructions.

warnings.warn(
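The UserWarning above means the dataset builder could not find the training images at the storage path it resolved. A minimal sketch of one way to satisfy it, assuming the relative "train_images" entry from the dataset config is resolved against the cache root shown in the warning (/export/home/.cache/lavis); the local source folder used below is hypothetical:

```python
# Sketch only, not the LAVIS API: put (or symlink) the training images at the
# path the builder expects. The cache root comes from the warning above; the
# local source directory is a hypothetical example.
from pathlib import Path

cache_root = Path("/export/home/.cache/lavis")        # path named in the warning
local_images = Path("/workspace/LAVIS/train_images")  # hypothetical folder with your subject images
target = cache_root / "train_images"                  # "storage" value from the dataset config

cache_root.mkdir(parents=True, exist_ok=True)
if not target.exists():
    target.symlink_to(local_images.resolve(), target_is_directory=True)
```

Training continues past the warning, as the log below shows, but without images at that path the image processors have nothing to read.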
2023-07-21 12:01:49,222 [INFO] Building datasets...
2023-07-21 12:01:52,190 [INFO] freeze vision encoder
Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with:
```
pip install accelerate
```
.
Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with:
```
pip install accelerate
```
.
/usr/local/lib/python3.10/dist-packages/diffusers/configuration_utils.py:215: FutureWarning: It is deprecated to pass a pretrained model name or path to `from_config`. If you were trying to load a scheduler, please use <class 'diffusers.schedulers.scheduling_ddpm.DDPMScheduler'>.from_pretrained(...) instead. Otherwise, please make sure to pass a configuration dictionary instead. This functionality will be removed in v1.0.0.
deprecate("config-passed-as-path", "1.0.0", deprecation_message, standard_warn=False)
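The FutureWarning above is raised by diffusers when a model name or path is passed to `from_config`; the message itself recommends `from_pretrained`. A minimal sketch of the suggested call, assuming the scheduler config lives in the standard "scheduler" subfolder of the Stable Diffusion repo named in the model attributes above:

```python
# Sketch of the call the FutureWarning recommends; not a patch to LAVIS itself.
from diffusers import DDPMScheduler

noise_scheduler = DDPMScheduler.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # repo from sd_pretrained_model_name_or_path
    subfolder="scheduler",             # assumed location of the scheduler config
)
```

This is only a deprecation notice (the code path goes away in diffusers v1.0.0); it is unrelated to the crash further down.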
2023-07-21 12:02:04,271 [INFO] Loading pretrained model from /root/.cache/torch/hub/checkpoints/blip-diffusion
No ctx_embeddings_cache found in /root/.cache/torch/hub/checkpoints/blip-diffusion
2023-07-21 12:02:06,932 [INFO] Start training, max_iters=40, in total 1 inner epochs.
2023-07-21 12:02:08,532 [INFO] dataset_ratios not specified, datasets will be concatenated (map-style datasets) or chained (webdataset.DataPipeline).
2023-07-21 12:02:08,533 [INFO] Loaded 2200000 records for train split from the dataset.
2023-07-21 12:02:08,579 [INFO] number of trainable parameters: 859520964
2023-07-21 12:02:08,580 [INFO] Start training epoch 0, 40 iters per inner epoch.
Traceback (most recent call last):
  File "/workspace/LAVIS/train.py", line 103, in <module>
    main()
  File "/workspace/LAVIS/train.py", line 99, in main
    runner.train()
  File "/workspace/LAVIS/lavis/runners/runner_iter.py", line 99, in train
    train_stats = self.train_iters(self.cur_epoch, start_iters)
  File "/workspace/LAVIS/lavis/runners/runner_iter.py", line 145, in train_iters
    return self.task.train_iters(
  File "/workspace/LAVIS/lavis/tasks/base_task.py", line 144, in train_iters
    return self._train_inner_loop(
  File "/workspace/LAVIS/lavis/tasks/base_task.py", line 222, in _train_inner_loop
    loss, loss_dict = self.train_step(model=model, samples=samples)
  File "/workspace/LAVIS/lavis/tasks/base_task.py", line 64, in train_step
    output = model(samples)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 363, in _forward_unimplemented
    raise NotImplementedError(f"Module [{type(self).__name__}] is missing the required \"forward\" function")
NotImplementedError: Module [BlipDiffusion] is missing the required "forward" function
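The crash comes from PyTorch's placeholder `forward`: calling any `nn.Module` subclass that never defines `forward` raises exactly this error. A minimal repro of the same message, using a hypothetical stand-in class rather than the real LAVIS model:

```python
# Minimal repro, independent of LAVIS/BLIP-Diffusion: nn.Module's placeholder
# forward raises NotImplementedError as soon as the module is called.
import torch.nn as nn

class BlipDiffusionLike(nn.Module):  # hypothetical stand-in, not the real class
    pass  # no forward() defined

model = BlipDiffusionLike()
try:
    model({"text_input": "a photo of feigin"})
except NotImplementedError as e:
    print(e)  # Module [BlipDiffusionLike] is missing the required "forward" function
```

In other words, `base_task.train_step` calls `model(samples)`, but the `BlipDiffusion` model built by this LAVIS checkout does not define `forward`, so PyTorch's placeholder raises and training stops at the first step.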