Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- [
- {
- "GPU": "BC-250",
- "Tok/s": "26.89 - 33.52 tokens/s",
- "TFLOPS": "",
- "Format": "GGUF",
- "Cost": "$20",
- "Loading Secs": "21.49secs",
- "2nd Load": "",
- "Context (max)s": "",
- "Context sent": "109 tokens",
- "VRAM": "",
- "TDP": "",
- "watts inference": "197W",
- "Watts idle(Loaded)": "85W* - 101W",
- "Watts idle (0B VRAM)": "85W* - 101W",
- "Notes": "* 101W stock on P4.00G BIOS. 85W with oberon-governor Single node on APW3+ and 12V Delta blower fan."
- },
- {
- "GPU": "P102-100",
- "Tok/s": "22.62 tokens/s",
- "TFLOPS": "10.77 fp32",
- "Format": "GGUF",
- "Cost": "$40",
- "Loading Secs": "11.4secs",
- "2nd Load": "",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9320MB",
- "TDP": "250W",
- "watts inference": "140-220W",
- "Watts idle(Loaded)": "9W",
- "Watts idle (0B VRAM)": "9W",
- "Notes": ""
- },
- {
- "GPU": "P104-100 Q6_K_L",
- "Tok/s": "16.92 tokens/s",
- "TFLOPS": "6.655 fp32",
- "Format": "GGUF",
- "Cost": "$30",
- "Loading Secs": "26.33secs",
- "2nd Load": "16.24secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "7362MB",
- "TDP": "180W",
- "watts inference": "85-155W",
- "Watts idle(Loaded)": "5W",
- "Watts idle (0B VRAM)": "5W",
- "Notes": ""
- },
- {
- "GPU": "M40",
- "Tok/s": "15.67 tokens/s",
- "TFLOPS": "6.832 fp32",
- "Format": "GGUF",
- "Cost": "$40",
- "Loading Secs": "23.44secs",
- "2nd Load": "2.4secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9292MB",
- "TDP": "250W",
- "watts inference": "125-220W",
- "Watts idle(Loaded)": "62W",
- "Watts idle (0B VRAM)": "15W",
- "Notes": "CUDA error: CUDA-capable device(s) is/are busy or unavailable"
- },
- {
- "GPU": "GTX 1060 Q4_K_M",
- "Tok/s": "15.17 tokens/s",
- "TFLOPS": "4.375 fp32",
- "Format": "GGUF",
- "Cost": "",
- "Loading Secs": "",
- "2nd Load": "2.02secs",
- "Context (max)s": "4096",
- "Context sent": "109 tokens",
- "VRAM": "5278MB",
- "TDP": "120W",
- "watts inference": "65-120W",
- "Watts idle(Loaded)": "5W",
- "Watts idle (0B VRAM)": "5W",
- "Notes": ""
- },
- {
- "GPU": "GTX 1070 ti Q6_K_L",
- "Tok/s": "17.28 tokens/s",
- "TFLOPS": "8.186 fp32",
- "Format": "GGUF",
- "Cost": "$100",
- "Loading Secs": "19.70secs",
- "2nd Load": "3.55secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "7684MB***",
- "TDP": "180W",
- "watts inference": "90-170W",
- "Watts idle(Loaded)": "6W",
- "Watts idle (0B VRAM)": "6W",
- "Notes": "Meta-Llama-3.1-8B-Instruct-Q6_K_L.gguf"
- },
- {
- "GPU": "AMD Radeon Instinct MI25",
- "Tok/s": "soon..",
- "TFLOPS": "",
- "Format": "",
- "Cost": "",
- "Loading Secs": "",
- "2nd Load": "",
- "Context (max)s": "",
- "Context sent": "",
- "VRAM": "",
- "TDP": "",
- "watts inference": "",
- "Watts idle(Loaded)": "",
- "Watts idle (0B VRAM)": "",
- "Notes": ""
- },
- {
- "GPU": "P4",
- "Tok/s": "",
- "TFLOPS": "5.704 fp32",
- "Format": "GGUF",
- "Cost": "$100",
- "Loading Secs": "",
- "2nd Load": "",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "",
- "TDP": "75W",
- "watts inference": "",
- "Watts idle(Loaded)": "",
- "Watts idle (0B VRAM)": "",
- "Notes": ""
- },
- {
- "GPU": "P40",
- "Tok/s": "18.56 tokens/s",
- "TFLOPS": "11.76 fp32",
- "Format": "GGUF",
- "Cost": "$300",
- "Loading Secs": "",
- "2nd Load": "3.58secs**",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9341MB",
- "TDP": "250W",
- "watts inference": "90-150W",
- "Watts idle(Loaded)": "50W",
- "Watts idle (0B VRAM)": "10W",
- "Notes": "same inference time with or without flash_attention. **NVME on another server"
- },
- {
- "GPU": "P100",
- "Tok/s": "21.48 tokens/s",
- "TFLOPS": "9.526 fp32",
- "Format": "GGUF",
- "Cost": "$150",
- "Loading Secs": "23.51secs",
- "2nd Load": "",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9448MB",
- "TDP": "250W",
- "watts inference": "80-140W",
- "Watts idle(Loaded)": "33W",
- "Watts idle (0B VRAM)": "26W",
- "Notes": ""
- },
- {
- "GPU": "P100",
- "Tok/s": "29.58 tokens/s",
- "TFLOPS": "19.05 fp16",
- "Format": "EXL2",
- "Cost": "$150",
- "Loading Secs": "22.51secs",
- "2nd Load": "6.95secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9458MB",
- "TDP": "250W",
- "watts inference": "95-150W",
- "Watts idle(Loaded)": "33W",
- "Watts idle (0B VRAM)": "26W",
- "Notes": "no_flash_attn=true"
- },
- {
- "GPU": "CMP 70HX Q6_K_L",
- "Tok/s": "12.8 tokens/s",
- "TFLOPS": "10.71 fp32",
- "Format": "GGUF",
- "Cost": "$150",
- "Loading Secs": "26.7secs",
- "2nd Load": "9secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "7693MB",
- "TDP": "220W",
- "watts inference": "80-100W",
- "Watts idle(Loaded)": "65W** 13W setting p-state 8",
- "Watts idle (0B VRAM)": "65W",
- "Notes": "Meta-Llama-3.1-8B-Instruct-Q6_K_L.gguf RISER"
- },
- {
- "GPU": "CMP 70HX Q6_K_L",
- "Tok/s": "16.47 tokens/s",
- "TFLOPS": "10.71 fp32",
- "Format": "GGUF/FA",
- "Cost": "$150",
- "Loading Secs": "26.78secs",
- "2nd Load": "9secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "7391MB",
- "TDP": "220W",
- "watts inference": "80-110W",
- "Watts idle(Loaded)": "65W",
- "Watts idle (0B VRAM)": "65W",
- "Notes": "flash_attention RISER"
- },
- {
- "GPU": "CMP 70HX 6bpw",
- "Tok/s": "25.12 tokens/s",
- "TFLOPS": "10.71 fp16",
- "Format": "EXL2",
- "Cost": "$150",
- "Loading Secs": "22.07secs",
- "2nd Load": "8.81secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "7653MB",
- "TDP": "220W",
- "watts inference": "70-110W",
- "Watts idle(Loaded)": "65W",
- "Watts idle (0B VRAM)": "65W",
- "Notes": "turboderp/Llama-3.1-8B-Instruct-exl2 at 6.0bpw no_flash_attn RISER"
- },
- {
- "GPU": "CMP 70HX 6bpw",
- "Tok/s": "30.08 tokens/s",
- "TFLOPS": "10.71 fp16",
- "Format": "EXL2/FA",
- "Cost": "$150",
- "Loading Secs": "22.22secs",
- "2nd Load": "13.14secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "7653MB",
- "TDP": "220W",
- "watts inference": "110W",
- "Watts idle(Loaded)": "65W",
- "Watts idle (0B VRAM)": "65W",
- "Notes": "turboderp/Llama-3.1-8B-Instruct-exl2:6.0bpw RISER"
- },
- {
- "GPU": "GTX 1080ti",
- "Tok/s": "22.80 tokens/s",
- "TFLOPS": "11.34 fp32",
- "Format": "GGUF",
- "Cost": "$160",
- "Loading Secs": "23.99secs",
- "2nd Load": "2.89secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9332MB",
- "TDP": "250W",
- "watts inference": "120-200W",
- "Watts idle(Loaded)": "8W",
- "Watts idle (0B VRAM)": "8W",
- "Notes": "RISER"
- },
- {
- "GPU": "CMP 100-210",
- "Tok/s": "25.07 tokens/s",
- "TFLOPS": "11.75 fp32",
- "Format": "GGUF",
- "Cost": "$150",
- "Loading Secs": "39.98secs",
- "2nd Load": "",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9461MB",
- "TDP": "250W",
- "watts inference": "80-130W",
- "Watts idle(Loaded)": "28W",
- "Watts idle (0B VRAM)": "24W",
- "Notes": "rope_freq_base=0, or coredump"
- },
- {
- "GPU": "CMP 100-210",
- "Tok/s": "40.66 tokens/s",
- "TFLOPS": "23.49 fp16",
- "Format": "EXL2",
- "Cost": "$150",
- "Loading Secs": "41.43secs",
- "2nd Load": "",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9489MB",
- "TDP": "250W",
- "watts inference": "120-170W",
- "Watts idle(Loaded)": "28W",
- "Watts idle (0B VRAM)": "24W",
- "Notes": "no_flash_attn=true"
- },
- {
- "GPU": "RTX 3070 Q6_K_L",
- "Tok/s": "27.96 tokens/s",
- "TFLOPS": "20.31 fp32",
- "Format": "GGUF",
- "Cost": "$250",
- "Loading Secs": "",
- "2nd Load": "5.15secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "7765MB",
- "TDP": "240W",
- "watts inference": "145-165W",
- "Watts idle(Loaded)": "23W",
- "Watts idle (0B VRAM)": "15W",
- "Notes": ""
- },
- {
- "GPU": "RTX 3070 Q6_K_L",
- "Tok/s": "29.63 tokens/s",
- "TFLOPS": "20.31 fp32",
- "Format": "GGUF/FA",
- "Cost": "$250",
- "Loading Secs": "22.4secs",
- "2nd Load": "5.3secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "7435MB",
- "TDP": "240W",
- "watts inference": "165-185W",
- "Watts idle(Loaded)": "23W",
- "Watts idle (0B VRAM)": "15W",
- "Notes": ""
- },
- {
- "GPU": "RTX 3070 6bpw",
- "Tok/s": "31.36 tokens/s",
- "TFLOPS": "20.31 fp16",
- "Format": "EXL2",
- "Cost": "$250",
- "Loading Secs": "",
- "2nd Load": "5.17secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "7707MiB",
- "TDP": "240W",
- "watts inference": "140-155W",
- "Watts idle(Loaded)": "23W",
- "Watts idle (0B VRAM)": "15W",
- "Notes": ""
- },
- {
- "GPU": "RTX 3070 6bpw",
- "Tok/s": "35.27 tokens/s",
- "TFLOPS": "20.31 fp16",
- "Format": "EXL2/FA",
- "Cost": "$250",
- "Loading Secs": "17.48secs",
- "2nd Load": "5.39secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "7707MiB",
- "TDP": "240W",
- "watts inference": "130-145W",
- "Watts idle(Loaded)": "23W",
- "Watts idle (0B VRAM)": "15W",
- "Notes": ""
- },
- {
- "GPU": "Titan V",
- "Tok/s": "37.37 tokens/s",
- "TFLOPS": "14.90 fp32",
- "Format": "GGUF",
- "Cost": "$300",
- "Loading Secs": "23.38secs",
- "2nd Load": "2.53secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9502MB",
- "TDP": "250W",
- "watts inference": "90W-127W",
- "Watts idle(Loaded)": "25W",
- "Watts idle (0B VRAM)": "25W",
- "Notes": "--tensorcores"
- },
- {
- "GPU": "Titan V",
- "Tok/s": "45.65 tokens/s",
- "TFLOPS": "29.80 fp16",
- "Format": "EXL2",
- "Cost": "$300",
- "Loading Secs": "20.75secs",
- "2nd Load": "6.27secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9422MB",
- "TDP": "250W",
- "watts inference": "110-130W",
- "Watts idle(Loaded)": "25W",
- "Watts idle (0B VRAM)": "23W",
- "Notes": "no_flash_attn=true"
- },
- {
- "GPU": "Tesla T4",
- "Tok/s": "19.57 tokens/s",
- "TFLOPS": "8.141 fp32",
- "Format": "GGUF",
- "Cost": "$500",
- "Loading Secs": "23.72secs",
- "2nd Load": "2.24secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9294MB",
- "TDP": "70W",
- "watts inference": "45-50W",
- "Watts idle(Loaded)": "37W",
- "Watts idle (0B VRAM)": "10-27W",
- "Notes": "Card I had bounced between P0 & P8 idle"
- },
- {
- "GPU": "Tesla T4",
- "Tok/s": "23.99 tokens/s",
- "TFLOPS": "65.13 fp16",
- "Format": "EXL2",
- "Cost": "$500",
- "Loading Secs": "27.04secs",
- "2nd Load": "6.63secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9220MB",
- "TDP": "70W",
- "watts inference": "60-70W",
- "Watts idle(Loaded)": "27W",
- "Watts idle (0B VRAM)": "10-27W",
- "Notes": ""
- },
- {
- "GPU": "Titan RTX",
- "Tok/s": "31.62 tokens/s",
- "TFLOPS": "16.31 fp32",
- "Format": "GGUF",
- "Cost": "$700",
- "Loading Secs": "",
- "2nd Load": "2.93secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9358MB",
- "TDP": "280W",
- "watts inference": "180-210W",
- "Watts idle(Loaded)": "15W",
- "Watts idle (0B VRAM)": "15W",
- "Notes": "--tensorcores"
- },
- {
- "GPU": "Titan RTX",
- "Tok/s": "32.56 tokens/s",
- "TFLOPS": "16.31 fp32",
- "Format": "GGUF/FA",
- "Cost": "$700",
- "Loading Secs": "23.78secs",
- "2nd Load": "2.92secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9056MB",
- "TDP": "280W",
- "watts inference": "185-215W",
- "Watts idle(Loaded)": "15W",
- "Watts idle (0B VRAM)": "15W",
- "Notes": "--tensorcores flash_attn=true"
- },
- {
- "GPU": "Titan RTX",
- "Tok/s": "44.15 tokens/s",
- "TFLOPS": "32.62 fp16",
- "Format": "EXL2",
- "Cost": "$700",
- "Loading Secs": "26.58secs",
- "2nd Load": "6.47secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9246MB",
- "TDP": "280W",
- "watts inference": "220-240W",
- "Watts idle(Loaded)": "15W",
- "Watts idle (0B VRAM)": "15W",
- "Notes": "no_flash_attn=true"
- },
- {
- "GPU": "RTX 3090",
- "Tok/s": "35.13 tokens/s",
- "TFLOPS": "35.58 fp32",
- "Format": "GGUF",
- "Cost": "$700",
- "Loading Secs": "24.00secs",
- "2nd Load": "2.89secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9456MB",
- "TDP": "350W",
- "watts inference": "235-260W",
- "Watts idle(Loaded)": "17W",
- "Watts idle (0B VRAM)": "6W",
- "Notes": ""
- },
- {
- "GPU": "RTX 3090",
- "Tok/s": "36.02 tokens/s",
- "TFLOPS": "35.58 fp32",
- "Format": "GGUF/FA",
- "Cost": "$700",
- "Loading Secs": "",
- "2nd Load": "2.82secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9154MB",
- "TDP": "350W",
- "watts inference": "260-265W",
- "Watts idle(Loaded)": "17W",
- "Watts idle (0B VRAM)": "6W",
- "Notes": ""
- },
- {
- "GPU": "RTX 3090",
- "Tok/s": "49.11 tokens/s",
- "TFLOPS": "35.58 fp16",
- "Format": "EXL2",
- "Cost": "$700",
- "Loading Secs": "26.14secs",
- "2nd Load": "7.63secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9360MB",
- "TDP": "350W",
- "watts inference": "270-315W",
- "Watts idle(Loaded)": "17W",
- "Watts idle (0B VRAM)": "6W",
- "Notes": ""
- },
- {
- "GPU": "RTX 3090",
- "Tok/s": "54.75 tokens/s",
- "TFLOPS": "35.58 fp16",
- "Format": "EXL2/FA",
- "Cost": "$700",
- "Loading Secs": "",
- "2nd Load": "7.37secs",
- "Context (max)s": "8192",
- "Context sent": "109 tokens",
- "VRAM": "9360MB",
- "TDP": "350W",
- "watts inference": "285-310W",
- "Watts idle(Loaded)": "17W",
- "Watts idle (0B VRAM)": "6W",
- "Notes": ""
- }
- ]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement