IgANet
IgANets - Isogeometric Analysis Networks
Loading...
Searching...
No Matches
core.hpp
Go to the documentation of this file.
1
15#pragma once
16
17#include <config.hpp>
18
19#include <array>
20#include <fstream>
21#include <iostream>
22#include <tuple>
23#include <vector>
24
25#include <utils/getenv.hpp>
26
27#ifdef IGANET_WITH_OPENMP
28#include <omp.h>
29#endif
30
31#ifdef IGANET_WITH_MPI
32#ifndef USE_C10D_MPI
33#error "Torch must be compiled with USE_DISTRIBUTED=1, USE_MPI=1, USE_C10_MPI=1"
34#endif
35#include <torch/csrc/distributed/c10d/ProcessGroupMPI.hpp>
36#endif
37
38#include <torch/csrc/api/include/torch/types.h>
39#include <torch/torch.h>
40
41#ifdef CUDA_VERSION
42#include <c10/cuda/CUDACachingAllocator.h>
43#include <c10/cuda/CUDAFunctions.h>
44#endif
45
46#ifdef HIP_VERSION
47#include <c10/hip/HIPCachingAllocator.h>
48#include <c10/hip/HIPFunctions.h>
49#endif
50
51#ifdef IGANET_WITH_GISMO
52#include <gismo.h>
53#include <gsModeling/gsSurfaceReparameterization.h>
54
55#ifdef gsElasticity_ENABLED
56#include <gsElasticity/src/gsElasticityAssembler.h>
57#include <gsElasticity/src/gsGeoUtils.h>
58#include <gsElasticity/src/gsMassAssembler.h>
59#endif
60#endif
61
62#undef real_t
63#undef index_t
64#undef short_t
65
66#ifdef IGANET_WITH_MATPLOT
67#include <matplot/matplot.h>
68#endif
69
70#include <sysinfo.hpp>
71
72namespace iganet {
73
/// @brief Short integer type used throughout the library
using short_t = short int;

namespace literals {

/// @brief User-defined integer literals
///
/// Each operator converts the `unsigned long long` value of the literal to
/// the target integer type with an explicit cast; values that do not fit
/// into the target type are narrowed exactly as `static_cast` would do.
/// The operators are `constexpr`, so the literals can also be used in
/// constant expressions (e.g. `static_assert` or template arguments).
///
/// @param[in] value Value of the integer literal
///
/// @return Value converted to the type selected by the suffix
///
/// @{
inline constexpr short_t operator""_s(unsigned long long value) noexcept {
  return static_cast<short_t>(value);
}
inline constexpr int8_t operator""_i8(unsigned long long value) noexcept {
  return static_cast<int8_t>(value);
}
inline constexpr int16_t operator""_i16(unsigned long long value) noexcept {
  return static_cast<int16_t>(value);
}
inline constexpr int32_t operator""_i32(unsigned long long value) noexcept {
  return static_cast<int32_t>(value);
}
inline constexpr int64_t operator""_i64(unsigned long long value) noexcept {
  return static_cast<int64_t>(value);
}
/// @}

} // namespace literals
87
88// clang-format off
90enum class log : short_t {
91 none = 0,
92 fatal = 1,
93 error = 2,
94 warning = 3,
95 info = 4,
96 debug = 5,
97 verbose = 6
98};
99// clang-format on
100
namespace logging {

/// @brief Dummy stream buffer
///
/// Accepts every character written to it and throws it away.
class NullStreamBuffer : public std::streambuf {
public:
  /// @brief Dummy output
  ///
  /// @param[in] c Character (or EOF) handed over by the stream
  ///
  /// @return A value different from EOF, i.e. the write is reported as
  /// successful
  int overflow(int c) override { return traits_type::not_eof(c); }

protected:
  /// @brief Dummy output of a character sequence
  ///
  /// Discards the whole sequence at once instead of falling back to one
  /// `overflow` call per character.
  ///
  /// @param[in] count Number of characters in the sequence
  ///
  /// @return `count`, i.e. all characters are reported as written
  std::streamsize xsputn(const char *, std::streamsize count) override {
    return count;
  }
};

/// @brief Dummy output stream
///
/// Output stream backed by a NullStreamBuffer; everything streamed into it
/// is discarded.
class NullOStream : public std::ostream {
public:
  /// @brief Constructor
  ///
  /// The base class only receives the address of the buffer here; the
  /// buffer itself is constructed right afterwards as a member.
  NullOStream() : std::ostream(&nullStreamBuffer) {}

private:
  /// @brief Buffer that swallows the output of this stream
  NullStreamBuffer nullStreamBuffer;
};
} // namespace logging
119
121struct {
122private:
124 std::ostream &outputStream = std::cout;
125
127 logging::NullOStream nullStream;
128
130 std::ofstream outputFile;
131
133 enum log level = log::info;
134
135public:
137 void setLogLevel(enum log level) { this->level = level; }
138
140 void setLogFile(std::string filename) {
141 outputFile = std::ofstream(filename);
142 outputStream.rdbuf(outputFile.rdbuf());
143 }
144
146 std::ostream &operator()(enum log level = log::info) {
147 if (this->level >= level)
148 switch (level) {
149 case (log::fatal):
150 return outputStream << "[FATAL ERROR] ";
151 case (log::error):
152 return outputStream << "[ERROR] ";
153 case (log::warning):
154 return outputStream << "[WARNING] ";
155 case (log::info):
156 return outputStream << "[INFO] ";
157 case (log::debug):
158 return outputStream << "[DEBUG] ";
159 case (log::verbose):
160 return outputStream << "[VERBOSE] ";
161 default:
162 return nullStream;
163 }
164 else
165 return nullStream;
166 }
168
171inline std::string memory_summary(c10::DeviceIndex device =
173 c10::cuda::current_device()
175 c10::hip::current_device()
176#else
177 0
178#endif
179) {
180
181 std::ostringstream os;
182
183#if defined(CUDA_VERSION) || defined(HIP_VERSION)
184
185 auto _format_size = [](int64_t bytes) -> std::string {
186 if (bytes == 0)
187 return "0 B";
188
189 std::array<std::string, 6> prefixes{"B", "KiB", "MiB", "GiB", "TiB", "PiB"};
190 int64_t n = std::floor(std::max(0.0, std::log2(static_cast<double>(bytes) /
191 static_cast<double>(768))) /
192 static_cast<double>(10));
193
194 return std::to_string((int64_t)(bytes / std::pow(1024, n))) + " " +
195 prefixes[n];
196 };
197
198#if TORCH_VERSION_MAJOR > 2 || \
199 (TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR > 4)
200 using namespace c10::CachingDeviceAllocator;
201#endif
202
203#ifdef CUDA_VERSION
204 using namespace c10::cuda::CUDACachingAllocator;
205#elif HIP_VERSION
206 using namespace c10::hip::HIPCachingAllocator;
207#endif
208
210
211 os << "|====================================================================="
212 "======|\n"
213#ifdef CUDA_VERSION
214 << "| LibTorch CUDA memory summary, device ID "
215#elif HIP_VERSION
216 << "| LibTorch ROCm memory summary, device ID "
217#endif
218 << std::setw(18) << std::left << static_cast<int>(device) << "|\n"
219 << "|---------------------------------------------------------------------"
220 "------|\n"
221#ifdef CUDA_VERSION
222 << "| CUDA OOMs: "
223#elif HIP_VERSION
224 << "| ROCm OOMs: "
225#endif
226 << std::setw(13) << std::left << deviceStats.num_ooms
227#ifdef CUDA_VERSION
228 << "| cudaMalloc retries: "
229#elif HIP_VERSION
230 << "| hipMalloc retries: "
231#endif
232 << std::setw(10) << std::left << deviceStats.num_alloc_retries << "|\n"
233 << "|====================================================================="
234 "======|\n"
235 << "| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot "
236 "Freed |\n"
237 << "|---------------------------------------------------------------------"
238 "------|\n"
239 << "| Allocated memory | " << std::setw(10) << std::right
240 << _format_size(
242 .allocated_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
243 .current)
244 << " | " << std::setw(10) << std::right
245 << _format_size(
247 .allocated_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
248 .peak)
249 << " | " << std::setw(10) << std::right
250 << _format_size(
252 .allocated_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
253 .allocated)
254 << " | " << std::setw(10) << std::right
255 << _format_size(
257 .allocated_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
258 .freed)
259 << " |\n"
260 << "| from large pool | " << std::setw(10) << std::right
261 << _format_size(
263 .allocated_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
264 .current)
265 << " | " << std::setw(10) << std::right
266 << _format_size(
268 .allocated_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
269 .peak)
270 << " | " << std::setw(10) << std::right
271 << _format_size(
273 .allocated_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
274 .allocated)
275 << " | " << std::setw(10) << std::right
276 << _format_size(
278 .allocated_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
279 .freed)
280 << " |\n"
281 << "| from small pool | " << std::setw(10) << std::right
282 << _format_size(
284 .allocated_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
285 .current)
286 << " | " << std::setw(10) << std::right
287 << _format_size(
289 .allocated_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
290 .peak)
291 << " | " << std::setw(10) << std::right
292 << _format_size(
294 .allocated_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
295 .allocated)
296 << " | " << std::setw(10) << std::right
297 << _format_size(
299 .allocated_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
300 .freed)
301 << " |\n"
302 << "|---------------------------------------------------------------------"
303 "------|\n"
304 << "| Active memory | " << std::setw(10) << std::right
305 << _format_size(
307 .active_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
308 .current)
309 << " | " << std::setw(10) << std::right
310 << _format_size(
312 .active_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
313 .peak)
314 << " | " << std::setw(10) << std::right
315 << _format_size(
317 .active_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
318 .allocated)
319 << " | " << std::setw(10) << std::right
320 << _format_size(
322 .active_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
323 .freed)
324 << " |\n"
325 << "| from large pool | " << std::setw(10) << std::right
326 << _format_size(
328 .active_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
329 .current)
330 << " | " << std::setw(10) << std::right
331 << _format_size(
333 .active_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
334 .peak)
335 << " | " << std::setw(10) << std::right
336 << _format_size(
338 .active_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
339 .allocated)
340 << " | " << std::setw(10) << std::right
341 << _format_size(
343 .active_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
344 .freed)
345 << " |\n"
346 << "| from small pool | " << std::setw(10) << std::right
347 << _format_size(
349 .active_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
350 .current)
351 << " | " << std::setw(10) << std::right
352 << _format_size(
354 .active_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
355 .peak)
356 << " | " << std::setw(10) << std::right
357 << _format_size(
359 .active_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
360 .allocated)
361 << " | " << std::setw(10) << std::right
362 << _format_size(
364 .active_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
365 .freed)
366 << " |\n"
367 << "|---------------------------------------------------------------------"
368 "------|\n"
369 << "| Requested memory | " << std::setw(10) << std::right
370 << _format_size(
372 .requested_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
373 .current)
374 << " | " << std::setw(10) << std::right
375 << _format_size(
377 .requested_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
378 .peak)
379 << " | " << std::setw(10) << std::right
380 << _format_size(
382 .requested_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
383 .allocated)
384 << " | " << std::setw(10) << std::right
385 << _format_size(
387 .requested_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
388 .freed)
389 << " |\n"
390 << "| from large pool | " << std::setw(10) << std::right
391 << _format_size(
393 .requested_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
394 .current)
395 << " | " << std::setw(10) << std::right
396 << _format_size(
398 .requested_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
399 .peak)
400 << " | " << std::setw(10) << std::right
401 << _format_size(
403 .requested_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
404 .allocated)
405 << " | " << std::setw(10) << std::right
406 << _format_size(
408 .requested_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
409 .freed)
410 << " |\n"
411 << "| from small pool | " << std::setw(10) << std::right
412 << _format_size(
414 .requested_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
415 .current)
416 << " | " << std::setw(10) << std::right
417 << _format_size(
419 .requested_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
420 .peak)
421 << " | " << std::setw(10) << std::right
422 << _format_size(
424 .requested_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
425 .allocated)
426 << " | " << std::setw(10) << std::right
427 << _format_size(
429 .requested_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
430 .freed)
431 << " |\n"
432 << "|---------------------------------------------------------------------"
433 "------|\n"
434 << "| GPU reserved memory | " << std::setw(10) << std::right
435 << _format_size(
437 .reserved_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
438 .current)
439 << " | " << std::setw(10) << std::right
440 << _format_size(
442 .reserved_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
443 .peak)
444 << " | " << std::setw(10) << std::right
445 << _format_size(
447 .reserved_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
448 .allocated)
449 << " | " << std::setw(10) << std::right
450 << _format_size(
452 .reserved_bytes[static_cast<std::size_t>(StatType::AGGREGATE)]
453 .freed)
454 << " |\n"
455 << "| from large pool | " << std::setw(10) << std::right
456 << _format_size(
458 .reserved_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
459 .current)
460 << " | " << std::setw(10) << std::right
461 << _format_size(
463 .reserved_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
464 .peak)
465 << " | " << std::setw(10) << std::right
466 << _format_size(
468 .reserved_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
469 .allocated)
470 << " | " << std::setw(10) << std::right
471 << _format_size(
473 .reserved_bytes[static_cast<std::size_t>(StatType::LARGE_POOL)]
474 .freed)
475 << " |\n"
476 << "| from small pool | " << std::setw(10) << std::right
477 << _format_size(
479 .reserved_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
480 .current)
481 << " | " << std::setw(10) << std::right
482 << _format_size(
484 .reserved_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
485 .peak)
486 << " | " << std::setw(10) << std::right
487 << _format_size(
489 .reserved_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
490 .allocated)
491 << " | " << std::setw(10) << std::right
492 << _format_size(
494 .reserved_bytes[static_cast<std::size_t>(StatType::SMALL_POOL)]
495 .freed)
496 << " |\n"
497 << "|---------------------------------------------------------------------"
498 "------|\n"
499 << "| Non-releasable memory | " << std::setw(10) << std::right
501 .inactive_split_bytes[static_cast<std::size_t>(
502 StatType::AGGREGATE)]
503 .current)
504 << " | " << std::setw(10) << std::right
506 .inactive_split_bytes[static_cast<std::size_t>(
507 StatType::AGGREGATE)]
508 .peak)
509 << " | " << std::setw(10) << std::right
511 .inactive_split_bytes[static_cast<std::size_t>(
512 StatType::AGGREGATE)]
513 .allocated)
514 << " | " << std::setw(10) << std::right
516 .inactive_split_bytes[static_cast<std::size_t>(
517 StatType::AGGREGATE)]
518 .freed)
519 << " |\n"
520 << "| from large pool | " << std::setw(10) << std::right
522 .inactive_split_bytes[static_cast<std::size_t>(
523 StatType::LARGE_POOL)]
524 .current)
525 << " | " << std::setw(10) << std::right
527 .inactive_split_bytes[static_cast<std::size_t>(
528 StatType::LARGE_POOL)]
529 .peak)
530 << " | " << std::setw(10) << std::right
532 .inactive_split_bytes[static_cast<std::size_t>(
533 StatType::LARGE_POOL)]
534 .allocated)
535 << " | " << std::setw(10) << std::right
537 .inactive_split_bytes[static_cast<std::size_t>(
538 StatType::LARGE_POOL)]
539 .freed)
540 << " |\n"
541 << "| from small pool | " << std::setw(10) << std::right
543 .inactive_split_bytes[static_cast<std::size_t>(
544 StatType::SMALL_POOL)]
545 .current)
546 << " | " << std::setw(10) << std::right
548 .inactive_split_bytes[static_cast<std::size_t>(
549 StatType::SMALL_POOL)]
550 .peak)
551 << " | " << std::setw(10) << std::right
553 .inactive_split_bytes[static_cast<std::size_t>(
554 StatType::SMALL_POOL)]
555 .allocated)
556 << " | " << std::setw(10) << std::right
558 .inactive_split_bytes[static_cast<std::size_t>(
559 StatType::SMALL_POOL)]
560 .freed)
561 << " |\n"
562 << "|---------------------------------------------------------------------"
563 "------|\n"
564 << "| Allocations | " << std::setw(10) << std::right
565 << deviceStats.allocation[static_cast<std::size_t>(StatType::AGGREGATE)]
566 .current
567 << " | " << std::setw(10) << std::right
568 << deviceStats.allocation[static_cast<std::size_t>(StatType::AGGREGATE)]
569 .peak
570 << " | " << std::setw(10) << std::right
571 << deviceStats.allocation[static_cast<std::size_t>(StatType::AGGREGATE)]
572 .allocated
573 << " | " << std::setw(10) << std::right
574 << deviceStats.allocation[static_cast<std::size_t>(StatType::AGGREGATE)]
575 .freed
576 << " |\n"
577 << "|---------------------------------------------------------------------"
578 "------|\n"
579 << "| from large pool | " << std::setw(10) << std::right
580 << deviceStats.allocation[static_cast<std::size_t>(StatType::LARGE_POOL)]
581 .current
582 << " | " << std::setw(10) << std::right
583 << deviceStats.allocation[static_cast<std::size_t>(StatType::LARGE_POOL)]
584 .peak
585 << " | " << std::setw(10) << std::right
586 << deviceStats.allocation[static_cast<std::size_t>(StatType::LARGE_POOL)]
587 .allocated
588 << " | " << std::setw(10) << std::right
589 << deviceStats.allocation[static_cast<std::size_t>(StatType::LARGE_POOL)]
590 .freed
591 << " |\n"
592 << "|---------------------------------------------------------------------"
593 "------|\n"
594 << "| from small pool | " << std::setw(10) << std::right
595 << deviceStats.allocation[static_cast<std::size_t>(StatType::SMALL_POOL)]
596 .current
597 << " | " << std::setw(10) << std::right
598 << deviceStats.allocation[static_cast<std::size_t>(StatType::SMALL_POOL)]
599 .peak
600 << " | " << std::setw(10) << std::right
601 << deviceStats.allocation[static_cast<std::size_t>(StatType::SMALL_POOL)]
602 .allocated
603 << " | " << std::setw(10) << std::right
604 << deviceStats.allocation[static_cast<std::size_t>(StatType::SMALL_POOL)]
605 .freed
606 << " |\n"
607 << "|---------------------------------------------------------------------"
608 "------|\n"
609 << "| Active allocs | " << std::setw(10) << std::right
610 << deviceStats.active[static_cast<std::size_t>(StatType::AGGREGATE)]
611 .current
612 << " | " << std::setw(10) << std::right
613 << deviceStats.active[static_cast<std::size_t>(StatType::AGGREGATE)].peak
614 << " | " << std::setw(10) << std::right
615 << deviceStats.active[static_cast<std::size_t>(StatType::AGGREGATE)]
616 .allocated
617 << " | " << std::setw(10) << std::right
618 << deviceStats.active[static_cast<std::size_t>(StatType::AGGREGATE)].freed
619 << " |\n"
620 << "|---------------------------------------------------------------------"
621 "------|\n"
622 << "| from large pool | " << std::setw(10) << std::right
623 << deviceStats.active[static_cast<std::size_t>(StatType::LARGE_POOL)]
624 .current
625 << " | " << std::setw(10) << std::right
626 << deviceStats.active[static_cast<std::size_t>(StatType::LARGE_POOL)].peak
627 << " | " << std::setw(10) << std::right
628 << deviceStats.active[static_cast<std::size_t>(StatType::LARGE_POOL)]
629 .allocated
630 << " | " << std::setw(10) << std::right
631 << deviceStats.active[static_cast<std::size_t>(StatType::LARGE_POOL)].freed
632 << " |\n"
633 << "|---------------------------------------------------------------------"
634 "------|\n"
635 << "| from small pool | " << std::setw(10) << std::right
636 << deviceStats.active[static_cast<std::size_t>(StatType::SMALL_POOL)]
637 .current
638 << " | " << std::setw(10) << std::right
639 << deviceStats.active[static_cast<std::size_t>(StatType::SMALL_POOL)].peak
640 << " | " << std::setw(10) << std::right
641 << deviceStats.active[static_cast<std::size_t>(StatType::SMALL_POOL)]
642 .allocated
643 << " | " << std::setw(10) << std::right
644 << deviceStats.active[static_cast<std::size_t>(StatType::SMALL_POOL)].freed
645 << " |\n"
646 << "|---------------------------------------------------------------------"
647 "------|\n"
648 << "| GPU reserved segments | " << std::setw(10) << std::right
649 << deviceStats.segment[static_cast<std::size_t>(StatType::AGGREGATE)]
650 .current
651 << " | " << std::setw(10) << std::right
652 << deviceStats.segment[static_cast<std::size_t>(StatType::AGGREGATE)].peak
653 << " | " << std::setw(10) << std::right
654 << deviceStats.segment[static_cast<std::size_t>(StatType::AGGREGATE)]
655 .allocated
656 << " | " << std::setw(10) << std::right
657 << deviceStats.segment[static_cast<std::size_t>(StatType::AGGREGATE)].freed
658 << " |\n"
659 << "|---------------------------------------------------------------------"
660 "------|\n"
661 << "| from large pool | " << std::setw(10) << std::right
662 << deviceStats.segment[static_cast<std::size_t>(StatType::LARGE_POOL)]
663 .current
664 << " | " << std::setw(10) << std::right
665 << deviceStats.segment[static_cast<std::size_t>(StatType::LARGE_POOL)].peak
666 << " | " << std::setw(10) << std::right
667 << deviceStats.segment[static_cast<std::size_t>(StatType::LARGE_POOL)]
668 .allocated
669 << " | " << std::setw(10) << std::right
670 << deviceStats.segment[static_cast<std::size_t>(StatType::LARGE_POOL)]
671 .freed
672 << " |\n"
673 << "|---------------------------------------------------------------------"
674 "------|\n"
675 << "| from small pool | " << std::setw(10) << std::right
676 << deviceStats.segment[static_cast<std::size_t>(StatType::SMALL_POOL)]
677 .current
678 << " | " << std::setw(10) << std::right
679 << deviceStats.segment[static_cast<std::size_t>(StatType::SMALL_POOL)].peak
680 << " | " << std::setw(10) << std::right
681 << deviceStats.segment[static_cast<std::size_t>(StatType::SMALL_POOL)]
682 .allocated
683 << " | " << std::setw(10) << std::right
684 << deviceStats.segment[static_cast<std::size_t>(StatType::SMALL_POOL)]
685 .freed
686 << " |\n"
687 << "|---------------------------------------------------------------------"
688 "------|\n"
689 << "| Non-releasable allocs | " << std::setw(10) << std::right
690 << deviceStats
691 .inactive_split[static_cast<std::size_t>(StatType::AGGREGATE)]
692 .current
693 << " | " << std::setw(10) << std::right
694 << deviceStats
695 .inactive_split[static_cast<std::size_t>(StatType::AGGREGATE)]
696 .peak
697 << " | " << std::setw(10) << std::right
698 << deviceStats
699 .inactive_split[static_cast<std::size_t>(StatType::AGGREGATE)]
700 .allocated
701 << " | " << std::setw(10) << std::right
702 << deviceStats
703 .inactive_split[static_cast<std::size_t>(StatType::AGGREGATE)]
704 .freed
705 << " |\n"
706 << "|---------------------------------------------------------------------"
707 "------|\n"
708 << "| from large pool | " << std::setw(10) << std::right
709 << deviceStats
710 .inactive_split[static_cast<std::size_t>(StatType::LARGE_POOL)]
711 .current
712 << " | " << std::setw(10) << std::right
713 << deviceStats
714 .inactive_split[static_cast<std::size_t>(StatType::LARGE_POOL)]
715 .peak
716 << " | " << std::setw(10) << std::right
717 << deviceStats
718 .inactive_split[static_cast<std::size_t>(StatType::LARGE_POOL)]
719 .allocated
720 << " | " << std::setw(10) << std::right
721 << deviceStats
722 .inactive_split[static_cast<std::size_t>(StatType::LARGE_POOL)]
723 .freed
724 << " |\n"
725 << "|---------------------------------------------------------------------"
726 "------|\n"
727 << "| from small pool | " << std::setw(10) << std::right
728 << deviceStats
729 .inactive_split[static_cast<std::size_t>(StatType::SMALL_POOL)]
730 .current
731 << " | " << std::setw(10) << std::right
732 << deviceStats
733 .inactive_split[static_cast<std::size_t>(StatType::SMALL_POOL)]
734 .peak
735 << " | " << std::setw(10) << std::right
736 << deviceStats
737 .inactive_split[static_cast<std::size_t>(StatType::SMALL_POOL)]
738 .allocated
739 << " | " << std::setw(10) << std::right
740 << deviceStats
741 .inactive_split[static_cast<std::size_t>(StatType::SMALL_POOL)]
742 .freed
743 << " |\n"
744 << "|---------------------------------------------------------------------"
745 "------|\n"
746 << "| Oversize allocations | " << std::setw(10) << std::right
747 << deviceStats.oversize_allocations.current << " | " << std::setw(10)
748 << std::right << deviceStats.oversize_allocations.peak << " | "
749 << std::setw(10) << std::right
750 << deviceStats.oversize_allocations.allocated << " | " << std::setw(10)
751 << std::right << deviceStats.oversize_allocations.freed << " |\n"
752 << "|---------------------------------------------------------------------"
753 "------|\n"
754 << "| Oversize GPU segments | " << std::setw(10) << std::right
755 << deviceStats.oversize_segments.current << " | " << std::setw(10)
756 << std::right << deviceStats.oversize_segments.peak << " | "
757 << std::setw(10) << std::right << deviceStats.oversize_segments.allocated
758 << " | " << std::setw(10) << std::right
759 << deviceStats.oversize_segments.freed << " |\n"
760 << "|====================================================================="
761 "======|";
762#else
763 os << "Memory summary is only available for CUDA/HIP devices";
764#endif
765
766 return os.str();
767}
768
770inline void init(std::ostream &os = Log(log::info)) {
771 torch::manual_seed(1);
772
773 // Set number of intraop thread pool threads
774#ifdef IGANET_WITH_OPENMP
775 at::set_num_threads(
776 utils::getenv("IGANET_INTRAOP_NUM_THREADS", omp_get_max_threads()));
777#else
778 at::set_num_threads(utils::getenv("IGANET_INTRAOP_NUM_THREADS", 1));
779#endif
780
781 // Set number of interop thread pool threads
782 at::set_num_interop_threads(utils::getenv("IGANET_INTEROP_NUM_THREADS", 1));
783
784#ifdef IGANET_WITH_MPI
785 int flag;
787
788 if (flag == 0)
789 if (MPI_Init(NULL, NULL) != MPI_SUCESS)
790 throw std::runtime_error("An error occured during MPI initialization");
791
792 int rank;
794 if (rank == 0)
795#endif
796 // Output version information
797 os << getVersion();
798}
799
801inline void finalize(std::ostream &os = Log(log::info)) {
802
803#if defined(CUDA_VERSION) || defined(HIP_VERSION)
804 std::cout << "\n" << memory_summary() << std::endl;
805#endif
806
807#ifdef IGANET_WITH_MPI
808 if (MPI_Finalize() != MPI_SUCCESS)
809 throw std::runtime_error("An error occured during MPI finalization");
810#endif
811
812 os << "Succeeded\n";
813}
814
/// @brief Returns the index of the per-stream storage slot that holds the
/// verbosity flag
///
/// The slot is allocated once, on the first call, and the same index is
/// returned by every later call.
inline int get_iomanip() {
  static const int slot = std::ios_base::xalloc();
  return slot;
}

/// @brief Stream manipulator that marks the stream as verbose
///
/// @param[in,out] os Stream to modify
///
/// @return The stream passed in
inline std::ostream &verbose(std::ostream &os) {
  long &flag = os.iword(get_iomanip());
  flag = 1;
  return os;
}

/// @brief Stream manipulator that marks the stream as regular (not verbose)
///
/// @param[in,out] os Stream to modify
///
/// @return The stream passed in
inline std::ostream &regular(std::ostream &os) {
  long &flag = os.iword(get_iomanip());
  flag = 0;
  return os;
}

/// @brief Tells whether the stream is currently marked as verbose
///
/// @param[in] os Stream to inspect
///
/// @return True if the verbosity flag of the stream is non-zero
inline bool is_verbose(std::ostream &os) {
  const long flag = os.iword(get_iomanip());
  return flag != 0;
}
835
836} // namespace iganet
837
838namespace std {
839
841template <typename T, std::size_t N>
842inline std::ostream &operator<<(std::ostream &os, const std::array<T, N> &obj) {
843 at::optional<std::string> name_ = c10::demangle(typeid(obj).name());
844
845#if defined(_WIN32)
846 // Windows adds "struct" or "class" as a prefix.
847 if (name_->find("struct ") == 0) {
848 name_->erase(name_->begin(), name_->begin() + 7);
849 } else if (name_->find("class ") == 0) {
850 name_->erase(name_->begin(), name_->begin() + 6);
851 }
852#endif // defined(_WIN32)
853
854 os << *name_ << "(";
855 for (const auto &i : obj)
856 os << i << (&i == &(*obj.rbegin()) ? "" : ",");
857 os << ")";
858
859 return os;
860}
861
namespace detail {
/// @brief Writes selected elements of a tuple to a stream, one per line
///
/// @param[in,out] os Output stream
///
/// @param[in] obj Tuple whose elements are written
///
/// The unnamed third parameter is an index sequence; the elements at these
/// indices are written in the given order, each followed by a newline.
///
/// @return The output stream
template <typename... Ts, std::size_t... Is>
inline std::ostream &output_tuple(std::ostream &os,
                                  const std::tuple<Ts...> &obj,
                                  std::index_sequence<Is...>) {
  [[maybe_unused]] const auto emit = [&os](const auto &element) {
    os << element << "\n";
  };
  (emit(std::get<Is>(obj)), ...);
  return os;
}

} // namespace detail
872
874template <typename... Ts>
875inline std::ostream &operator<<(std::ostream &os,
876 const std::tuple<Ts...> &obj) {
877 at::optional<std::string> name_ = c10::demangle(typeid(obj).name());
878
879#if defined(_WIN32)
880 // Windows adds "struct" or "class" as a prefix.
881 if (name_->find("struct ") == 0) {
882 name_->erase(name_->begin(), name_->begin() + 7);
883 } else if (name_->find("class ") == 0) {
884 name_->erase(name_->begin(), name_->begin() + 6);
885 }
886#endif // defined(_WIN32)
887
888 os << *name_ << "(\n";
889 detail::output_tuple(os, obj, std::make_index_sequence<sizeof...(Ts)>());
890 os << "\n)";
891
892 return os;
893}
894
895} // namespace std
Dummy output stream.
Definition core.hpp:110
NullOStream()
Constructor.
Definition core.hpp:113
NullStreamBuffer nullStreamBuffer
Definition core.hpp:116
Dummy stream buffer.
Definition core.hpp:103
int overflow(int c) override
Dummy output.
Definition core.hpp:106
Environment utility function.
T getenv(std::string variable, const T &default_value)
Returns the value from an environment variable.
Definition getenv.hpp:24
Definition boundary.hpp:22
bool is_verbose(std::ostream &os)
Definition core.hpp:831
constexpr bool is_SplineType_v
Alias to the value of is_SplineType.
Definition bspline.hpp:3243
void finalize(std::ostream &os=Log(log::info))
Finalizes the library.
Definition core.hpp:801
struct iganet::@0 Log
Logger.
init
Enumerator for specifying the initialization of B-spline coefficients.
Definition bspline.hpp:55
log
Enumerator for specifying the logging level.
Definition core.hpp:90
@ none
Definition boundary.hpp:38
std::ostream & regular(std::ostream &os)
Definition core.hpp:826
short int short_t
Definition core.hpp:74
std::string memory_summary(c10::DeviceIndex device=0)
Return a human-readable printout of the current memory allocator statistics for a given device.
Definition core.hpp:171
int get_iomanip()
Definition core.hpp:817
std::string getVersion()
Returns version information.
Definition sysinfo.hpp:662
std::ostream & output_tuple(std::ostream &os, const std::tuple< Ts... > &obj, std::index_sequence< Is... >)
Definition core.hpp:864
STL namespace.
std::ostream & operator<<(std::ostream &os, const std::array< T, N > &obj)
Print (as string) an std::array of generic objects.
Definition core.hpp:842
System information.