PyTorch internals: how does PyTorch start?

Andrei Li
2 min read · Feb 3, 2020

--

PyTorch initialization

When you import torch, control passes to torch/__init__.py, which calls `from torch._C import *`. This import is responsible for initializing the PyTorch C modules within PyTorch. The file responsible for torch._C is located at torch/csrc/Module.cpp

In torch/__init__.py the following are initialized:
- _C._initExtension(manager_path())
- _C._init_names(list(torch._storage_classes))

In torch/csrc/Module.cpp, THPModule_initExtension initializes:
- torch::utils::initializeLayouts();
- torch::utils::initializeMemoryFormats();
- torch::utils::initializeQSchemes();
- torch::utils::initializeDtypes();
- torch::tensors::initialize_python_bindings();

Tensor storage is initialized in the same file:

auto module = THPObjectPtr(PyImport_ImportModule("torch"));if (!module) throw python_error();THPDoubleStorage_postInit(module);THPFloatStorage_postInit(module);THPHalfStorage_postInit(module);THPLongStorage_postInit(module);THPIntStorage_postInit(module);THPShortStorage_postInit(module);THPCharStorage_postInit(module);THPByteStorage_postInit(module);THPBoolStorage_postInit(module);THPQUInt8Storage_postInit(module);THPQInt8Storage_postInit(module);THPQInt32Storage_postInit(module);THPBFloat16Storage_postInit(module);THPAutograd_initFunctions();

THPModule_initNames initializes names in the same file.

// Rewrites each type's tp_name to its fully-qualified "module.ClassName" form.
//
// `arg` must be a Python sequence of type objects; for every entry we read
// its __module__ attribute and point tp_name at "<module>.<tp_name>".
// The strings live in a function-local static vector because tp_name is a
// borrowed `const char*` that CPython never copies or frees.
//
// Returns None on success, nullptr (with a Python error set) on failure.
static PyObject * THPModule_initNames(PyObject *self, PyObject *arg)
{
  // Static storage keeps the c_str() buffers alive for the process lifetime.
  static std::vector<std::string> names;
  THPObjectPtr types(PySequence_Fast(arg, "expected a sequence"));
  if (!types) return nullptr;
  // PySequence_Fast_GET_SIZE returns Py_ssize_t; the previous `int` could
  // truncate, and pairing it with a `size_t` loop index was a signed/unsigned
  // comparison. Use Py_ssize_t for both the count and the index.
  Py_ssize_t num_classes = PySequence_Fast_GET_SIZE(types.get());
  // Reserve up front so push_back below cannot reallocate during this call,
  // which would move strings and dangle the tp_name pointers set earlier
  // in the loop.
  names.reserve(names.size() + num_classes);
  for (Py_ssize_t i = 0; i < num_classes; i++) {
    PyObject* obj = PySequence_Fast_GET_ITEM(types.get(), i);
    THPUtils_assert(PyType_Check(obj), "expected a PyTypeObject");
    PyTypeObject* type = (PyTypeObject*)obj;
    THPObjectPtr module_name(PyObject_GetAttrString(obj, "__module__"));
    if (!module_name) return nullptr;
    THPUtils_assert(THPUtils_checkString(module_name.get()),
        "expected __module__ to be a string");
    std::string name = THPUtils_unpackString(module_name.get());
    names.push_back(name + "." + type->tp_name);
    // NOTE(review): a later call's reserve() may still reallocate the vector;
    // short (SSO) strings would then move, dangling tp_name pointers set in
    // earlier calls — verify call sites only invoke this once per type.
    type->tp_name = names.back().c_str();
  }
  Py_RETURN_NONE;
}

The following code should have been placed at the beginning, but then you would probably have lost track of what I am talking about. This part of the code makes everything available in torch._C

PyObject* initModule() {
HANDLE_TH_ERRORS
at::init_num_threads();
C10_LOG_API_USAGE_ONCE("torch.python.import");// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define ASSERT_TRUE(cmd) if (!(cmd)) return nullptr
THPUtils_addPyMethodDefs(methods, TorchMethods);
THPUtils_addPyMethodDefs(methods, DataLoaderMethods);
THPUtils_addPyMethodDefs(methods, torch::autograd::python_functions());
THPUtils_addPyMethodDefs(methods, torch::multiprocessing::python_functions());
#ifdef USE_CUDA
THPUtils_addPyMethodDefs(methods, THCPModule_methods());
#endif
#ifdef USE_CUDNN
THPUtils_addPyMethodDefs(methods, THCUDNN_methods());
#endif
#ifdef USE_DISTRIBUTED
#ifdef USE_C10D
THPUtils_addPyMethodDefs(methods, torch::distributed::c10d::python_functions());
THPUtils_addPyMethodDefs(methods, torch::distributed::rpc::python_functions());
THPUtils_addPyMethodDefs(
methods, torch::distributed::autograd::python_functions());
#endif
#endif

This part of code in torch/csrc/Module.cpp creates torch._C :

static struct PyModuleDef torchmodule = {
PyModuleDef_HEAD_INIT,
"torch._C",
nullptr,
-1,
methods.data()
};
ASSERT_TRUE(module = PyModule_Create(&torchmodule));

This part of the code in torch/csrc/Module.cpp initializes the rest of the PyTorch C++ part.

ASSERT_TRUE(THPWrapper_init(module));
ASSERT_TRUE(THPGenerator_init(module));
ASSERT_TRUE(THPException_init(module));
THPSize_init(module);
THPDtype_init(module);
THPDTypeInfo_init(module);
THPLayout_init(module);
THPMemoryFormat_init(module);
THPQScheme_init(module);
THPDevice_init(module);
ASSERT_TRUE(THPVariable_initModule(module));
ASSERT_TRUE(THPFunction_initModule(module));
ASSERT_TRUE(THPEngine_initModule(module));
// NOTE: We need to be able to access OperatorExportTypes from ONNX for use in
// the export side of JIT, so this ONNX init needs to appear before the JIT
// init.
torch::onnx::initONNXBindings(module);
torch::jit::initJITBindings(module);
torch::throughput_benchmark::initThroughputBenchmarkBindings(module);
torch::autograd::initNNFunctions(module);
torch::autograd::init_legacy_variable(module);
torch::python::init_bindings(module);

--

--

Andrei Li

Born in 1983. MBA from IE Business School, PhD in economics.