PyTorch internals: how does PyTorch start?

Andrei Li
2 min read · Feb 3, 2020

--

PyTorch initialization

When you import torch, control passes to torch/__init__.py, which calls `from torch._C import *`. This import is responsible for initializing the PyTorch C modules within PyTorch. The file responsible for torch._C is located at torch/csrc/Module.cpp

In torch/__init__.py the following are initialized:
- _C._initExtension(manager_path())
- _C._init_names(list(torch._storage_classes))

In torch/csrc/Module.cpp, THPModule_initExtension initializes:
- torch::utils::initializeLayouts();
- torch::utils::initializeMemoryFormats();
- torch::utils::initializeQSchemes();
- torch::utils::initializeDtypes();
- torch::tensors::initialize_python_bindings();

Tensor storage is initialized in the same file:

auto module = THPObjectPtr(PyImport_ImportModule("torch"));if (!module) throw python_error();THPDoubleStorage_postInit(module);THPFloatStorage_postInit(module);THPHalfStorage_postInit(module);THPLongStorage_postInit(module);THPIntStorage_postInit(module);THPShortStorage_postInit(module);THPCharStorage_postInit(module);THPByteStorage_postInit(module);THPBoolStorage_postInit(module);THPQUInt8Storage_postInit(module);THPQInt8Storage_postInit(module);THPQInt32Storage_postInit(module);THPBFloat16Storage_postInit(module);THPAutograd_initFunctions();

THPModule_initNames initializes names in the same file.

// Rewrites each type's tp_name to its fully-qualified "module.ClassName" form.
//
// `arg` must be a Python sequence of type objects; for every entry we read
// its __module__ attribute and point tp_name at "<module>.<tp_name>".
// The strings live in a function-local static vector because tp_name is a
// borrowed `const char*` that CPython never copies or frees.
//
// Returns None on success, nullptr (with a Python error set) on failure.
static PyObject * THPModule_initNames(PyObject *self, PyObject *arg)
{
  // Static storage keeps the c_str() buffers alive for the process lifetime.
  static std::vector<std::string> names;
  THPObjectPtr types(PySequence_Fast(arg, "expected a sequence"));
  if (!types) return nullptr;
  // PySequence_Fast_GET_SIZE returns Py_ssize_t; the previous `int` could
  // truncate, and pairing it with a `size_t` loop index was a signed/unsigned
  // comparison. Use Py_ssize_t for both the count and the index.
  Py_ssize_t num_classes = PySequence_Fast_GET_SIZE(types.get());
  // Reserve up front so push_back below cannot reallocate during this call,
  // which would move strings and dangle the tp_name pointers set earlier
  // in the loop.
  names.reserve(names.size() + num_classes);
  for (Py_ssize_t i = 0; i < num_classes; i++) {
    PyObject* obj = PySequence_Fast_GET_ITEM(types.get(), i);
    THPUtils_assert(PyType_Check(obj), "expected a PyTypeObject");
    PyTypeObject* type = (PyTypeObject*)obj;
    THPObjectPtr module_name(PyObject_GetAttrString(obj, "__module__"));
    if (!module_name) return nullptr;
    THPUtils_assert(THPUtils_checkString(module_name.get()),
        "expected __module__ to be a string");
    std::string name = THPUtils_unpackString(module_name.get());
    names.push_back(name + "." + type->tp_name);
    // NOTE(review): a later call's reserve() may still reallocate the vector;
    // short (SSO) strings would then move, dangling tp_name pointers set in
    // earlier calls — verify call sites only invoke this once per type.
    type->tp_name = names.back().c_str();
  }
  Py_RETURN_NONE;
}

The following code should have been placed at the beginning, but then you would probably have lost track of what I am talking about. This part of the code makes everything available in torch._C

PyObject* initModule() {
HANDLE_TH_ERRORS
at::init_num_threads();
C10_LOG_API_USAGE_ONCE("torch.python.import");// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define ASSERT_TRUE(cmd) if (!(cmd)) return nullptr
THPUtils_addPyMethodDefs(methods, TorchMethods);
THPUtils_addPyMethodDefs(methods, DataLoaderMethods);
THPUtils_addPyMethodDefs(methods, torch::autograd::python_functions());
THPUtils_addPyMethodDefs(methods, torch::multiprocessing::python_functions());
#ifdef USE_CUDA
THPUtils_addPyMethodDefs(methods, THCPModule_methods());
#endif
#ifdef USE_CUDNN
THPUtils_addPyMethodDefs(methods, THCUDNN_methods());
#endif
#ifdef USE_DISTRIBUTED
#ifdef USE_C10D
THPUtils_addPyMethodDefs(methods, torch::distributed::c10d::python_functions());
THPUtils_addPyMethodDefs(methods, torch::distributed::rpc::python_functions());
THPUtils_addPyMethodDefs(
methods, torch::distributed::autograd::python_functions());
#endif
#endif

This part of code in torch/csrc/Module.cpp creates torch._C :

static struct PyModuleDef torchmodule = {
PyModuleDef_HEAD_INIT,
"torch._C",
nullptr,
-1,
methods.data()
};
ASSERT_TRUE(module = PyModule_Create(&torchmodule));

This part of the code in torch/csrc/Module.cpp initializes the rest of the PyTorch C++ part.

ASSERT_TRUE(THPWrapper_init(module));
ASSERT_TRUE(THPGenerator_init(module));
ASSERT_TRUE(THPException_init(module));
THPSize_init(module);
THPDtype_init(module);
THPDTypeInfo_init(module);
THPLayout_init(module);
THPMemoryFormat_init(module);
THPQScheme_init(module);
THPDevice_init(module);
ASSERT_TRUE(THPVariable_initModule(module));
ASSERT_TRUE(THPFunction_initModule(module));
ASSERT_TRUE(THPEngine_initModule(module));
// NOTE: We need to be able to access OperatorExportTypes from ONNX for use in
// the export side of JIT, so this ONNX init needs to appear before the JIT
// init.
torch::onnx::initONNXBindings(module);
torch::jit::initJITBindings(module);
torch::throughput_benchmark::initThroughputBenchmarkBindings(module);
torch::autograd::initNNFunctions(module);
torch::autograd::init_legacy_variable(module);
torch::python::init_bindings(module);

--

--

Andrei Li

Born in 1983. MBA from IE Business School, PhD in economics.