On GitHub: zolkko/pug-rust
25% better performance than CPython
"maybe like 10% runtime", hot attr
by Kevin Modzelewski @ pyston talk November 24, 2015
# Benchmark input: every 17-character string over the alphabet 'abc'
# (3**17 entries, assuming `product` is itertools.product -- TODO confirm).
LST = list(map(''.join, product('abc', repeat=17)))


def foo():
    # Variant 1: upper-case every element of LST through map().
    return map(str.upper, LST)


def bar():
    # Variant 2: explicit loop with list.append(). Presumably kept as a plain
    # loop on purpose, so it can be compared against foo() and baz().
    res = []
    for i in LST:
        res.append(i.upper())
    return res


def baz():
    # Variant 3: list comprehension.
    return [i.upper() for i in LST]
# JIT-compiled variant: the decorator receives an explicit
# int32(int32, int32) signature plus nopython=True and nogil=True.
@jit(int32(int32, int32), nopython=True, nogil=True)
def add_two(a, b):
    # Accumulate (a + b) one thousand times using an explicit counter.
    acc = 0
    i = 0
    while i < 1000:
        acc += a + b
        i += 1
    return acc
12 SETUP_LOOP 40 (to 55) 15 LOAD_FAST 3 (i) 18 LOAD_CONST 2 (1000) 21 COMPARE_OP 0 (<) 24 POP_JUMP_IF_FALSE 54 27 LOAD_FAST 2 (acc) 30 LOAD_FAST 0 (a) 33 LOAD_FAST 1 (b) 36 BINARY_ADD 37 INPLACE_ADD 38 STORE_FAST 2 (acc) 41 LOAD_FAST 3 (i) 44 LOAD_CONST 3 (1) 47 INPLACE_ADD 48 STORE_FAST 3 (i) 51 JUMP_ABSOLUTE 15 54 POP_BLOCK
___main__.add_two$1.int32.int32: addl %r8d, %ecx imull $1000, %ecx, %eax movl %eax, (%rdi) xorl %eax, %eax retq
add_two.inspect_asm().values()[0].decode('string_escape')
_wrapper.__main__.add_two$1.int32.int32: movq %rdi, %r14 movabsq $_.const.add_two, %r10 movabsq $_PyArg_UnpackTuple, %r11 ... movabsq $_PyNumber_Long, %r15 callq *%r15 movq %rax, %rbx xorl %r14d, %r14d testq %rbx, %rbx je LBB1_8 movabsq $_PyLong_AsLongLong, %rax ...
+ ~77 Python C API instructions
Rich Wareham, "Creating a toy language with the Python, LLVM and the IPython web notebook" https://www.youtube.com/watch?v=G78cTmgeUxI
// sample.c void initsample(void) { Py_InitModule("sample", NULL); }
static PyObject * add_two(PyObject * self, PyObject * args); static PyMethodDef SampleMethods[] = { {"add_two", add_two, METH_VARARGS, ""}, {NULL, NULL, 0, NULL} }; void initsample(void) { Py_InitModule("sample", SampleMethods); }
/*
 * add_two(a, b): parse two C ints from the argument tuple, add (a + b) to an
 * accumulator 1000 times and return the total as a Python integer.
 *
 * On a parse failure the current exception is replaced with a bare
 * ValueError and NULL is returned.
 */
PyObject *
add_two(PyObject *self, PyObject *args)
{
    int a, b;
    int acc = 0;
    int i = 0;

    if (PyArg_ParseTuple(args, "ii", &a, &b) == 0) {
        PyErr_SetNone(PyExc_ValueError);
        return NULL;
    }

    while (i < 1000) {
        acc += a + b;
        i++;
    }

    return Py_BuildValue("i", acc);
}
import sample
IMPORT_NAME 0 (sample) STORE_FAST 0 (sample)
// ceval.c ... w = GETITEM(names, oparg); v = PyDict_GetItemString(f->f_builtins, "__import__"); ... w = PyTuple_Pack(4, w, f->f_globals, f->f_locals == NULL ? Py_None : f->f_locals, v); ... x = PyEval_CallObject(v, w); ... SET_TOP(x); if (x != NULL) DISPATCH(); ...
/*
 * Resolve the init function of a dynamically loaded extension module.
 *
 * Formats the symbol name "init<shortname>" (shortname truncated to at most
 * 200 characters by the "%.200s" conversion) into a 258-byte stack buffer,
 * then asks dl_loadmod() to look that symbol up in the file at `pathname`,
 * passing the interpreter's program name as the first argument.
 *
 * `fqname` and `fp` are not used in this excerpt.
 */
dl_funcptr _PyImport_GetDynLoadFunc(const char *fqname, const char *shortname, const char *pathname, FILE *fp) { char funcname[258]; PyOS_snprintf(funcname, sizeof(funcname), "init%.200s", shortname); return dl_loadmod(Py_GetProgramName(), pathname, funcname); }
def add_two(a, b):
    """Return (a + b) accumulated 1000 times."""
    i = acc = 0
    while i < 1000:
        acc += a + b
        # Advance the counter; without this the loop condition never changes
        # and the function never returns.
        i += 1
    return acc
$ cat sample.c | wc -l 1906
__pyx_t_2 = PyNumber_Add(__pyx_v_a, __pyx_v_b); if (unlikely(!__pyx_t_2)) { __pyx_filename = __pyx_f[0]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error; } __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = PyNumber_InPlaceAdd(__pyx_v_acc, __pyx_t_2); if (unlikely(!__pyx_t_3)) { __pyx_filename = __pyx_f[0]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error; }
/* Add (a + b) to a 32-bit accumulator one thousand times and return it. */
int cadd_two(int a, int b)
{
    int32_t acc = 0;
    int remaining = 1000;

    while (remaining-- > 0) {
        acc += a + b;
    }
    return acc;
}
# Declare the C function provided by sample_func.h so it can be called from
# Cython code.
cdef extern from "sample_func.h":
    int cadd_two(int, int)


def add_two(a, b):
    # Python-visible wrapper: forwards both arguments to the C implementation.
    return cadd_two(a, b)
cythonize("sample.pyx", sources=[ 'sample_func.c' ])
__pyx_t_1 = __Pyx_PyInt_As_int32_t(__pyx_v_a); if (unlikely((__pyx_t_1 == (int32_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_ __pyx_t_2 = __Pyx_PyInt_As_int32_t(__pyx_v_b); if (unlikely((__pyx_t_2 == (int32_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_ __pyx_t_3 = __Pyx_PyInt_From_int32_t(cadd_two(__pyx_t_1, __pyx_t_2)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; _
/// Entry point looked up by the interpreter when `sample` is imported
/// (exported unmangled as `initsample`).
///
/// Registers the module by passing the module name, the method table, a null
/// docstring, a zero `self` argument and the API version to
/// `Py_InitModule4_64`. The returned module pointer is discarded.
#[no_mangle]
pub extern fn initsample() {
    let module_name = &SAMPLE[0] as *const _;
    let method_table = &METHODS[0] as *const _;
    let no_doc = std::ptr::null();

    // SAFETY: assumes SAMPLE is a NUL-terminated byte string and METHODS ends
    // with a sentinel entry, both living for the whole program -- TODO confirm
    // against their definitions.
    unsafe {
        Py_InitModule4_64(module_name, method_table, no_doc, 0, PYTHON_API_VERSION);
    }
}
// Function-pointer type for the callables stored in a method table: an unsafe
// C-ABI function taking two raw pointers (`slf`, `args`) and returning a raw
// pointer. `*mut isize` is used here as the stand-in for an object pointer.
type PyCFunction = unsafe extern "C" fn (slf: *mut isize, args: *mut isize) -> *mut isize;
// One method-table entry, laid out with C representation: name pointer,
// optional function pointer, integer flags and docstring pointer.
// The `unsafe impl Sync` asserts that entries may be shared between threads;
// raw-pointer fields are not `Sync` by default, so this is presumably what
// lets the table live in a static -- verify the pointed-to data is immutable.
#[repr(C)] struct PyMethodDef { pub ml_name: *const i8, pub ml_meth: Option<PyCFunction>, pub ml_flags: i32, pub ml_doc: *const i8, } unsafe impl Sync for PyMethodDef { }
lazy_static! {
    // Lazily built method table; `initsample` passes a pointer to its first
    // element to `Py_InitModule4_64`.
    // The element type must be spelled out: a bare `Vec` is not a valid type.
    static ref METHODS: Vec<PyMethodDef> = {
        vec![
            PyMethodDef {
                ml_name: &ADD_TWO[0] as *const _,
                ml_meth: Some(add_two),
                // NOTE(review): `ml_flags` and `ml_doc` are not shown in this
                // excerpt and must be supplied for the literal to compile.
            },
            // ... further entries are elided in this excerpt
        ]
    };
}
// Foreign declarations for the CPython 2.7 C API functions used by this module.
#[link(name="python2.7")]
extern {
    // Registers a module by name with a method table.
    // NOTE(review): the C prototype takes an object pointer for `s` and a C
    // `int` for `apiver`; the wider integer types are kept here so existing
    // call sites keep compiling -- confirm before tightening.
    fn Py_InitModule4_64(name: *const i8, methods: *const PyMethodDef, doc: *const i8, s: isize, apiver: usize) -> *mut isize;

    // Returns a C `int` (zero on failure). Declaring the result wider than
    // `int` would make Rust read register bits the callee never defined, so
    // a `== 0` check could miss a failure.
    fn PyArg_ParseTuple(arg1: *mut isize, arg2: *const i8, ...) -> std::os::raw::c_int;

    // Builds a Python object from a format string and C values.
    fn Py_BuildValue(arg1: *const i8, ...) -> *mut isize;
}
/// C-ABI implementation of `add_two(a, b)`: parses two 32-bit integers from
/// `args`, accumulates `a + b` one thousand times and returns the total as a
/// new object built by `Py_BuildValue`.
///
/// Returns a null pointer when argument parsing fails; the exception is the
/// one left behind by `PyArg_ParseTuple`.
///
/// # Safety
///
/// `args` must be a valid argument-tuple pointer supplied by the interpreter.
// `slf` is required by the PyCFunction signature but is not used here.
#[allow(unused_variables)]
unsafe extern "C" fn add_two(slf: *mut isize, args: *mut isize) -> *mut isize {
    let mut a: i32 = 0;
    let mut b: i32 = 0;

    // The C side writes the parsed values through these pointers, so they must
    // be derived from mutable borrows, not shared references.
    if PyArg_ParseTuple(args, &II_ARGS[0] as *const _, &mut a as *mut i32, &mut b as *mut i32) == 0 {
        return std::ptr::null_mut();
    }

    // Wrapping arithmetic: an overflow must not panic, because unwinding out
    // of an `extern "C"` function into the interpreter is not permitted.
    let mut acc: i32 = 0;
    for _ in 0..1000 {
        acc = acc.wrapping_add(a.wrapping_add(b));
    }

    Py_BuildValue(&I_ARGS[0] as *const _, acc)
}
let acc: i32 = (0..).take(1000) .map(|_| a + b) .fold(0, |acc, x| acc + x);
__ZN7add_two20h391818698d43ab0ffcaE: ... callq 0x7a002 ## symbol stub for: _PyArg_ParseTuple testq %rax, %rax je 0x14e3 movl -0x8(%rbp), %eax addl -0x4(%rbp), %eax imull $0x3e8, %eax, %esi ## imm = 0x3E8 leaq _ref5540(%rip), %rdi ## literal pool for: "h" ...
https://github.com/dgrunwald/rust-cpython
#![feature(slice_patterns)] #[macro_use] extern crate cpython; use cpython::{PyObject, PyResult, Python, PyTuple, PyDict}; py_module_initializer!(sample, |py, m| { try!(m.add(py, "add_two", py_fn!(add_two))); Ok(()) }); fn add_two(p: Python, args: &PyTuple, kw: Option<&PyDict>) -> PyResult<PyObject> { match args.as_slice() { [a, b] => { let acc: i32 = 0; for i in 0..1000 { acc += a.value(p) + b.value(p) } Ok(acc.to_py_object()) }, _ => Ok(py.None()) } }