PyPy Status Blog

profiling JIT

# from a tarball
$ virtualenv -p /opt/pypy-c-jit-76426-linux/bin/pypy my-pypy-env

# from the svn checkout
$ virtualenv -p /path/to/pypy-trunk/pypy/translator/goal/pypy-c my-pypy-env

bin/python
bin/pypy
# from a tarball
$ virtualenv -p /opt/pypy-c-jit-76426-linux/bin/pypy my-pypy-env

# from the svn checkout
$ virtualenv -p /path/to/pypy-trunk/pypy/translator/goal/pypy-c my-pypy-env

bin/python
bin/pypy
stackless.py
stackless.py
cppyy
int
double
char*
class example01 {
private:
    static int count;
    int somedata;
public:

    example01(int a) : somedata(a) {
        count++;
    }
    ~example01() {
        count--;
    }
    static int getCount() {
        return count;
    }

    int addDataToInt(int a) {
        return somedata + a;
    }
};
int example01::count = 0;

import cppyy
cppyy.load_lib("example01Dict.so") # contains the Reflex information
example01_class = cppyy.gbl.example01
instance = example01_class(7)
assert example01_class.getCount() == 1
res = instance.addDataToInt(4)
assert res == 11
res = instance.addDataToInt(-4)
assert res == 3
instance.destruct() # so far explicit destruction needed
assert example01_class.getCount() == 0

cppyy
PyROOT
PyROOT
PyROOT
cppyy
int
double
char*
class example01 {
private:
    static int count;
    int somedata;
public:

    example01(int a) : somedata(a) {
        count++;
    }
    ~example01() {
        count--;
    }
    static int getCount() {
        return count;
    }

    int addDataToInt(int a) {
        return somedata + a;
    }
};
int example01::count = 0;

import cppyy
cppyy.load_lib("example01Dict.so") # contains the Reflex information
example01_class = cppyy.gbl.example01
instance = example01_class(7)
assert example01_class.getCount() == 1
res = instance.addDataToInt(4)
assert res == 11
res = instance.addDataToInt(-4)
assert res == 3
instance.destruct() # so far explicit destruction needed
assert example01_class.getCount() == 0

cppyy
PyROOT
PyROOT
PyROOT
[TraceUnfold]
[NativeCall]
pypy.rlib.jit
unroll_safe
dont_look_inside
purefunction
x = hint(x, promote=True)
x
[TraceUnfold]
[NativeCall]
pypy.rlib.jit
unroll_safe
dont_look_inside
purefunction
x = hint(x, promote=True)
x
import cpyext

pypy setup.py build

import cpyext

pypy setup.py build

re
(a|b)*a(a|b){20}a(a|b)*
a
2**(n+1)
re
match
match
match
while
jitdriver = jit.JitDriver(reds=["i", "result", "s"], greens=["re"])

def match(re, s):
    if not s:
        return re.empty
    # shift a mark in from the left
    result = re.shift(s[0], 1)
    i = 1
    while i < len(s):
        jitdriver.can_enter_jit(i=i, result=result, s=s, re=re)
        jitdriver.jit_merge_point(i=i, result=result, s=s, re=re)
        # shift the internal marks around
        result = re.shift(s[i], 0)
        i += 1
    re.reset()
    return result

jitdriver
jitdriver
jit_merge_point
can_enter_jit
can_enter_jit
jit_merge_point
marked
Regex
Char
class Char(Regex):
    _immutable_fields_ = ["c"]
    def __init__(self, c):
        ...

match
_shift
if
and
or
&
|
(a|b)*
Repetition(Alternative(Char('a'), Char('b')))
# arguments of the loop
# i0 is i in the match function
# result0 is result in the match function
# s0 is s in the match function
[i0, result0, s0] # those are the arguments to the machine code
char = s0[i0] # read the character
# read the current mark:
i5 = ConstPtr(ptr_repetition).marked
i7 = char == 'a' # is the character equal to 'a'
i8 = i5 & i7
i10 = char == 'b' # is the character equal to 'b'
i11 = i5 & i10
# write new mark
ConstPtr(ptr_chara).marked = i8
i13 = i8 | i11
# write new mark
ConstPtr(ptr_charb).marked = i11
# write new mark
ConstPtr(ptr_alternative).marked = i13
# increment the index
i17 = i0 + 1
i18 = len(s0)
# write new mark
ConstPtr(ptr_repetition).marked = i13
# check that index is smaller than the length of the string
i19 = i17 < i18
if not i19:
    go back to normally running match
jump(i17, i13, s0) # start from the top again

ConstPtr(ptr_*)
ptr_repetition
Repetition
ptr_chara
Char('a')
ptr_charb
Char('b')
ptr_alternative
Alternative
Repetition
shift
_shift
re
re
re
re
re2
re
(a|b)*a(a|b){20}a(a|b)*
a
2**(n+1)
re
match
match
match
while
jitdriver = jit.JitDriver(reds=["i", "result", "s"], greens=["re"])

def match(re, s):
    if not s:
        return re.empty
    # shift a mark in from the left
    result = re.shift(s[0], 1)
    i = 1
    while i < len(s):
        jitdriver.can_enter_jit(i=i, result=result, s=s, re=re)
        jitdriver.jit_merge_point(i=i, result=result, s=s, re=re)
        # shift the internal marks around
        result = re.shift(s[i], 0)
        i += 1
    re.reset()
    return result

jitdriver
jitdriver
jit_merge_point
can_enter_jit
can_enter_jit
jit_merge_point
marked
Regex
Char
class Char(Regex):
    _immutable_fields_ = ["c"]
    def __init__(self, c):
        ...

match
_shift
if
and
or
&
|
(a|b)*
Repetition(Alternative(Char('a'), Char('b')))
# arguments of the loop
# i0 is i in the match function
# result0 is result in the match function
# s0 is s in the match function
[i0, result0, s0] # those are the arguments to the machine code
char = s0[i0] # read the character
# read the current mark:
i5 = ConstPtr(ptr_repetition).marked
i7 = char == 'a' # is the character equal to 'a'
i8 = i5 & i7
i10 = char == 'b' # is the character equal to 'b'
i11 = i5 & i10
# write new mark
ConstPtr(ptr_chara).marked = i8
i13 = i8 | i11
# write new mark
ConstPtr(ptr_charb).marked = i11
# write new mark
ConstPtr(ptr_alternative).marked = i13
# increment the index
i17 = i0 + 1
i18 = len(s0)
# write new mark
ConstPtr(ptr_repetition).marked = i13
# check that index is smaller than the length of the string
i19 = i17 < i18
if not i19:
    go back to normally running match
jump(i17, i13, s0) # start from the top again

ConstPtr(ptr_*)
ptr_repetition
Repetition
ptr_chara
Char('a')
ptr_charb
Char('b')
ptr_alternative
Alternative
Repetition
shift
_shift
re
re
re
re
re2
re
class Regex(object):
    def __init__(self, empty):
        # empty denotes whether the regular expression
        # can match the empty string
        self.empty = empty
        # mark that is shifted through the regex
        self.marked = False

    def reset(self):
        """ reset all marks in the regular expression """
        self.marked = False

    def shift(self, c, mark):
        """ shift the mark from left to right, matching character c."""
        # _shift is implemented in the concrete classes
        marked = self._shift(c, mark)
        self.marked = marked
        return marked

match
def match(re, s):
    if not s:
        return re.empty
    # shift a mark in from the left
    result = re.shift(s[0], True)
    for c in s[1:]:
        # shift the internal marks around
        result = re.shift(c, False)
    re.reset()
    return result

Regex
Char
class Char(Regex):
    def __init__(self, c):
        Regex.__init__(self, False)
        self.c = c

    def _shift(self, c, mark):
        return mark and c == self.c

Char
Char
Epsilon
class Epsilon(Regex):
    def __init__(self):
        Regex.__init__(self, empty=True)

    def _shift(self, c, mark):
        return False

Epsilons
Binary
Alternative
a|b
class Binary(Regex):
    def __init__(self, left, right, empty):
        Regex.__init__(self, empty)
        self.left = left
        self.right = right

    def reset(self):
        self.left.reset()
        self.right.reset()
        Regex.reset(self)

class Alternative(Binary):
    def __init__(self, left, right):
        empty = left.empty or right.empty
        Binary.__init__(self, left, right, empty)

    def _shift(self, c, mark):
        marked_left  = self.left.shift(c, mark)
        marked_right = self.right.shift(c, mark)
        return marked_left or marked_right

Alternative
Alternative
Alternative
a|b|c
Alternative(Alternative(Char('a'), Char('b')), Char('c'))
"a"
Char('a')
Alternative
Repetition
a*
class Repetition(Regex):
    def __init__(self, re):
        Regex.__init__(self, True)
        self.re = re

    def _shift(self, c, mark):
        return self.re.shift(c, mark or self.marked)

    def reset(self):
        self.re.reset()
        Regex.reset(self)

Repetition
Repetition
Repetition
(a|b|c)*
abcbac
Sequence
ab
class Sequence(Binary):
    def __init__(self, left, right):
        empty = left.empty and right.empty
        Binary.__init__(self, left, right, empty)

    def _shift(self, c, mark):
        old_marked_left = self.left.marked
        marked_left = self.left.shift(c, mark)
        marked_right = self.right.shift(
            c, old_marked_left or (mark and self.left.empty))
        return (marked_left and self.right.empty) or marked_right

Sequence
abc
abcd
d
((abc)*|(abcd))(d|e)
abcabcabcd
abc
(abc)*
match
O(m*n)
m
n
re
class Regex(object):
    def __init__(self, empty):
        # empty denotes whether the regular expression
        # can match the empty string
        self.empty = empty
        # mark that is shifted through the regex
        self.marked = False

    def reset(self):
        """ reset all marks in the regular expression """
        self.marked = False

    def shift(self, c, mark):
        """ shift the mark from left to right, matching character c."""
        # _shift is implemented in the concrete classes
        marked = self._shift(c, mark)
        self.marked = marked
        return marked

match
def match(re, s):
    if not s:
        return re.empty
    # shift a mark in from the left
    result = re.shift(s[0], True)
    for c in s[1:]:
        # shift the internal marks around
        result = re.shift(c, False)
    re.reset()
    return result

Regex
Char
class Char(Regex):
    def __init__(self, c):
        Regex.__init__(self, False)
        self.c = c

    def _shift(self, c, mark):
        return mark and c == self.c

Char
Char
Epsilon
class Epsilon(Regex):
    def __init__(self):
        Regex.__init__(self, empty=True)

    def _shift(self, c, mark):
        return False

Epsilons
Binary
Alternative
a|b
class Binary(Regex):
    def __init__(self, left, right, empty):
        Regex.__init__(self, empty)
        self.left = left
        self.right = right

    def reset(self):
        self.left.reset()
        self.right.reset()
        Regex.reset(self)

class Alternative(Binary):
    def __init__(self, left, right):
        empty = left.empty or right.empty
        Binary.__init__(self, left, right, empty)

    def _shift(self, c, mark):
        marked_left  = self.left.shift(c, mark)
        marked_right = self.right.shift(c, mark)
        return marked_left or marked_right

Alternative
Alternative
Alternative
a|b|c
Alternative(Alternative(Char('a'), Char('b')), Char('c'))
"a"
Char('a')
Alternative
Repetition
a*
class Repetition(Regex):
    def __init__(self, re):
        Regex.__init__(self, True)
        self.re = re

    def _shift(self, c, mark):
        return self.re.shift(c, mark or self.marked)

    def reset(self):
        self.re.reset()
        Regex.reset(self)

Repetition
Repetition
Repetition
(a|b|c)*
abcbac
Sequence
ab
class Sequence(Binary):
    def __init__(self, left, right):
        empty = left.empty and right.empty
        Binary.__init__(self, left, right, empty)

    def _shift(self, c, mark):
        old_marked_left = self.left.marked
        marked_left = self.left.shift(c, mark)
        marked_right = self.right.shift(
            c, old_marked_left or (mark and self.left.empty))
        return (marked_left and self.right.empty) or marked_right

Sequence
abc
abcd
d
((abc)*|(abcd))(d|e)
abcabcabcd
abc
(abc)*
match
O(m*n)
m
n
_sre.so
_sre.so
#pypy
_sre.so
_sre.so
#pypy

Implementation	chars/s	speedup over pure Python
Pure Python code	12'200	1
Python `re` module	2'500'000	205
Google's `re2` implementation	550'000	45
RPython implementation translated to C	720'000	59
C++ implementation	750'000	61
Java implementation	1'920'000	157
RPython implementation with JIT	16'500'000	1352

Implementation	chars/s	speedup over pure Python
Pure Python code	12'200	1
Python `re` module	2'500'000	205
Google's `re2` implementation	550'000	45
RPython implementation translated to C	720'000	59
C++ implementation	750'000	61
Java implementation	1'920'000	157
RPython implementation with JIT	16'500'000	1352

Tuesday, August 17, 2010

Monday, August 2, 2010

Tuesday, July 27, 2010

Monday, July 26, 2010

Friday, July 9, 2010

Saturday, July 3, 2010

A Tracing JIT for .NET

Optimizations Done by the Optimizing JIT

A JavaScript Implementation

Performance Results

Further Possibilities

Comparison With PyPy

PyPy's Optimizations

Summary

A Tracing JIT for .NET

Optimizations Done by the Optimizing JIT

A JavaScript Implementation

Performance Results

Further Possibilities

Comparison With PyPy

PyPy's Optimizations

Summary

Saturday, June 26, 2010

Highlights of this release

Highlights of this release

Tuesday, June 8, 2010

Example Expression and First Numbers

Translating the Matcher

Generating a JIT

JIT Hints

Adaptions to the Original Code

JIT Example

Performance Results With JIT

Conclusion

Sources

Example Expression and First Numbers

Translating the Matcher

Generating a JIT

JIT Hints

Adaptions to the Original Code

JIT Example

Performance Results With JIT

Conclusion

Sources

Monday, May 31, 2010

Jason Creighton: 64bit JIT backend for PyPy

Bartosz Skowron: Fast ctypes for PyPy

Dan Roberts: Numpy in PyPy

Jason Creighton: 64bit JIT backend for PyPy

Bartosz Skowron: Fast ctypes for PyPy

Dan Roberts: Numpy in PyPy

Friday, May 21, 2010

Implementing Regular Expression Matchers

The Algorithm

Alternative

Repetition

Sequence

More Complex Example

Complexity

Summary & Outlook

Implementing Regular Expression Matchers

The Algorithm

Alternative

Repetition

Sequence

More Complex Example

Complexity

Summary & Outlook

Monday, May 3, 2010

Friday, April 9, 2010

Thursday, March 25, 2010

Sunday, March 14, 2010

Saturday, March 13, 2010

Friday, March 12, 2010