the informal ramblings of a formal language researcher

Wednesday, August 31, 2005

GC'ing classes

.NET, as far as I can tell, does not garbage collect unreachable code. At best, you can try to manually manage the memory associated with dynamically loaded code by loading into separate AppDomains that you unload by hand. I have not really experimented with this option.

I was discussing this problem with a friend, who asserted that Java has the same problem.

To prove him wrong, I wrote the following class. You run it on the command line, passing a numeric argument that indicates the number of distinct classes you want to load. Try it out with and without the -Xnoclassgc option in Java!

import java.lang.Integer;
import java.lang.ClassLoader;
import java.lang.Class;
import java.lang.ClassNotFoundException;

/**
 * Small experiment showing whether the JVM reclaims classes that are no longer
 * reachable.
 *
 * <p>The program takes one command-line argument, the number of classes to
 * load. For each iteration it patches three bytes of an embedded class-file
 * image, defines the class {@code T} from that image in a brand-new
 * {@link CLoader}, instantiates it, and prints the instance. Neither the
 * loader nor the class is retained after the iteration, so they become
 * eligible for collection. Compare runs with and without
 * {@code -Xnoclassgc}.
 */
public class ClassSFS {

    /**
     * Index of the first of the three bytes in the class-file image that hold
     * the ASCII text {@code fee} (see the marker comment inside
     * {@link #iTbytes}). These bytes are overwritten before every load.
     */
    private static final int NAME_OFFSET = 48 + 5;

    /** Number of leading bytes (the class-file magic number) echoed by {@link #initbytes()}. */
    private static final int MAGIC_LENGTH = 4;

    /** Prints {@code s} followed by a line terminator on standard output. */
    public static void println(String s) {
        System.out.println(s);
    }

    /** Prints a bare line terminator on standard output. */
    public static void println() {
        System.out.println();
    }

    /** Prints {@code s} on standard output without a line terminator. */
    public static void print(String s) {
        System.out.print(s);
    }

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the number of classes to load; when it is
     *             missing or not an integer a usage message is written to
     *             standard error and the method returns without loading
     *             anything
     */
    public static void main(String[] args) {
        println("Hello World");

        if (args == null || args.length < 1) {
            System.err.println("usage: java ClassSFS <number-of-classes>");
            return;
        }

        final int numClasses;
        try {
            numClasses = Integer.parseInt(args[0]);
        } catch (NumberFormatException e) {
            System.err.println("usage: java ClassSFS <number-of-classes>"
                    + " (not an integer: " + args[0] + ")");
            return;
        }

        initbytes();
        for (int i = 0; i < numClasses; i++) {
            patchName(i);
            loadAndReport(i);
        }
    }

    /**
     * Overwrites the three patchable bytes of the image with lowercase
     * letters derived from {@code i}, using the same formula as the original
     * program: {@code (i/100)%26}, {@code (i/10)%26} and {@code i%26}, each
     * added to {@code 'a'}.
     */
    private static void patchName(int i) {
        Tbytes[NAME_OFFSET] = (byte) ('a' + (i / 100) % 26);
        Tbytes[NAME_OFFSET + 1] = (byte) ('a' + (i / 10) % 26);
        Tbytes[NAME_OFFSET + 2] = (byte) ('a' + i % 26);
    }

    /**
     * Defines {@code T} in a fresh loader, instantiates it through its no-arg
     * constructor and prints the result; reflective failures are reported on
     * standard output and do not stop the run.
     */
    private static void loadAndReport(int i) {
        // A new loader per iteration: nothing keeps it (or its class) alive
        // once this method returns.
        CLoader cl = new CLoader();
        try {
            Class<?> c = cl.findClass("T");
            // Constructor.newInstance wraps constructor exceptions instead of
            // rethrowing them unchecked the way Class.newInstance does.
            Object x = c.getDeclaredConstructor().newInstance();
            println("ClassLoader " + i + ", x:" + x);
        } catch (ClassNotFoundException e) {
            println("ClassLoader " + i + ", ClassNotFound e:" + e);
        } catch (InstantiationException e) {
            println("ClassLoader " + i + ", InstantiationException e:" + e);
        } catch (IllegalAccessException e) {
            println("ClassLoader " + i + ", IllegalAccessException e:" + e);
        } catch (NoSuchMethodException e) {
            println("ClassLoader " + i + ", NoSuchMethodException e:" + e);
        } catch (java.lang.reflect.InvocationTargetException e) {
            println("ClassLoader " + i + ", InvocationTargetException e:" + e);
        }
    }

    /**
     * Class loader that defines whatever class is requested from the current
     * contents of {@link #Tbytes}.
     */
    static class CLoader extends ClassLoader {
        CLoader() {
            super();
        }

        /**
         * Defines a class called {@code name} from the whole of
         * {@link #Tbytes}; no lookup is performed.
         *
         * @param name binary name the bytes are expected to declare
         * @return the newly defined class
         * @throws ClassNotFoundException declared for signature compatibility;
         *         not thrown by this implementation
         */
        @Override
        public Class<?> findClass(String name) throws ClassNotFoundException {
            return super.defineClass(name, Tbytes, 0, Tbytes.length);
        }
    }

    /**
     * Class-file image, one array element per byte, written as ints so that
     * values above 0x7f need no cast.
     */
    private static int[] iTbytes = {

        0xca, 0xfe, 0xba, 0xbe, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x20, 0x0a, 0x00, 0x0a, 0x00, 0x16, 0x07,
        0x00, 0x17, 0x0a, 0x00, 0x02, 0x00, 0x16, 0x08, 0x00, 0x18, 0x0a, 0x00, 0x02, 0x00, 0x19, 0x09,
        0x00, 0x09, 0x00, 0x1a, 0x0a, 0x00, 0x02, 0x00, 0x1b, 0x08, 0x00, 0x0b, 0x07, 0x00, 0x1c, 0x07,
        /* f e e */
        0x00, 0x1d, 0x01, 0x00, 0x03, 0x66, 0x65, 0x65, 0x01, 0x00, 0x12, 0x4c, 0x6a, 0x61, 0x76, 0x61,
        0x2f, 0x6c, 0x61, 0x6e, 0x67, 0x2f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x3b, 0x01, 0x00, 0x06,
        0x3c, 0x69, 0x6e, 0x69, 0x74, 0x3e, 0x01, 0x00, 0x03, 0x28, 0x29, 0x56, 0x01, 0x00, 0x04, 0x43,
        0x6f, 0x64, 0x65, 0x01, 0x00, 0x0f, 0x4c, 0x69, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72,
        0x54, 0x61, 0x62, 0x6c, 0x65, 0x01, 0x00, 0x08, 0x74, 0x6f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67,
        0x01, 0x00, 0x14, 0x28, 0x29, 0x4c, 0x6a, 0x61, 0x76, 0x61, 0x2f, 0x6c, 0x61, 0x6e, 0x67, 0x2f,
        0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x3b, 0x01, 0x00, 0x08, 0x3c, 0x63, 0x6c, 0x69, 0x6e, 0x69,
        0x74, 0x3e, 0x01, 0x00, 0x0a, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x46, 0x69, 0x6c, 0x65, 0x01,
        0x00, 0x06, 0x54, 0x2e, 0x6a, 0x61, 0x76, 0x61, 0x0c, 0x00, 0x0d, 0x00, 0x0e, 0x01, 0x00, 0x16,
        0x6a, 0x61, 0x76, 0x61, 0x2f, 0x6c, 0x61, 0x6e, 0x67, 0x2f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67,
        0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0x01, 0x00, 0x03, 0x54, 0x3c, 0x3e, 0x0c, 0x00, 0x1e, 0x00,
        0x1f, 0x0c, 0x00, 0x0b, 0x00, 0x0c, 0x0c, 0x00, 0x11, 0x00, 0x12, 0x01, 0x00, 0x01, 0x54, 0x01,
        0x00, 0x10, 0x6a, 0x61, 0x76, 0x61, 0x2f, 0x6c, 0x61, 0x6e, 0x67, 0x2f, 0x4f, 0x62, 0x6a, 0x65,
        0x63, 0x74, 0x01, 0x00, 0x06, 0x61, 0x70, 0x70, 0x65, 0x6e, 0x64, 0x01, 0x00, 0x2c, 0x28, 0x4c,
        0x6a, 0x61, 0x76, 0x61, 0x2f, 0x6c, 0x61, 0x6e, 0x67, 0x2f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67,
        0x3b, 0x29, 0x4c, 0x6a, 0x61, 0x76, 0x61, 0x2f, 0x6c, 0x61, 0x6e, 0x67, 0x2f, 0x53, 0x74, 0x72,
        0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0x3b, 0x00, 0x21, 0x00, 0x09, 0x00, 0x0a,
        0x00, 0x00, 0x00, 0x01, 0x00, 0x0a, 0x00, 0x0b, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x01,
        0x00, 0x0d, 0x00, 0x0e, 0x00, 0x01, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x01, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x05, 0x2a, 0xb7, 0x00, 0x01, 0xb1, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00,
        0x00, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x11, 0x00, 0x12, 0x00,
        0x01, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x16, 0xbb,
        0x00, 0x02, 0x59, 0xb7, 0x00, 0x03, 0x12, 0x04, 0xb6, 0x00, 0x05, 0xb2, 0x00, 0x06, 0xb6, 0x00,
        0x05, 0xb6, 0x00, 0x07, 0xb0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00,
        0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x13, 0x00, 0x0e, 0x00, 0x01, 0x00, 0x0f, 0x00,
        0x00, 0x00, 0x1e, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x12, 0x08, 0xb3, 0x00, 0x06,
        0xb1, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00,
        0x02, 0x00, 0x01, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x15

    };

    /** Mutable byte copy of {@link #iTbytes}; filled by {@link #initbytes()} and patched by {@code main}. */
    private static byte[] Tbytes;

    /**
     * Copies {@link #iTbytes} into {@link #Tbytes}, narrowing each element to
     * a byte, then prints the first four bytes as eight lowercase hex digits
     * followed by a newline.
     */
    public static void initbytes() {
        Tbytes = new byte[iTbytes.length];
        for (int i = 0; i < iTbytes.length; i++) {
            Tbytes[i] = (byte) iTbytes[i];
        }

        for (int i = 0; i < MAGIC_LENGTH; i++) {
            // Mask after shifting: the byte is sign-extended to int first.
            print("" + nybble2char((Tbytes[i] >> 4) & 0xF));
            print("" + nybble2char(Tbytes[i] & 0xF));
        }
        println();
    }

    /**
     * Converts a value in the range 0..15 to its lowercase hexadecimal digit.
     *
     * @param b the nybble value; callers mask with {@code 0xF} beforehand
     * @return {@code '0'}..{@code '9'} or {@code 'a'}..{@code 'f'}; the NUL
     *         character if {@code b} is outside 0..15
     */
    private static char nybble2char(int b) {
        return Character.forDigit(b, 16);
    }

    /** Public counter kept from the original program; not referenced within this class. */
    public static int Tcounter = 0;
}


The iTbytes array was generated by compiling public class T { private static String fee = "fee"; public String toString() { return "T<>"+fee; }}, then loading the resulting class file into emacs, switching to hexl-mode, and doing some keyboard-macrology to convert it to something javac would accept.

Thursday, August 25, 2005

C#, pass-by-value, pass-by-reference

Here's a nice little snippet of C# for you.
using System;

namespace InterfaceSample {
    public delegate void Changed();

    // A 2-D point whose coordinates can be both read and written.
    interface IPoint {
        int X { get; set; }
        int Y { get; set; }
    }

    // Value-type implementation of IPoint backed by two private ints.
    struct Point : IPoint {
        private int x;
        private int y;

        public int X {
            get { return x; }
            set { x = value; }
        }

        public int Y {
            get { return y; }
            set { y = value; }
        }
    }

    public class EntryPoint {
        // One line of output: the coordinates of all four variables.
        private const string Format =
            " p1.X: {0}, p1.Y: {1}, p2.X: {2}, p2.Y: {3} ip1.X: {4}, ip1.Y: {5}, ip2.X: {6}, ip2.Y: {7}";

        // Writes the current coordinates of the four variables on one line.
        private static void Report(Point p1, Point p2, IPoint ip1, IPoint ip2) {
            Console.WriteLine(Format,
                              p1.X, p1.Y,
                              p2.X, p2.Y,
                              ip1.X, ip1.Y,
                              ip2.X, ip2.Y);
        }

        // Creates a struct, views it through the interface, converts it back,
        // aliases the interface reference, and reports the state after each mutation.
        public static int Main() {
            Point p1 = new Point();
            p1.X = p1.Y = 42;

            IPoint ip1 = p1;           // struct assigned to an interface variable
            Point p2 = (Point) ip1;    // cast back to the struct type
            IPoint ip2 = ip1;          // second interface variable from the first

            Report(p1, p2, ip1, ip2);

            p1.X = p1.Y = 21;
            Console.WriteLine("p1.X = p1.Y = 21;");
            Report(p1, p2, ip1, ip2);

            ip1.X = ip1.Y = 84;
            Console.WriteLine("ip1.X = ip1.Y = 84;");
            Report(p1, p2, ip1, ip2);

            return 0;
        }
    }
}
In C#, classes and interfaces can have properties, which have usage syntax similar to fields but semantics similar to methods. In the interface, you just declare the property name and whether it has getters/setters; in the class, you then define the behavior you want for the property.

In C#, you can declare struct types, which are value types in the language. This means that they are passed by value. However, they are not immutable.

In C#, a struct type can implement an interface. Ah, what fun.

Here's the output of the above program:
  p1.X: 42, p1.Y: 42, p2.X: 42, p2.Y: 42 ip1.X: 42, ip1.Y: 42, ip2.X: 42, ip2.Y: 42
p1.X = p1.Y = 21;
p1.X: 21, p1.Y: 21, p2.X: 42, p2.Y: 42 ip1.X: 42, ip1.Y: 42, ip2.X: 42, ip2.Y: 42
ip1.X = ip1.Y = 84;
p1.X: 21, p1.Y: 21, p2.X: 42, p2.Y: 42 ip1.X: 84, ip1.Y: 84, ip2.X: 84, ip2.Y: 84


The mutation to p1's properties is not propagated over to ip1, which looks odd to a Java programmer like me. This is because the assignment ip1 = p1 makes a copy of p1 when it "boxes" it into ip1.

And even odder, given the previous paragraph, the mutations to ip1 are carried over to ip2. Actually, this isn't so odd, since ip1 is an interface after all, that might ("must", considering boxing?) be implemented by an object, and therefore the assignment must copy a reference to the object.

Finally, you can copy from the interface back to a Point, but this requires a cast. This makes sense (see previous paragraph).

In the end, I don't think I have a big problem with value types. It's just the mutable value types that I get nervous about, because then you actually need to start thinking about the copy/reference semantics.

Wednesday, August 10, 2005

old stack based languages

We all know that I've been looking at Forth to digest its approach to allowing powerful compile-time constructions.

Fare, an LL-discuss member, mentioned POP-11 as an alternative to Forth (that might be even older). It seems to also have the ability to define compile-time programs; it remains to be seen how it compares to Forth (or Lisp/Scheme, for that matter).

heh


I just realized that my use of "We all know" up above is completely unfounded, because I didn't bother to blog during the month of July, which was when I was investigating Forth so heavily. I suppose I should finish the investigation (or at least both the books from the library) and write something up.

rules for intermediate representations

And the muse of compiler development did rise from her murky swamp, and did say unto the Larceny developers, "Thou shalt not convert your aye-arrh into object form, be it string, bytecode, or otherwise, until the last possible moment."

The Larceny developers did take exception to this rule, pleading "but we have chosen an invertible object form, from which the most exalted client developer may extract the original structured aye-arrh. Its strings are formatted thusly, isomorphic to the structure of the input, and thus less painful for my mortal eyes to gaze upon than the radiance of the aye-arrh structure itself."

To this, the muse of compiler development rules responds, "verily, you might take such a path; but then you must also provide such inversion functions, and not place the onus of developing such functions upon the shoulders of the most exalted client developer, who is already fed up with trying to make sense of your underspecified and confusingly named interfaces."

Here endeth the lesson.

Thursday, August 04, 2005

On Macros and JavaDot

Tonight I made a fun macro that tries to cut down on the verbosity when you refer to classes using Javadot; normally you have to explicitly write out the full name with all the package prefixes. What I want is to introduce a nice shorthand, similar to the shorthand introduced by the import statement in Java.

Here is the macro:
;; (let/import ((X Y Z1 Z2 ...)) BODY ...) binds Y.Z1, Y.Z2, ... to
;; the expressions X.Y.Z1, X.Y.Z2, ..., and naturally generalizes to
;; more than one prefix X.
;; As one special case, if Zn is (), then that means import the constructor
;; X.Y. as the name Y. (note the period on the end).
;;
;; The expansion is a single `let' whose binding list is the concatenation
;; of the bindings produced for every (X Y Z ...) clause, wrapped around
;; BODY unchanged.
(define-syntax let/import
;; Low-level transformer: receives the whole form plus two procedures.
;; `ren' is applied to 'let below (presumably the renaming procedure that
;; yields the hygienic `let' -- confirm against the Larceny docs); `cmp'
;; is not used.
(transformer (lambda (stx ren cmp)
;; stx = (let/import BINDINGS BODY ...)
(let ((bindings (cadr stx))
(body (cddr stx))
;; Maps one (X Y Z1 Z2 ...) clause to a list of (name expr) let-bindings.
(construct-new-bindings
(lambda (binding)
;; ->s: symbol -> string, with () mapped to "" so that an empty
;; suffix yields the trailing-period constructor name.
(let* ((->s (lambda (x)
(if (null? x)
""
(symbol->string x))))
(prefix (->s (car binding)))
(middle (->s (cadr binding)))
(suffixes (map ->s (cddr binding)))
;; s->/append: concatenate the argument strings into one symbol.
(s->/append (lambda l
(string->symbol (apply string-append l))))
;; make-binding: given the string pieces of the short name, build
;; (short-name javadot-expr), where javadot-expr is the short name
;; prepended with PREFIX and "." and passed through
;; symbol->javadot-symbol.
(make-binding (lambda l
(list (apply s->/append l)
(symbol->javadot-symbol
(apply s->/append prefix "." l))))))
;; One binding per suffix: MIDDLE.SUFFIX -> PREFIX.MIDDLE.SUFFIX
(map (lambda (suffix) (make-binding middle "." suffix))
suffixes)))))
;; Flatten the per-clause binding lists and splice them into the let.
`(,(ren 'let) (,@(apply append (map construct-new-bindings bindings)))
,@body)))))


This pretty much works.

However, using it seems to have exposed what I'd call a bug in how Common Larceny's macro expander interacts with Javadot.

Watch this:

> (let () System.Reflection.Emit.AssemblyBuilderAccess.RunAndSave$)
#<procedure of 0 arguments>

> (let () System.Reflection.Emit.AssemblyBuilderAccess.Run$)
#<procedure of 0 arguments>

> (let/import ((System.Reflection.Emit AssemblyBuilderAccess RunAndSave$)) AssemblyBuilderAccess.RunAndSave$)
#<procedure of 0 arguments>

> (let/import ((System.Reflection.Emit AssemblyBuilderAccess RunAndSave$)) System.Reflection.Emit.AssemblyBuilderAccess.Run$)
#<procedure of 0 arguments>

> (let/import ((System.Reflection.Emit AssemblyBuilderAccess RunAndSave$)) System.Reflection.Emit.AssemblyBuilderAccess.RunAndSave$)

Error: Reference to undefined global variable "system.reflection.emit.assemblybuilderaccess.runandsave$".

>


What huppen!?!

I dunno, but Ryan and I tried looking at the expanded output:

> (macro-expand '(let/import ((System.Reflection.Emit AssemblyBuilderAccess RunAndSave$)) System.Reflection.Emit.AssemblyBuilderAccess.Run$))
((lambda () ((lambda (.assemblybuilderaccess.runandsave$|4) (clr/find-static-field-getter '#f 'system.reflection.emit.assemblybuilderaccess.run)) (clr/find-static-field-getter '#f 'system.reflection.emit.assemblybuilderaccess.runandsave))))

> (macro-expand '(let/import ((System.Reflection.Emit AssemblyBuilderAccess RunAndSave$)) AssemblyBuilderAccess.RunAndSave$))
((lambda () ((lambda (.assemblybuilderaccess.runandsave$|4) .assemblybuilderaccess.runandsave$|4) (clr/find-static-field-getter '#f 'system.reflection.emit.assemblybuilderaccess.runandsave))))

> (macro-expand '(let/import ((System.Reflection.Emit AssemblyBuilderAccess RunAndSave$)) System.Reflection.Emit.AssemblyBuilderAccess.RunAndSave$))
((lambda () ((lambda (.assemblybuilderaccess.runandsave$|4) (clr/find-static-field-getter '#f 'system.reflection.emit.assemblybuilderaccess.runandsave)) system.reflection.emit.assemblybuilderaccess.runandsave$)))

>


Update


It turns out this isn't even a problem with Macros; it looks like it's just a bug in our JavaDot implementation when you refer to the same identifier more than once. E.g.:
> (begin (display System.String.class) (display System.String.class))
#<System.RuntimeType System.String>
Error: Reference to undefined global variable "system.string.class".
So much for getting excited about some strange new bug...

Wednesday, August 03, 2005

Tales from the Larceny source: Eta the Ultimate


; Note: the idiom that is seen in this file,
; (emit-fixup-proc! as (lambda (b l) (fixup b l)))
; when `fixup' is a local procedure, avoids allocation of the closure
; except in the cases where the fixup is in fact needed, for gains in
; speed and reduction in allocation. (Ask me if you want numbers.)

I didn't understand this note in sparcasm.sch when I read it a week or so ago, so I asked Will to explain it to me. Here is what I got out of the explanation.

The code is something like:
;; Eta-expanded form: fixup is only ever *called* (inside the wrapper
;; lambda), never passed around as a value.
(define (foo as x)
;; Local helper; its body is elided in this sketch.
(define (fixup bv loc) --omitted--)
(if (usually-true)
(bar)
;; Uncommon path: only here is a closure (the wrapper lambda) created.
(emit-fixup-proc! as (lambda (b l) (fixup b l)))))
The idea here is that we know all the points where fixup is used, and it is being invoked at each one. Thus, fixup qualifies as a "known local procedure," and we can compile it directly to a sequence of machine code instructions in the current text segment, and make it just another label in the compiled machine code for foo that we can jump to.

If we hadn't eta-expanded the reference to fixup, a la:
;; Non-eta-expanded form: fixup itself escapes as an argument.
(define (foo as x)
;; Local helper; its body is elided in this sketch.
(define (fixup bv loc) --omitted--)
(if (usually-true)
(bar)
;; fixup is handed to emit-fixup-proc! as a first-class value, so the
;; compiler cannot see every place it might later be invoked.
(emit-fixup-proc! as fixup)))
then fixup is no longer a "known local procedure", because we cannot tell what emit-fixup-proc! will do with it (e.g. it may store it into a global variable which will later be invoked), and therefore in this latter case, we will conservatively generate a closure object and bind that to fixup.

In the former case, we also generate a closure object for the whole lambda expression, but we only generate this object on the uncommon control flow path; fixup itself is a free variable referenced by the closure, and therefore remains a simple label in the machine code text of foo.

This leads to a few questions, none of which I thought of in time to ask Will about them:
  1. Couldn't we have achieved the same effect (but without eta-expanding) by pushing the definition of fixup down closer to its use?
    • Guesswork answer: yes, but in the real source tree, fixup is actually referenced multiple times. Even then, one might be able to get away with cloning the definition and pushing the separate clones down, but now you have to trade off the blowup in static code size versus the cost of allocating the closure at runtime.
  2. What about an optimization pass to automatically introduce eta expansions on references like these to otherwise known local procedures?
    • Guesswork answer: well, note here we rely on a human-provided assurance as to what the common code path is. If it were more common to take the other route, then I don't think eta-expansion would be a win anymore. What kind of performance hit could such a transformation introduce then?
  3. What about optimizing whatever is introducing the code for the allocation of the closure for fixup, so that the introduced code doesn't allocate until it absolutely must (due to data-dependency requirements, either by a use of fixup or a mutation of some data that the construction of fixup requires)?
    • I dunno. Sounds tricky. Also note that this is pretty similar to (1.) above, except applied at a lower level than Scheme source.

Followers