Archival post. This script was used to run the experiments for a CICLing 2007
submission, in which features were generated from external training data.
The runnable script starts at the "#!/bin/csh" line and ends at the last "end".
=======================================================================
#!/bin/csh
########### This script shows how to acquire features from a separate
########### set of training data and use them to represent context
########### vectors in the SenseClusters native order 2 methodology.
###########
########### By Ted Pedersen, October 2006
###########
########### DATA PREPARATION
## root directory; TESTDIR and TRAINDIR below are derived from it
set HOMEDIR = /home/ted/Web
## where test files are, in sval2 (xml) format
set TESTDIR = $HOMEDIR/Test
## where training data resides, in plain text format
set TRAINDIR = $HOMEDIR/TrainNYT
# make sure test and training directories are really there!
# -d is used instead of -e so that a plain file with the same name
# is rejected as well as a missing path.
if (! -d $TESTDIR) then
    echo "No Test Dir <$TESTDIR>"
    exit 1
endif
if (! -d $TRAINDIR) then
    echo "No Train Dir <$TRAINDIR>"
    exit 1
endif
# run through several different combinations of corpora and settings...
#   CORPUS, REMOVE, STAT : select the training file nyt-CORPUS-REMOVE.STAT
#                          found in $TRAINDIR
#   MEASURE              : cluster stopping measure handed to clusterstopping.pl
#   TEST                 : sval2 test file found in $TESTDIR
# Every run leaves its output in a directory named TEST-TRAIN-MEASURE under
# the current working directory.
foreach CORPUS (25 75)
  foreach STAT (leftFisher ll pmi odds)
    foreach REMOVE (5 10 20 50)
      set TRAIN = nyt-$CORPUS-$REMOVE.$STAT
      ########### CREATE FEATURE MATCH PATTERNS
      # This depends only on the training file, so it is done once per
      # training file rather than once per measure and test file.
      nsp2regex.pl $TRAINDIR/$TRAIN > $TRAINDIR/$TRAIN.regex
      foreach MEASURE (pk2 pk3 gap)
        foreach TEST (alston2.xml connor2.xml miller3.xml collins4.xml pedersen4.xml)
          echo "---------running $TRAIN $MEASURE--------"
          ########### SECOND ORDER CONTEXT REPRESENTATION
          # create order 2 vec with bigram features
          wordvec.pl $TRAINDIR/$TRAIN --feats $TRAIN.feats > $TRAIN.wordvec
          nsp2regex.pl $TRAIN.feats > $TRAIN.regex.feats
          order2vec.pl --rclass $TRAIN.rclass --rlabel $TRAIN.rlabel $TESTDIR/$TEST $TRAIN.wordvec $TRAIN.regex.feats > $TRAIN.vector
          echo "order2vec done"
          ########### CLUSTERSTOPPING AND CLUSTERING
          # Pass the measure of the current iteration; without --measure every
          # MEASURE iteration would run the same default measure.
          clusterstopping.pl $TRAIN.vector --prefix $TRAIN --measure $MEASURE > $TRAIN.prediction
          # The redirection above always creates the prediction file, so an
          # empty file (-z) is what signals that no prediction was made.
          if (! -e $TRAIN.prediction || -z $TRAIN.prediction) then
            echo "No Cluster Prediction, Assume 2"
            set CLUSTERS = 2
          else
            set CLUSTERS = `cat $TRAIN.prediction`
            echo "Predict $CLUSTERS"
          endif
          vcluster -rclass $TRAIN.rclass -rlabel $TRAIN.rlabel $TRAIN.vector $CLUSTERS -clustfile $TRAIN.cluto.out > $TRAIN.cluto.report
          ########### EVALUATION
          format_clusters.pl $TRAIN.cluto.out $TRAIN.rlabel --context $TESTDIR/$TEST > $TRAIN.clusters.context
          clusterlabeling.pl $TRAIN.clusters.context > $TRAIN.clusterlabeling
          # key*key is expected to match the key file left in the current
          # directory by an earlier step - TODO confirm which tool writes it
          cluto2label.pl $TRAIN.cluto.out key*key > $TRAIN.prelabel
          label.pl $TRAIN.prelabel > $TRAIN.label
          report.pl $TRAIN.label $TRAIN.prelabel > $TRAIN.report
          # collect every file of this run in its own directory;
          # -p keeps a rerun from failing when the directory already exists
          mkdir -p $TEST-$TRAIN-$MEASURE
          mv $TRAIN* $TEST-$TRAIN-$MEASURE
          # remove leftover key and expr files before the next run
          rm -fr key*
          rm -fr expr*
        end
      end
    end
  end
end
-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
senseclusters-developers mailing list
senseclusters-developers@list...
https://lists.sourceforge.net/lists/listinfo/senseclusters-developers
opensubscriber is not affiliated with the authors of this message nor responsible for its content.