Ekaw2010 tutorial3 practical

29
Knowledge Acquisition from Social Networking Sites Z. Zhang, A.E. Cano, K. Elbedweihy, A.-S. Dadzie EKAW 2010 Tutorial T3 Friday 15th october 2010

description

Knowledge Acquisition fro

Transcript of Ekaw2010 tutorial3 practical

Page 1: Ekaw2010 tutorial3 practical

Knowledge Acquisition from Social Networking Sites – Z. Zhang, A.E. Cano, K. Elbedweihy, A.-S. Dadzie

EKAW 2010 • Tutorial T3 • Friday, 15th October 2010

Page 2: Ekaw2010 tutorial3 practical

Introduction

This exercise is designed to help you ...

• understand the procedure of knowledge acquisition from social networking sites

• learn to use relevant tools to acquire information and

knowledge from social networking sites

• create a simple application to demonstrate the

technologies in practice

A little housekeeping ...

Folder structure – top level - ekaw-kasna_exercises/

• data – data/animalcorpus/ – data/examples/ – data/corpora/facebook_data | twitter_data/

• code – facebook/ – twitter/ – information_extraction/ekawtutorial/ | jatr_v1.0/

• external libraries – lib/

• downloads from tutorial website http://oak.dcs.shef.ac.uk/ekaw_2010_ka_from_sna_tutorial/tutorial_prep.html#exercise_downloads

http://oak.dcs.shef.ac.uk/ekaw_2010_ka_from_sna_tutorial/tutorial_prep.html#third_party_downloads

A little housekeeping ...

Running the applications

• Test internet connection – to run facebook and twitter examples

• tested with JDK 1.6

• ant build script – build.xml + ekaw.kasna.TestRunner class

– double-click on starter file for each application and O/S OR • may need to modify rights to execute (chmod 755)

– enter ‘ant’ at console for top level of each source code folder OR

• standalone – set up classpath (O/S dependent) – call javac with each test class

• IDE – create a new application using src folders for each of twitter, facebook & ie – set up classpath (IDE dependent) – set up application properties and run each main method

A little housekeeping ...

Using ant

Page 3: Ekaw2010 tutorial3 practical

A little housekeeping ...

Standalone

Setup

facebook and twitter APIs

• Documentation: – facebook Graph API:

http://developers.facebook.com/docs

– twitter API http://apiwiki.twitter.com/Twitter-API-Documentation

• Sign up: – facebook: http://www.facebook.com

– twitter: https://twitter.com/signup

• Libraries – RestFB: http://restfb.com – twitter4j: http://twitter4j.org/en

Setup

Natural Language Processing and Information Extraction

• OpenNLP 1.4 – Java toolkit for building NLP and IE applications

– contains pre-built language models to be used by OpenNLP for

language processing

http://opennlp.sourceforge.net

http://oak.dcs.shef.ac.uk/ekaw_2010_ka_from_sna_tutorial/

exercise_rscs/ie_models_eng.zip

• Java Automatic Term Recognition toolkit (JATR) http://www.dcs.shef.ac.uk/~ziqizhang/resources/tools/

jatr_v1.0.zip

Scenario

2010 South Africa World Cup – match summarisation

• During the 2010 World Cup tournament in South Africa, twitter and facebook were used extensively as a discussion board for fans to exchange information and opinions about matches…

– hundreds of thousands of messages were generated daily on the two social networking sites…

– a large proportion of these messages discuss the match of the day…

• we are interested in analysing these messages

– to understand what are the most popular topics that interest people…

Page 4: Ekaw2010 tutorial3 practical

Scenario cont.

2010 South Africa World Cup – match summarisation

• To do so we built a “match summarisation” application

– input - corpus of messages related to a match

– output - ranked list of representative terms that can be

used to summarise corpus content

• Using the extracted terms we can analyse what has been

the focus of discussion of the match of the day

• For this very exercise, we study the match between

England and Germany on the 27th of June 2010. A Knowledge

Acquisition process

Scenario Analysis

Re-cap from the morning session

• how to identify specific content of interest

– content retrieval and filtering

• how to process the content and make sense of it

– information extraction

– natural language processing

Scenario Analysis

• Input: corpus of messages related to a match

– we need to pin-point relevant messages on twitter and

facebook

– using twitter and facebook APIs, we apply content retrieval and filtering to build this corpus

• Output: ranked list of representative terms

– we apply IE and NLP on the corpus to achieve this goal

Corpus

generation

Content

analysis by IE

Corpus Generation

• Goal: create a corpus of messages

– that discuss the match between England and Germany

on 27th June 2010

• Input:

– twitter API providing access to twitter data

– facebook API providing access to facebook data

– content filtering parameters (the England-Germany

match on 27th June 2010)

• Output

– corpus of messages related to only the match of interest

Page 5: Ekaw2010 tutorial3 practical

twitter

Corpus Generation using twitter

Code in: ekaw-kasna_exercises/twitter External libs: lib/twitter4j-core-2.1.6-SNAPSHOT.jar |

log4j-1.2.15.jar

Corpus Generation using twitter

Ex.1 REST API (Analysing the public timeline status)

• Provides methods for fetching data related to:

• Timelines, Status, Users, Members, subscribers, followers,

social graphs etc.

– Wait! You will need to complete the code for it to

actually do something! - Edit the class:

ekaw.kasna.twitter.StatusTest

• Refer to the Twitter4J javadoc to complete the exercises:

http://twitter4j.org/en/javadoc/index.html

Try it yourself: run StatusTest.java

Exercise

Corpus Generation using twitter

Ex.1 REST API

• Analyze the structure and content of public timeline statuses – Where was the status tweeted from?

– Was it a retweet?

Page 6: Ekaw2010 tutorial3 practical

Corpus Generation using twitter

Ex.1 REST API

• Try it yourself: edit and run StatusTest.java

// Create the client first: it must exist before the timeline request below.
Twitter twitter = new TwitterFactory().getInstance();
try {
    // We request the public timeline, which returns a list of Status
    ResponseList<Status> publicTimeline = twitter.getPublicTimeline();
    /*
     * Complete this exercise and analyse the structure and content of each of the Status.
     * Have a look at the java doc of the Status Class, or just
     * check the available methods in your IDE
     */
    Iterator<Status> it = publicTimeline.iterator();
    while (it.hasNext()) {
        // Consume the element so that the loop advances and terminates.
        Status status = it.next();
        //TODO check what are the info you can get from a Status.
    }
} catch (TwitterException e) {
    // Report the failure; the try block was previously left unterminated.
    e.printStackTrace();
}

Corpus Generation using twitter

Ex.1 REST API

// Answer
try {
    ResponseList<Status> publicTimeline = twitter.getPublicTimeline();
    // Exercise solution: analyse structure and content of each status.
    // The iterator has to be declared here; the slide used `it` without defining it.
    Iterator<Status> it = publicTimeline.iterator();
    GeoLocation geoLocation;
    Place place;
    while (it.hasNext()) {
        Status st = it.next();
        log.info(st.getText());
        log.info(st.getSource());
        // Geo information is optional, so only log it when present.
        if ((geoLocation = st.getGeoLocation()) != null) {
            log.info(geoLocation.toString());
        }
        if ((place = st.getPlace()) != null) {
            log.info(place.getFullName());
            // toString() on an array only prints its type and identity hash;
            // deepToString prints the nested coordinates themselves.
            log.info(java.util.Arrays.deepToString(place.getBoundingBoxCoordinates()));
        }
    }
} catch (TwitterException e) {
    e.printStackTrace();
}

Corpus Generation using twitter

Ex.1 REST API

• Output – timeline status ??????????!!??888888888 RT @nico_news: ???????????????????????????????????????? http://bit.ly/aZcvfl

<a href="http://twipple.jp/" rel="nofollow">?????/twipple</a>

Southampton v Tranmere: Preview followed by live coverage of Saturday's game between Southampton and Tranmere in L... http://bit.ly/9N802N

<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>

Laper gueeee

<a href="http://www.snaptu.com" rel="nofollow">Snaptu.com</a>

?????????????????????????? / ??????????????????????????

<a href="http://www.echofon.com/" rel="nofollow">Echofon</a>

Changing the Language of Oppression http://bit.ly/aXA4w3 #specialneeds

<a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>

Are you attending the SuperSwarm at Jewel, Piccadilly tonight? Let's get an idea of numbers via my poll @ www.theprgeek.co.uk #superswarmLDN

web

Simon Cowell To Receive Special Emmy Award: October 7, 2010: Music mogul and former American Idol judge Simo... http://tinyurl.com/299o5gg

<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>

"Wajahmu seperti bulan" --» ini artinya ngatain kan yah? Org bulan bolong2

<a href="http://blackberry.com/twitter" rel="nofollow">Twitter for BlackBerry®</a>

FM????????????

<a href="http://stone.com/Twittelator" rel="nofollow">Twittelator</a>

???? [????:?????/????????????????????????]559 #colopl_msg

<a href="http://t.colopl.jp/t/" rel="nofollow">Colotwi</a>

pikiran saya cabangnya banyak, jd pusing sendiri..penuh rasanya ni kepala

<a href="http://m.tweete.net" rel="nofollow">m.tweete.net</a>...

Corpus Generation using twitter

Ex.2 Search API

• Allows interaction with twitter search and trends data

– top topics that are currently trending on Twitter

• It exposes the following methods:

– search,

– trends,

– trends/current, trends/daily, trends/weekly

• The Search API supports among

others, the following operators for

constructing a query string

Page 7: Ekaw2010 tutorial3 practical

Corpus Generation using twitter

Ex.2 Search API

since_id:

until_id:

Since:

Until:

Specifies the id of the status from which to start the search

Specifies the id of the status from which to end the search

Statuses produced since a specified date (e.g. 2010-03-10)

filter:links  Retrieves tweets without links

from:  Retrieves statuses from a given user. (e.g. from: fifa)

lang:  Retrieves statuses in a given language

OR  e.g., mentioning Mexico OR France

:)  e.g., containing football with a positive attitude (e.g. football :) )

Negation  e.g., mentioning beer but not root

Source:  e.g., Containing football entered via TwitterFeed (e.g. news

source:TwitterFeed)

Corpus Generation using twitter

Ex.2 Search API – Wait! You will need to complete the code for it to actually do

something! - Edit the class: ekaw.kasna.twitter.QueryTest

• Try it yourself: run QueryTest.java

// Starting point of the exercise: a plain keyword query for "football".
Query query = new Query();
query.query("football");
// TODO Modify the query object, and search for today's tweets (in english) related to football
// TODO Restrict your results to tweets generated within 300 kilometers of Johannesburg, South Africa
// hint: use Query's geoCode method, the Kilometers unit is given as Query.KILOMETERS
// hint: Johannesburg's lat: -26.12, long: 28.2 (southern hemisphere, hence the negative latitude)

Exercise

Corpus Generation using twitter

Ex.2 Search API

– Answer

// Keyword query for "football", restricted geographically as the exercise asks.
Query query = new Query();
query.query("football");
// TODO Modify the query object, and search for today's tweets related to football
// Restrict the results to tweets generated within 300 kilometers of Johannesburg, South Africa.
// The radius must be 300 (the slide had 30), and Johannesburg lies in the southern
// hemisphere, so its latitude is negative (lat: -26.12, long: 28.2).
query.geoCode(new GeoLocation(-26.12, 28.2), 300, Query.KILOMETERS);

Corpus Generation using twitter

Ex.2 Search API

• Output – query request for ‘football’ near ‘Johannesburg’

hits:15

MQMhlanzi:Total Football 360: Bafana Eager to Keep the Momentum of Winning! http://t.co/xOPTaY9

Benleeds:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Morecambe or Dagenham and Redbridge?

Tumelo13:Gota admit I miss my NONstop #football convo's wit @Denisao_4 and @GordonTyler8! Haha talk bout nothing but the #beautifulgame

Tumelo13:RT @Denisao_4: Ey bra @Tumelo13 that's not a sin! That's for the love of football! I approve wow! Let's hope it works :)??Amen

Edwardo84:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer

jonerz97:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Morecambe or Dagenham and Redbridge?

dcocker11:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer

AntimoOsato91:@siasduplessis Oros and The Dutch National Football Team could be good sponsors too! Haha :)

IsaacTeka:#football - EURO 2012 qualifier between Germany and Turkey is gonna be a fierce encounter. #Ozil and #Khedira

applenessuk:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer

johnyrotten:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Morecambe or Dagenham and Redbridge?

kartikverma:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer

RawRemedy:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Morecambe or Dagenham and Redbridge?

TLW1Dan:RT @BumbleCricket: Liverpool FC ...what a mess ...greed rears its head again ...football and fans suffer

jopayne:RT @BumbleCricket: any big shot yank out there SO intersted in football that he would like to buy Accrington or Morecambe or Dagenham and Redbridge?

Page 8: Ekaw2010 tutorial3 practical

Corpus Generation using twitter

Ex.3 Stream API

RestAPI and SearchAPI only present a limited snapshot of a timeline. During the finals of the 2010 World Cup

the rate of tweets containing the tags #Spain, #Netherlands, #Germany, #Uruguay, was quite high.

Two options: •! make requests, say, every 2sec through the RestAPI or the Search API, •! BETTER:

•! start listening to a stream of public tweets & •! filter according to the tag patterns

Corpus Generation using twitter

Ex.3 Stream API

Twitter4J allows you to retrieve streaming samples using the class TwitterStream. For the public timeline you just need basic authentication.

1. Create a TwitterStream instance twitterStream = new TwitterStreamFactory(this).getInstance("yourAcc","yourPass");

Set a Listener for receiving the event of a status. Your listener should implement the method public void onStatus(Status status)

twitterStream.setStatusListener(this);

Start Sampling twitterStream.sample();

Do something with the tweet in your onStatus method

2.

3.

4.

Corpus Generation using twitter

Ex.3 Stream API

– Wait! You will need to complete the code for it to actually do

something! - Edit the class:

ekaw.kasna.twitter.StreamTest

• Try it yourself: run StreamTest.java

private void startConsuming() throws TwitterException {

twitterStream.setStatusListener(this);

//*TODO Using TwitterStream’s filter method, restrict your sampling to collect tweets that include

the words: football, worldcup, final

twitterStream.sample();

}

Corpus Generation using twitter

Ex.3 Stream API

– Answer

private void startConsuming() throws TwitterException {

twitterStream.setStatusListener(this);

//*TODO Using TwitterStream’s filter method, restrict your sampling to collect tweets that include

the words: football, worldcup, final

String[] filterWords = {"#worldcup", "#WorldCup",

"#Worldcup", "#WORLDCUP"}; twitterStream.setStatusListener(this);

twitterStream.filter(0,null,filterWords);

twitterStream.sample(); }

Page 9: Ekaw2010 tutorial3 practical

Corpus Generation using twitter

Additional Exercise: Authentication

• restrictions to accessing private data!!!

• CHANGES SEP 2010 • change to authentication mode for retrieving individuals’

status information

• from a simple username-password to:

• OAuth-based authentication of registered “applications”

Corpus Generation using twitter

•! Try it yourself! •! Authenticating using Oauth

•! OAuthTest.java •! Using the application “Ekaw-Kasna” •! Login with your twitter account and go to: http://twitter.com/apps/new

Corpus Generation using twitter

You will need these two

strings for authenticating

Corpus Generation using twitter

• Authenticating using OAuth

– Running the example requires a PIN

• enter the URL at the console in a web browser

• to obtain an oauth_token

You will be giving

authorization to this

application to access

your information

Page 10: Ekaw2010 tutorial3 practical

Corpus Generation using twitter

• Authenticating using OAuth

– Running the example requires a PIN

• enter the URL to obtain an oauth_token

– Once you “Allow” authorization you will be provided with a PIN:

This is the PIN

needed to

complete the

authentication

Corpus Generation using twitter

• Authenticating using OAuth

– Running the example requires a PIN

• enter the URL to obtain an oauth_token

– Once you “Allow” authorization you will be provided with the PIN:

– Enter the PIN to complete authentication

“YOU ARE AUTHENTICATED!!”

facebook

Corpus Generation using facebook

Code in: ekaw-kasna_exercises/facebook External libs: lib/restfb-1.5.3.jar | log4j-1.2.15.jar

Page 11: Ekaw2010 tutorial3 practical

facebook API – Fetching Objects

•! The Graph API •! provides facilities for reading and writing data to facebook

•! Each API request starts with the URL: https://graph.facebook.com

•! e.g., data about any object can be found by fetching https://graph.facebook.com/objectID

- objectID is the unique id of this object in the social graph

- e.g., the unique id for a page is its name: https://graph.facebook.com/facebook

facebook API – Fetching User data

https://graph.facebook.com/facebook

facebook API – Connections

•! All objects in the facebook social graph are connected via relationships (connections)

•! Fetch connections

https://graph.facebook.com/objectID/connection_type

•! e.g., the page’s own posts https://graph.facebook.com/facebook/posts

facebook API – Connections

Page 12: Ekaw2010 tutorial3 practical

facebook API – Page Connections

feed  The page’s wall

picture  The page’s profile picture

tagged  The photos, videos, and posts in which this page has been tagged

links  The page’s posted links

photos  The photos this page has uploaded

groups  The groups this page is a member of

albums/videos  The photo albums/videos this page has created

statuses  The page’s status updates

notes  The page’s notes

posts  The page’s own posts

members  The page’s members. You can only query up to 500 members. It is not

possible to iterate through the list. Example: https://graph.facebook.com/

[PAGE_ID]/members?limit=500

events  The events this page is attending

checkins  Checkins made by friends of the current session user

facebook API – Filtering Data

•! Data can be filtered using parameters •! e.g.,

-! since, until ---> specify date ranges -! limit ---> specify amount of returned data

•! e.g., fetching the feed -! within specified dates and -! with a limit of 50

https://graph.facebook.com/worldcup/feed?

since=2010-07-17&until=2010-07-20&limit=50

facebook API – Filtering Data

“created_time” is within

the specified date ranges

facebook API – Finding Objects

•! Search for objects https://graph.facebook.com/search?

q=query&type=objectType

- query ---> what you want to find - objectType ---> type of the object (e.g. facebook post, user)

•! e.g., search all public posts for “2010 world cup” https://graph.facebook.com/search?q=2010%20world

%20cup&type=post

Page 13: Ekaw2010 tutorial3 practical

facebook API – Finding Objects

Posts containing the terms

“2010” + “world” + “cup”

facebook API – Graph API Exercise

Try it yourself!

•! Fetch the data about the page worldcup

•! Get the feed of this page (hint: connection is feed) •! this is the wall for the page worldcup

•! Return only the first 5 messages of this feed

•! Search for all pages containing worldcup in the page name

facebook API – Graph API Exercise

•! ANSWERS

•! page worldcup: •! fetch https://graph.facebook.com/worldcup

facebook API – Graph API Exercise

•! ANSWERS

•! Get the feed (wall) of the page worldcup: https://graph.facebook.com/worldcup/feed

Page 14: Ekaw2010 tutorial3 practical

facebook API – Graph API Exercise

•! ANSWERS

•! Return only the first 5 messages of the feed: https://graph.facebook.com/worldcup/feed?limit=5

facebook API – Graph API Exercise

•! ANSWERS

•! Search for all pages containing worldcup in the page name https://graph.facebook.com/search?q=worldcup&type=page

Client Libraries

•! Multiple client libraries for facebook API http://developers.facebook.com/search?

q=User:Client_Libraries

•! RestFB client library was the first java library to support the GraphAPI

•! Other Java libraries now supporting GraphAPI - BatchFB

- TinyFBGraphClient

- facebook Java Webapp

•!We use the RestFB client library in this tutorial

RestFB API – World Cup Scenario

•! Exercise: get the messages sent on the day of the England-Germany match - 27th of June 2010

Search for all pages containing “worldcup”

For every page: •! Get the messages posted on that day •! Store the messages to generate your corpus

1.

2.

Page 15: Ekaw2010 tutorial3 practical

RestFB API – DefaultFacebookClient

•! DefaultFacebookClient

•! provides methods for reading and writing data to facebook graph

// Client without an access token: access public data.
// The RestFB class is DefaultFacebookClient (capital F); the slide's spelling does not exist.
FacebookClient facebookClient = new DefaultFacebookClient();

// Client with an access token: required to access private data or edit/publish data.
facebookClient = new DefaultFacebookClient(ACCESS_TOKEN);

Access public data

Required to access private

data or edit/publish data

RestFB API – Searching

•! Step 1:

https://graph.facebook.com/search?q=world

+cup&type=page&limit=10

Connection<T>

fetchConnection(String connection, Class<T> connectionType,

Parameter... parameters)

// RestFB type names are FacebookClient / DefaultFacebookClient (capital F).
FacebookClient facebookClient = new DefaultFacebookClient();

// Search for pages matching "world cup" and ask for at most 10 of them.
Connection<Page> pageSearch = facebookClient.fetchConnection(
        "search", Page.class,
        Parameter.with("q", "world cup"),
        Parameter.with("type", "page"),
        Parameter.with("limit", "10"));

RestFB API – Searching

• returns a list of the first 10 pages about “worldcup”

• For each page, properties returned include:

– id, name, category, feed, pictures …

getData --> returns a list of objects (depending on the

connection requested)

// Print one line per page found by the search: name, category and id.
for (Page page : pageSearch.getData()) {
    System.out.print("Name: " + page.getName());
    // Tab separators keep the three fields from running into each other on the line.
    System.out.print("\tCategory: " + page.getCategory());
    System.out.println("\tID: " + page.getId());
}

RestFB API – return from request - pages

•! World Cup Pages

Name  Category  ID

World Cup  Politicians  50841049367

World Cup  Products_other  135556469788239

world cup  Sports_athletics  134176303266063

World Cup 2010  Products_other  257668478666

Rugby World Cup  Sports_athletics  116276232173

2010 World Cup  Unknown  125048384202285

“WORLD CUP”  Clubs  123688472176

World Cup on ESPN  Sports_athletics  161531382040

WORLD CUP  Sports_teams  120307364661056

2010 World Cup  Local_business  367119773816

Page 16: Ekaw2010 tutorial3 practical

RestFB API – Exercise

Try it yourself!

•! Edit the class SearchTest.java

•! Search for all groups talking about a topic of interest to you •! Get the first 15 groups •! For every group: - print name and ID

ANSWERS

// Search parameters: free-text query, object type and maximum number of results.
Parameter queryParam = Parameter.with("q", "2010 world cup");
Parameter typeParam = Parameter.with("type", "group");
Parameter limitParam = Parameter.with("limit", "15");

// Run the search against the "search" connection, mapping results to Group objects.
Connection<Group> groupSearch =
        facebookClient.fetchConnection("search", Group.class, queryParam, typeParam, limitParam);

// Report the name and the id of every group that was found.
for (Group found : groupSearch.getData()) {
    System.out.println("Name: " + found.getName());
    System.out.println("ID: " + found.getId());
}

RestFB API – Exercise

RestFB API – return from request - groups

‘2010 world cup’ groups

Name  ID

???????  Zamalek Official Group  225521478915

2010 FIFA WORLD CUP  124179390951456

2010 FIFA World Cup  2204836745

2010 FIFA WORLD CUP SOUTH AFRICA  27073818457

2010 Fifa World Cup South Afrika  120973211277159

2010 FIFA World Cup South Africa  111708565514436

2010 Fifa World Cup Drinking Game  236128198029

2010 FIFA WORLD CUP SOUTH AFRICA  108528292515380

Mundial 2010 Sudafrica 2010 World cup  199628687363

Italia - 2010 FIFA World Cup  18543473822

2010-FIFA-World-Cup  126377330712973

2010 World Cup  112095258835449

2010 World Cup  193332316373

2010 FIFA World Cup  130468896965477

2010 FIFA World Cup  163140881978

RestFB API – Getting the feed

•! Step 2:

https://graph.facebook.com/worldcup/feed?

since=2010-06-27&until=2010-06-28&limit=20

Connection<T>

fetchConnection(String connection, Class<T> connectionType,

Parameter... parameters)

// Time window and result cap for the feed request.
Parameter sinceParam = Parameter.with("since", "2010-06-27T11:00:00");
Parameter untilParam = Parameter.with("until", "2010-06-28T17:00:00");
Parameter limitParam = Parameter.with("limit", "10");

// Fetch the posts on the "worldcup" page's feed that fall inside that window.
Connection<Post> myFeed =
        facebookClient.fetchConnection("worldcup/feed", Post.class, sinceParam, untilParam, limitParam);

Page 17: Ekaw2010 tutorial3 practical

• feed returns all posts written on the specified date

• For each post attributes returned include:

– creation time, post name, description….

REST API – Getting the feed

// Print the message and creation time of every post in the fetched feed.
for (Post post : myFeed.getData()) {
    System.out.println("Message: " + post.getMessage());
    // ": " added so the label is separated from the value, as in the sample output.
    System.out.println("\tCreation Time: " + post.getCreatedTime());
}

•! Message: the english were hoping to play penalties what a waste of their training time

Creation Time: Sun Jun 27 17:45:13 BST 2010

•! Message: Deutschland, Deutschland über alles, über alles in der Welt

Creation Time: Sun Jun 27 17:29:25 BST 2010

•! Message: world cup?? this wasn't a 'football games' but 'fakeball' games!! Lampard was scored but the referee was blind....4-1?? congrats to the referees coz they have a massive party tonite to celebrate!! $$$$$$$$$$$$$$$$ wow.... even can makes people blind!!! world cup??? **** off!!!

Creation Time: Sun Jun 27 17:25:32 BST 2010

•! Message: how are we suppose to be patriotic with a team that plays like that, none of them deserve the money they get, waste of time..............

Creation Time: Sun Jun 27 16:48:06 BST 2010

•! Message: john terry on england should get worst defender for the year...he's no good

Creation Time: Sun Jun 27 16:42:39 BST 2010

REST API – return from request - feed

Try it yourself! - ConnectionsTest.java

REST API – Post Properties, Connections

id  The post ID

from  An object containing the ID and name of the user who posted the message

to  A list of the profiles mentioned or targeted in this post

message  The message

picture  If available, a link to the picture included with this post

link  The link attached to this post

name  The name of the link

caption/description  The caption/description of the link (appears beneath the link name)

source  If available, the source link attached to this post (for e.g., a flash or video file)

icon  A link to an icon representing the type of this post

attribution  A string indicating which application was used to create this post

actions  A list of available action names and links (including commenting, liking and an

optional app-specified action)

likes  The number of likes on this post

created_time  The time the post was initially published

updated_time  The time of the last comment on this post

Properties

comments  All of the comments on this post

Connections

All properties &

connections of a

“Post”

Corpus Generation using facebook

Additional Exercise: Authentication

• restrictions to accessing private data!!! • Access Token required for some methods

• to prevent access (read or write) to private data • e.g., publishing to the facebook social graph

• Biddington provides a good explanation for getting access tokens at: http://benbiddington.wordpress.com/2010/04/23/facebook-graph-api-getting-access-tokens

• e.g., fetch the friends of user ‘khadija.elbedweihy’ • this requires authentication – token ‘1158590550...’

https://graph.facebook.com/khadija.elbedweihy/

friends?access_token=11585905509...

• Try it yourself....

Page 18: Ekaw2010 tutorial3 practical

facebook API – Fetching User data

https://graph.facebook.com/khadija.elbedweihy

Public Data only

facebook API – Fetching User data

•! fetch specific fields https://graph.facebook.com/khadija.elbedweihy?

fields=id,name,picture

Link to the

picture

Picture at the

given link

facebook API – Authorization Example

Access token works

for the authorized

user only

facebook API – Authorization Example

Same access token for a

different user “does not

work”

Page 19: Ekaw2010 tutorial3 practical

facebook API – User Fields

id:  The user’s ID

first_name:  The user’s first name

last_name:  The user’s last name

name:  The user’s full name

about  The user’s blurb that appears under their profile picture

birthday  The user’s birthday

work/education  A list of the work/education history from the user’s profile

email:  The proxied or contact email address granted by the user

website  A link to the user’s personal website

hometown  The user’s hometown

location  The user’s current location

gender  The user’s gender

interested_in  Genders the user is interested in

meeting_for  Types of relationships the user is seeking

relationship_status  The user’s relationship status

religion  The user’s religion

facebook API – User Connections

home:  The user’s News Feed. Requires the read_stream permission

feed:  The user’s wall. Requires the read_stream permission to see

non-public posts.

tagged:  The photos, videos, and posts in which this user has been

tagged. Requires the read_stream permission.

posts:  The user’s own posts. Requires the read_stream permission

to see non-public posts.

picture:  The user’s profile picture

friends:  The user’s friends

activities/interests/

music/books/

movies/television:

The activities/interests/music/books/movies/television listed on

the user’s profile

likes:  All the pages this user has ‘liked’. Requires the user_likes or

friend_likes permission.

photos:  The photos this user is tagged in. Requires the

user_photo_video_tags, friend_photo_video_tags and

user_photos or friend_photos permissions.

information extraction

Scenario Analysis

Input: a corpus of messages related to a match

– we need to pin-point relevant messages on twitter and

facebook

– using twitter and facebook API, we apply content retrieval and filtering to build this corpus

Output: a ranked list of representative terms

– we apply IE and NLP on the corpus to achieve this goal

Corpus

generation

Content

analysis by IE

Page 20: Ekaw2010 tutorial3 practical

• To analyse the content and extract important terms,

we follow these steps

– Natural language analyses of each message

• Tokenisation

• POS tagging

– Identify candidate information units of interest

• phrase chunking

– Identify statistically important information

• term recognition

Content Analysis via IE

• To analyse the content and extract important terms,

we follow these steps:

Content Analysis via IE

– Natural language analyses of each message

(tokenisation, POS tagging)

– Identify candidate information units of interest

(phrase chunking, entity recognition)

– Identify statistically important information (term

recognition)

OpenNLP

JATR

Content Analysis – Natural Language Analysis

• Goal: process natural language text such that specific information can be identified

– These processes include

• Sentence segmentation

• Tokenisation

• Part of Speech tagging

• Input

– a single message

• Output

– a sequence of POS tagged tokens

Content Analysis – Natural Language Analysis

• An example: (located in “data/examples/

example1.txt”)

“Rooney fails to end goal drought. Wayne Rooney’s

trip to South Africa 2010 began with high

expectations but he leaves without a single goal

scored after three group matches and a 1-4 defeat

to Germany.”

Page 21: Ekaw2010 tutorial3 practical

Content Analysis – Natural Language Analysis

• Sentence segmentation

– Input: a single message

– Output: a list of sentences

Rooney fails to end goal drought. | Wayne Rooney's trip to South Africa 2010 began with high expectations but he leaves without a single goal scored after three group matches and a 1-4 defeat to Germany.

Try it yourself! - SentenceSegmentation.java

/* Input */ (LINE 17)

/* Relative path of the example input text. */
String pathToInput = "../../data/examples/example1.txt";

/* The text to segment; its value is elided ("…") on the slide. */
String content = "…";

/* Creates an object of OpenNLP sentence segmentation detector */

SentenceDetector detector = new SentenceDetector("lib/opennlp/models/EnglishSD.bin.gz");

/* Call the actual method to identify the end offsets of sentences. */

int[] result = detector.sentPosDetect(content);

/* Print out the sentences */

/* start and i are the loop's working variables; the loop body is elided ("……") on the slide. */
int start=0, i=0;

do {

……

/* Loop until start reaches the last offset returned by the detector. */
} while(start<result[result.length-1]);

Content Analysis – Natural Language Analysis

• Sentence segmentation using OpenNLP

Rooney fails to end goal drought. Wayne Rooney's

trip to South Africa 2010 began with high

expectations but he leaves without a single goal scored after three group matches and a 1-4 defeat

to Germany.

Content Analysis – Natural Language Analysis

• Tokenisation

– Input: a single sentence, or message

– Output: a list of tokens

Rooney fails to end goal drought

Rooney, fails, to, end, goal, drought, .

Try it yourself! - Tokenisation.java

Content Analysis – Natural Language Analysis

• Tokenisation using OpenNLP

/* Input text message */ (LINE 28)

String content = "…" // read in the text content from "example1.txt"

List<String> sentences = new ArrayList<String>();

……

/* Code for splitting sentences */

/*Creates an object of OpenNLP tokeniser using a pre-built English language model. */

//change the path accordingly

String pathToEngTokenisationModel = "lib/opennlp/models/EnglishTok.bin.gz";

Tokenizer tokeniser = new Tokenizer(pathToEngTokenisationModel);

/*Tokenise each sentence and print out the result*/

for(String sentence: sentences){

String[] result=tokeniser.tokenize(sentence);

for(String tok:result)

System.out.println(tok);

} Rooney fails to end goal drought.

Page 22: Ekaw2010 tutorial3 practical

Content Analysis – Natural Language Analysis

Rooney/NNP fails/VBZ to/TO end/VB goal/NN drought/NN ./.

• Part of speech tagging

– Input: a list of tokens

– Output: a list of tokens with their part of speech tag

Rooney, fails, to, end, goal, drought, .

Try it yourself! - POSTagger.java

Content Analysis – Natural Language Analysis

• POS tagging using OpenNLP

/*Input text message*/ (LINE 31)

String content = "…" //read in the text content from example1.txt

List<String> tokens = new ArrayList<String>();

/* Code for tokenisation and add the result into the list object above.

You do not need to do sentence segmentation in this case. Because the

tokenisation will detect sentence boundary as a first step*/

/*Creates an object of OpenNLP POS tagger using a pre-built English language model.*/

//change the path accordingly

String pathToEngPOSModel = "lib/opennlp/models/tag.bin.gz";

/* You MAY additionally specify two parameters for the constructor, i.e.,

TagDictionary and Dictionary.*/

PosTagger tagger = new PosTagger(pathToEngPOSModel, (Dictionary)null);

/*Tag the list of tokens and print out the result*/

String[] result=tagger.tag(tokens.toArray(new String[0]));

for (String tag: result)

System.out.println(tag);

Rooney/NNP fails/VBZ to/TO end/VB goal/NN drought/NN ./.

Content Analysis – Phrase Chunking

• Goal: identifying information units that make good candidate terms of our interest

• In this exercise, we focus on noun phrases

– which often bear important domain-specific information

• Input

– POS-tagged tokens

• Output

– Noun phrases

Content Analysis – Natural Language Analysis

Rooney/NNP fails/VBZ to/TO end/VB goal/NN drought/NN ./.

• Phrase chunking

– Input: a list of POS-tagged tokens

– Output: a list of phrases (nouns/verb phrases)

Rooney, goal drought

Try it yourself!

– edit the class PhraseChunker.java and run

Exercise

Page 23: Ekaw2010 tutorial3 practical

Content Analysis – Natural Language Analysis

• Phrase chunking using OpenNLP

(LINE 32 in PhraseChunker.java)

//initialising all required NLP processors. If you get an out of memory

//exception, try increasing your JVM heap space to at least 256MB

String pathToEngTokenisationModel = "lib/opennlp/models/EnglishTok.bin.gz";

String pathToEngPOSModel = "lib/opennlp/models/tag.bin.gz";

String pathToEngPhraseModel = "lib/opennlp/models/EnglishChunk.bin.gz";

SentenceDetector detector = new SentenceDetector("lib/opennlp/models/EnglishSD.bin.gz");

Tokenizer tokeniser = new Tokenizer(pathToEngTokenisationModel);

PosTagger tagger = new PosTagger(pathToEngPOSModel, (Dictionary) null);

TreebankChunker chunker = new TreebankChunker(pathToEngPhraseModel);

Content Analysis – Natural Language Analysis

• Phrase chunking using OpenNLP (LINE 44 in PhraseChunker.java)

int[] result = detector.sentPosDetect(content);

int start = 0, i = 0;

do {

//sentence splitting

String sentence = content.substring(start, result[i]);

//TODO: tokenization, put tokens in a String array. Hint:

//Tokenisation.java

String[] tokens = null;

//TODO: POS tagging, put tags in a String array. Hint: POSTagger.java

String[] tags = null;

//This is the method you use to chunk phrases on a list of tokens and

//a list of tags

String[] phrases = chunker.chunk(tokens, tags);

//See the result

for(String p:phrases)

System.out.println(p);

……

start = result[i];

i++;

} while (start < result[result.length - 1]);

(LINE 44 in PhraseChunker.java)

int[] result = detector.sentPosDetect(content);

int start = 0, i = 0;

do {

//sentence splitting

String sentence = content.substring(start, result[i]);

//TODO: tokenization, put tokens in a String array.

String[] tokens=null;

//TODO: POS tagging, put tags in a String array. Hint: POSTagger.java

String[] tags = null;

//This is the method you use to chunk phrases on a list of tokens and

//a list of tags

String[] phrases = chunker.chunk(tokens, tags);

//See the result

for (int k = 0; k < phrases.length; k++) {

System.out.println(phrases[k] + "\t\t" + tokens[k]);

}

……

start = result[i];

i++;

} while (start < result[result.length - 1]);

Content Analysis – Natural Language Analysis

• Phrase chunking using OpenNLP

The result is not exactly the phrases we

expected, but a list of “tags”, which are

commonly used in NLP phrase

chunking:

B-NP     Rooney     Rooney

B-VP     fails

I-VP     to     fails to end

I-VP     end

B-NP     goal

I-NP     drought     goal drought

B – “begin” I – “inside” NP – “Noun phrase” VP – “Verb phrase”

Content Analysis – Natural Language Analysis

• Phrase chunking using OpenNLP (LINE 78 in PhraseChunker.java)

String npstart = "B-NP";

String vpstart = "B-VP";

String npcontinue = "I-NP";

String vpcontinue = "I-VP";

String other = "O";

String phrase = "";

for (int n = 0; n < tokens.length; n++) {

if (phrases[n].equals(npstart) || phrases[n].equals(vpstart)) {

phrase = tokens[n];

for (int m = n + 1; m < tokens.length; m++) {

if (phrases[m].equals(npcontinue) ||

phrases[m].equals(vpcontinue)) {

phrase = phrase+" "+tokens[m];

} else {

System.out.println("Actual phrase: "+phrase);

phrase = "";

break;

...

}

Code from line 78 onwards processes

this result and generates the real

phrases

Page 24: Ekaw2010 tutorial3 practical

Content Analysis – Natural Language Analysis • Phrase chunking using OpenNLP

– The answer….

B-NP Rooney B-VP fails I-VP to I-VP end B-NP goal I-NP drought O . Actual phrase: Rooney Actual phrase: fails to end Actual phrase: goal drought

(LINE 44 in PhraseChunker.java)

int[] result = detector.sentPosDetect(content);

int start = 0, i = 0;

do {

//sentence splitting

String sentence = content.substring(start, result[i]);

//TODO: tokenization, put tokens in a String array.

String[] tokens=tokeniser.tokenize(sentence);

//TODO: pos tagging, put tags in a String array.

String[] tags = tagger.tag(tokens);

//This is the method you use to chunk phrases on a list of tokens //and a list of tags

String[] phrases = chunker.chunk(tokens, tags);

//See the result

for(String p:phrases)

System.out.println(p);

……

start = result[i];

i++;

} while (start < result[result.length - 1]);

More exercises if you are interested

• Repeat previous tasks using the corpus generated using the twitter and facebook APIs

• Try:

– Sentence segmentation

– Tokenisation

– Part-of-speech tagging

– Phrase chunking

• To analyse the content and extract important terms,

we follow these steps

– Natural language analyses of each message

(tokenisation, POS tagging)

– Identify candidate information units of interest

(phrase chunking, entity recognition)

– Identify statistically important information (term

recognition)

Next  Domain Term Recognition

• Goal: extract statistically significant terms, which

collectively determine the summary of the match

• Recap: domain term recognition procedure

– NLP processes → identify candidate lexicons, e.g., noun-phrases, entities

– Statistical measures → evaluate the significance of

candidate lexicons

• term frequency; tf-idf; weirdness, glossex, c-value,

termex

Page 25: Ekaw2010 tutorial3 practical

Domain Term Recognition

• Goal: extract statistically significant terms, which

collectively determine the summary of the match

• Recap: domain term recognition procedure

– NLP processes → identify candidate lexicons, e.g., noun-phrases, entities

– Statistical measures → evaluate the significance of

candidate lexicons

• term frequency; tf-idf; weirdness, glossex, c-value,

termex

JATR – Java Automatic Term Recognition toolkit

• JATR

– Java-based toolkit for developing and testing domain

term recognition algorithms

• Use JATR to

– extract domain terms from a collection of

documents

• 5 state-of-the-art algorithms implemented

– implement additional algorithms

– evaluate different algorithms under the same

framework

JATR – Java Automatic Term Recognition toolkit

• JATR is a Java-based toolkit for developing and testing domain term recognition algorithms

• Use JATR to

– extract domain terms from a collection of

documents

• 5 state-of-the-art algorithms implemented

– implement additional algorithms

– evaluate different algorithms under the same

framework

In the following exercise, you will use

these algorithms and compare the results

Advanced topic – will be

covered briefly

Domain Term Recognition using JATR

• JATR

– basic frequency measure

– 5 additional state-of-the-art algorithms implemented

• Term frequency inverse document frequency (tf-idf)

• C-Value

• Weirdness

• Glossary extraction (Glossex)

• Term extractor (Termex)

• can be used as a command-line based application

background is covered in

#.+%$G*,36&+,*H�[vl<*[IvK*

Page 26: Ekaw2010 tutorial3 practical

Domain Term Recognition using JATR

• How to use JATR

– Locate your JATR folder

– configure your application in jatr.properties in <your_jatr>/test

• jatr.system.nlp=<your_jatr>/nlp_resources

Access to NLP tools required by JATR

• jatr.system.term.maxwords=5

Maximum number of words in a term

• jatr.system.term.ignore_digits=true

Can a term contain digits?

– copy properties files to <your_jatr>/classes

Domain Term Recognition using JATR

• How to use JATR

– start the application, e.g., the frequency measure uk.ac.shef.wit.jatr.debug.TestFrequency

–! 26#.*76"67'7*7+7%$G*�7>J[^7*

–! (%/G*N-#$*-"&*3%4Y*/$%/+$)+,*D3+,*#%*G%'$*(3-,,+,*0%3&+$*-|+$*(3+-"*-"&*$+(%7/63+*

– see file ‘quickstart.txt’ in JATR folder for additional information

– test with the twitter and facebook corpora

Domain Term Recognition using JATR

Running the tests with ant

• ant script set up to run from folder <your_jatr>/test

• default arguments

– path_to_corpus = folder in <your_jatr>/test/"tiny"

– path_to_reference_corpus_stats = <your_jatr>/"nlp_resources/bnc_unifrqs.normal"

• to use alternative args enter one or both of – ant

-Dpath_to_corpus=alt_corpus_path -Dpath_to_reference_corpus_stats=alt_reference_corpus_stats_path

• output to test folder

–! /-:+$"S*!"#$%&'()*+),*B*9AC=9VTEC!Ang;#>#*

–! %$*$'"*9VV*#+,#,*@G*(-336"4*LI/+.(#-"M!&$-&(M*

Domain Term Recognition using JATR

Running the tests with ant

Page 27: Ekaw2010 tutorial3 practical

Domain Term Recognition using JATR

• Understanding the output

– The process of the application is logged in “jatr.log”

– The results are output to a file called

<algorithm_name>_ATR_Algorithm.txt, e.g.,

“Simple_term_frequency_ATR_ALGORITHM.txt”

– which contains ranked list of terms extracted from the

corpus, one term per line:

2%$3&('/*�`ECVQaeU*�`%$3&a'/*�2%$3&('/*�`%$3&('/ ***[u^Y;_*

The first term

is the

canonical form

of all of its

variants

The other terms are

the variants found in

the corpus

The number is

the calculated

score for that

term

Domain Term Recognition using JATR

• Understanding the process – how does it work?

– look at run() method in uk.ac.shef.wit.jatr.debug.TestFrequency.java

Part 1: Extracting candidate terms by NLP

//stop word list

StopList stop = new StopList(true);

//lemmatiser

Lemmatiser lemmatizer = new Lemmatiser();

//noun phrase extractor

CandidateTermExtractor npextractor = new

NounPhraseExtractorOpenNLP(stop, lemmatizer);

……

A “stop word” list is

used to remove noise

words, e.g., ‘the’, ‘and’

Lemmatisation is used to

normalise terms to their

canonical forms (see theory

,36&+,*[ll<*[lvK*

JATR uses a default open-nlp

based noun phrase chunker to

extract candidate terms

Domain Term Recognition using JATR

• Understanding the process – how does it work?

– look at run() method in uk.ac.shef.wit.jatr.debug.TestFrequency.java

Part 1: Extracting candidate terms by NLP

//stop word list

StopList stop = new StopList(true);

//lemmatiser

Lemmatiser lemmatizer = new Lemmatiser();

//noun phrase extractor

CandidateTermExtractor npextractor = new

NounPhraseExtractorOpenNLP(stop, lemmatizer);

……

Rooney, fails, to, end, goal, drought, . A “stop word” list

is used to remove

noise words

Lemmatisation is used to

normalise terms to their

canonical forms (see theory

,36&+,*[ll<*[lvK*

JATR uses a default open-nlp

based noun phrase chunker to

extract candidate terms

Domain Term Recognition using JATR

• Understanding the process – how does it work?

Part 1: Extracting candidate terms by NLP cont.

TermFreqCounter npcounter = new TermFreqCounter();

WordCounter wordcounter = new WordCounter();

//create global resource index builder, which indexes

global resources,

//such as documents and terms and their relations

GlobalResourceIndexBuilder builder = new

GlobalResourceIndexBuilder();

//build the global resource index

GlobalResourceIndex termDocIndex = builder.build(new

CorpusImpl(args[0]), npextractor);

….

Processors

required for

counting term

frequencies

For indexing terms

and documents

Invoking NLP processes to read in

documents, segment sentences,

apply tokenisation, POS tagging,

and phrase chunking

Page 28: Ekaw2010 tutorial3 practical

Domain Term Recognition using JATR

• Understanding the process – how does it work?

Part 2: Apply statistical analysis on extracted terms.

FeatureCorpusTermFrequency termCorpusFreq =

new FeatureBuilderCorpusTermFrequency(npcounter,

wordcounter, lemmatizer).build(termDocIndex);

AlgorithmTester tester = new AlgorithmTester();

tester.registerAlgorithm(new FrequencyAlgorithm(), new

FrequencyFeatureWrapper(termCorpusFreq));

tester.execute(termDocIndex);

System.out.println("Ended at: " + new Date());

Create features

required by this

particular

algorithm

Create an instance of the

algorithm for testing and

link to its required

features

Invoke statistical analysis, i.e.,

compute the score using the

algorithmic formula

Domain Term Recognition using JATR

• How to use JATR – summary

– To run other algorithms, replace the algorithm tester class

in the command:

java <memory_config> -classpath <all_jar_files>

uk.ac.shef.wit.jatr.debug.<desired_algorithm_tester>

<path_to_our_world_cup_corpus>

– Source code and javadoc are available

– Some algorithms may ask for an additional parameter in

the command: <path_to_reference_corpus_stats>

• these algorithms use referential corpus statistics to compute

the “termness”

• use the file “bnc_unifrqs.normal” (stats of the British

National Corpus) under “<your_jatr>/nlp_resources” here

Domain Term Recognition using JATR

• More exercises

– Try all the algorithms on the different corpora

provided for you

• using the World Cup corpus: corpus (twitter & facebook)

• try the Wikipedia corpus for articles about animals data

(animalcorpus)

– compare the effect of different corpora on accuracy

Domain Term Recognition using JATR

• Advanced topic – development using JATR

– To develop new algorithms using JATR, you must

• Implement your own algorithm, implementing

the interface uk.ac.shef.wit.jatr.core.algorithm.Algorithm

• Implement your own algorithm feature

wrapper

– to fetch features required by your algorithm

– your class must extend uk.ac.shef.wit.jatr.core.algorithm.AbstractFeatureWrapper

• list of examples can be found in the package uk.ac.shef.wit.jatr.core.algorithm

Page 29: Ekaw2010 tutorial3 practical

Domain Term Recognition using JATR

• Advanced topic – development using JATR

– Implement new features - for each create

• a new class extending uk.ac.shef.wit.jatr.core.feature.AbstractFeature

• another class extending uk.ac.shef.wit.jatr.core.feature.AbstractFeatureBuilder

– a list of examples can be found in the package uk.ac.shef.wit.jatr.core.feature

– Try other NLP tools

– Create your own methods for extracting candidate terms (e.g.,

n-gram instead of noun phrases)

• see uk.ac.shef.wit.jatr.core.npextractor

The End - Summary

• In this exercise we have

– Learnt to use the facebook and twitter APIs

• to collect interesting data for specific application purpose

– Learnt to use OpenNLP

• to perform basic NLP tasks

– Learnt to use JATR

• to perform domain term recognition from a corpus

– Tested OpenNLP and JATR on

• a facebook corpus

• a twitter corpus

The End – Final Words

• Knowledge acquisition from social

networking sites is challenging

– Exercises show that informal language and short,

terse messages cause inaccuracies in results

– This is however not a real-world application

• larger data sets, more varied data necessary to

appreciate full scale of challenges

– How to filter real useful terms from the result

according to user interest?

– How to link the terms to their context so they make

sense?

– and many more questions to consider…

The End – A Big Thank You!

Thank you very much for

attending this tutorial!