MongoDB & Spark
-
Upload
bryan-reinero -
Category
Software
-
view
1.255 -
download
0
Transcript of MongoDB & Spark
Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize
Lineage
Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize
Lineage
Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize Transform
Transform Action
Parallelize
Lineage
{"_id" : ObjectId("4f16fc97d1e2d32371003e27"),"body" : "the scrimmage is still up in the air.",
"subFolder" : "notes_inbox","mailbox" : "bass-e","filename" : "450.","headers" : {
"X-cc" : "","From" : "[email protected]","Subject" : "Re: Plays and other information","X-Folder" : "\\Eric_Bass_Dec2000\\Notes Folders\\Notes inbox","Content-Transfer-Encoding" : "7bit","X-bcc" : "","To" : "[email protected]","X-Origin" : "Bass-E","X-FileName" : "ebass.nsf","X-From" : "Michael Simmons","Date" : "Tue, 14 Nov 2000 08:22:00 -0800 (PST)","X-To" : "Eric Bass","Message-ID" : "<6884142.1075854677416.JavaMail.evans@thyme>","Content-Type" : "text/plain; charset=us-ascii","Mime-Version" : "1.0"
}}
{ "_id" : "[email protected]|[email protected]", "value" : 2}{ "_id" : "[email protected]|[email protected]", "value" : 2}{ "_id" : "[email protected]|[email protected]", "value" : 2 }
Spark Configuration:
Configuration conf = new Configuration();
conf.set("mongo.job.input.format", "com.mongodb.hadoop.MongoInputFormat");
conf.set("mongo.input.uri", "mongodb://localhost:27017/db.collection");
Spark Context:
JavaPairRDD<Object, BSONObject> documents = context.newAPIHadoopRDD( conf,
MongoInputFormat.class, Object.class, BSONObject.class
);
Spark Context:
JavaPairRDD<Object, BSONObject> documents = context.newAPIHadoopRDD( conf,
MongoInputFormat.class, Object.class, BSONObject.class
);
Spark Context:
JavaPairRDD<Object, BSONObject> documents = context.newAPIHadoopRDD( conf,
MongoInputFormat.class, Object.class, BSONObject.class
);
Spark Context:
JavaPairRDD<Object, BSONObject> documents = context.newAPIHadoopRDD( conf,
MongoInputFormat.class, Object.class, BSONObject.class
);
Spark Context:
JavaPairRDD<Object, BSONObject> documents = context.newAPIHadoopRDD( conf,
MongoInputFormat.class, Object.class, BSONObject.class
);
Spark Submit:
/usr/local/spark-1.5.1/bin/spark-submit \ --class com.mongodb.spark.examples.DataframeExample \ --master local Examples-1.0-SNAPSHOT.jar
JavaRDD<Message> messages = documents.map (
new Function<Tuple2<Object, BSONObject>, Message>() {
public Message call(Tuple2<Object, BSONObject> tuple) { BSONObject header = (BSONObject)tuple._2.get("headers");
Message m = new Message(); m.setTo( (String) header.get("To") ); m.setX_From( (String) header.get("From") ); m.setMessage_ID( (String) header.get( "Message-ID" ) ); m.setBody( (String) tuple._2.get( "body" ) );
return m; } });
THANKS!
{ Name: 'Bryan Reinero',
Title: 'Developer Advocate',
Twitter: '@blimpyacht', Email: