Writing and Deploying Spark Applications Chapter 13
201509
Course Chapters 1
Introduction
Course Introduction
2 Introduction to Hadoop and the Hadoop Ecosystem Hadoop Architecture and HDFS 3
Introduction to Hadoop
Importing Relational Data with Apache Sqoop 4 Introduction to Impala and Hive 5 6 Modeling and Managing Data with Impala and Hive Data Formats 7 Data File Partitioning 8
Importing and Modeling Structured Data
9
Capturing Data with Apache Flume
10 11 12 13 14 15 16 17
Spark Basics Working with RDDs in Spark Aggregating Data with Pair RDDs Writing and Deploying Spark Applications Parallel Processing in Spark Spark RDD Persistence Common Patterns in Spark Data Processing Spark SQL and DataFrames
18
Conclusion
Ingesting Streaming Data
Distributed Data Processing with Spark
Course Conclusion
© Copyright 2010-2015 Cloudera. All rights reserved. Not to be reproduced or shared without prior written consent from Cloudera.
13-2
5.@"4+ 56(,7+"4 8
I%7".)@3$.%
5.@"4+ I%7".)@3$.%
: I%7".)@3$.% 7. P().., (%) 76+ P().., Q3.4/47+H P().., 2"36#7+37@"+ (%) P*O0 9
I%7".)@3$.% 7. P()..,
IH,."$%& K+-($.%(- *(7( E#76 2,(36+ 0S.., U I%7".)@3$.% 7. IH,(-( (%) P#B+ < V R.)+-#%& (%) R(%(%& *(7( E#76 IH,(-( (%) P#B+ *(7( O."H(74 W *(7( O#-+ L("$$.%#%& T
IH,."$%& (%) R.)+-#%& 07"@37@"+) *(7(
=
5(,7@"#%& *(7( E#76 2,(36+ O-@H+
8; 88 8: !" 8U 8< 8V 8W
0,("1 J(4#34 !."1#%& E#76 K**4 #% 0,("1 2&&"+&($%& *(7( E#76 L(#" K**4 :*';34 /3. &-8<1='34 78/*9 >88<'2/;13( L("(--+- L".3+44#%& #% 0,("1 0,("1 K** L+"4#47+%3+ 5.HH.% L(F+"%4 #% 0,("1 *(7( L".3+44#%& 0,("1 0MN (%) *(7(O"(H+4
8T
5.%3-@4#.%
I%&+4$%& 07"+(H#%& *(7(
&'()*'+,)-. &/)/ 0*12-(('34 5')6 78/*9
5.@"4+ 5.%3-@4#.%
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$
Spark on a Cluster
In this chapter you will learn
• How to write a Spark Application
• How to run a Spark Application or the Spark Shell on a YARN cluster
• How to access and use the Spark Application Web UI
• How to configure application properties and logging
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#"
Chapter Topics Writing and Deploying a Spark Application
Distributed Data Processing with Spark
• Spark Applications vs. Spark Shell
• Creating the SparkContext
• Building a Spark Application (Scala and Java)
• Running a Spark Application
• The Spark Application Web UI
• Homework: Write and Run a Spark Application
• Configuring Spark Properties
• Logging
• Conclusion
• Homework: Configure a Spark Application
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#F
Spark Shell vs. Spark Applications
• The Spark Shell allows interactive exploration and manipulation of data
  – REPL using Python or Scala
• Spark applications run as independent programs
  – Python, Scala, or Java
  – e.g., ETL processing, Streaming, and so on
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#I
56(,7+" X.,#34 :*';34 /3. &-8<1='34 / 78/*9 >88<'2/;13
&'()*'+,)-. &/)/ 0*12-(('34 5')6 78/*9
!
0,("1 2,,-#3($.%4 B4A 0,("1 06+--
!
O*-/;34 )6- 78/*9O13)-K)
!
J@#-)#%& ( 0,("1 2,,-#3($.% Z03(-( (%) [(B(\
!
K@%%#%& ( 0,("1 2,,-#3($.%
!
X6+ 0,("1 2,,-#3($.% !+D ]I
!
P.H+E."1^ !"#7+ (%) K@% ( 0,("1 2,,-#3($.%
!
5.%_&@"#%& 0,("1 L".,+"$+4
!
N.&%&
!
5.%3-@4#.%
!
P.H+E."1^ 5.%_&@"+ ( 0,("1 2,,-#3($.%
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#N
The SparkContext
• Every Spark program needs a SparkContext
  – The interactive shell creates one for you
• In your own Spark application you create your own SparkContext
  – Named sc by convention
  – Call sc.stop when program terminates
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#P
Python Example: WordCount import sys from pyspark import SparkContext if __name__ == "__main__": if len(sys.argv) < 2: print >> sys.stderr, "Usage: WordCount " exit(-1) sc = SparkContext() counts = sc.textFile(sys.argv[1]) \ .flatMap(lambda line: line.split()) \ .map(lambda word: (word,1)) \ .reduceByKey(lambda v1,v2: v1+v2) for pair in counts.take(5): print pair sc.stop()
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#R
Scala Example: WordCount import org.apache.spark.SparkContext import org.apache.spark.SparkContext._ object WordCount { def main(args: Array[String]) { if (args.length < 1) { System.err.println("Usage: WordCount ") System.exit(1) } val sc = new SparkContext() val counts = sc.textFile(args(0)). flatMap(line => line.split("\\W")). map(word => (word,1)).reduceByKey(_ + _) counts.take(5).foreach(println) sc.stop() } } © Copyright 2010-2015 Cloudera. All rights reserved. Not to be reproduced or shared without prior written consent from Cloudera.
!"#S
56(,7+" X.,#34 :*';34 /3. &-8<1='34 / 78/*9 >88<'2/;13
&'()*'+,)-. &/)/ 0*12-(('34 5')6 78/*9
!
0,("1 2,,-#3($.%4 B4A 0,("1 06+--
!
5"+($%& 76+ 0,("15.%7+Y7
!
U,'<.'34 / 78/*9 >88<'2/;13 V72/ /3. W/G/X
!
K@%%#%& ( 0,("1 2,,-#3($.%
!
X6+ 0,("1 2,,-#3($.% !+D ]I
!
P.H+E."1^ !"#7+ (%) K@% ( 0,("1 2,,-#3($.%
!
5.%_&@"#%& 0,("1 L".,+"$+4
!
N.&%&
!
5.%3-@4#.%
!
P.H+E."1^ 5.%_&@"+ ( 0,("1 2,,-#3($.%
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#!T
Building a Spark Application: Scala or Java
• Scala or Java Spark applications must be compiled and assembled into JAR files
  – JAR file will be passed to worker nodes
• Apache Maven is a popular build tool
  – For specific setting recommendations, see http://spark.apache.org/docs/latest/building-with-maven.html
• Build details will differ depending on
  – Version of Hadoop (HDFS)
  – Deployment platform (Spark Standalone, YARN, Mesos)
• Consider using an IDE
  – IntelliJ or Eclipse are two popular examples
  – Can run Spark locally in a debugger
© Copyright 2010-2015 Cloudera. All rights reserved. Not to be reproduced or shared without prior written consent from Cloudera.
!"#!!
56(,7+" X.,#34 :*';34 /3. &-8<1='34 / 78/*9 >88<'2/;13
&'()*'+,)-. &/)/ 0*12-(('34 5')6 78/*9
!
0,("1 2,,-#3($.%4 B4A 0,("1 06+--
!
5"+($%& 76+ 0,("15.%7+Y7
!
J@#-)#%& ( 0,("1 2,,-#3($.% Z03(-( (%) [(B(\
!
B,33'34 / 78/*9 >88<'2/;13
!
X6+ 0,("1 2,,-#3($.% !+D ]I
!
P.H+E."1^ !"#7+ (%) K@% ( 0,("1 2,,-#3($.%
!
5.%_&@"#%& 0,("1 L".,+"$+4
!
N.&%&
!
5.%3-@4#.%
!
P.H+E."1^ 5.%_&@"+ ( 0,("1 2,,-#3($.%
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#!$
Running a Spark Application
• The easiest way to run a Spark Application is using the spark-submit script
Python
$ spark-submit WordCount.py fileURL
Scala
$ spark-submit --class WordCount \ MyJarFile.jar fileURL
Java
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#!"
Spark Application Cluster Options
• Spark can run
  – Locally
  – No distributed processing
  – Locally with multiple worker threads
  – On a cluster
• Local mode is useful for development and testing
• Production use is almost always on a cluster
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#!F
Supported Cluster Resource Managers
• Hadoop YARN
  – Included in CDH
  – Most common for production sites
  – Allows sharing cluster resources with other applications (e.g. MapReduce, Impala)
• Spark Standalone
  – Included with Spark
  – Easy to install and run
  – Limited configurability and scalability
  – Useful for testing, development, or small systems
• Apache Mesos
  – First platform supported by Spark
  – Now used less often
© Copyright 2010-2015 Cloudera. All rights reserved. Not to be reproduced or shared without prior written consent from Cloudera.
!"#!I
P.E 0,("1 K@%4 .% d2KC^ 5-#+%7 R.)+ Z8\ C.)+R(%(&+" *"#B+" L".&"(H
*(7(C.)+ QY+3@7."
0,("1 5.%7+Y7 C.)+R(%(&+" QY+3@7."
K+4.@"3+ R(%(&+"
*(7(C.)+ QY+3@7."
C(H+ C.)+ C.)+R(%(&+"
*(7(C.)+
>88<'2/;13 Y/()-* !
C.)+R(%(&+"
*(7(C.)+
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#!N
P.E 0,("1 K@%4 .% d2KC^ 5-#+%7 R.)+ Z:\ C.)+R(%(&+" *"#B+" L".&"(H
*(7(C.)+ QY+3@7."
0,("1 5.%7+Y7 C.)+R(%(&+" QY+3@7."
K+4.@"3+ R(%(&+"
*(7(C.)+ QY+3@7."
C(H+ C.)+ C.)+R(%(&+"
*(7(C.)+
>88<'2/;13 Y/()-* !
C.)+R(%(&+"
*(7(C.)+
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#!P
P.E 0,("1 K@%4 .% d2KC^ 5-#+%7 R.)+ Z9\ C.)+R(%(&+" *"#B+" L".&"(H
*(7(C.)+ QY+3@7."
0,("1 5.%7+Y7 C.)+R(%(&+" QY+3@7."
QY+3@7."
C(H+ C.)+
K+4.@"3+ R(%(&+"
C.)+R(%(&+"
>88<'2/;13 Y/()-* !
*"#B+" L".&"(H
0,("1 5.%7+Y7
*(7(C.)+
C.)+R(%(&+" QY+3@7."
*(7(C.)+ QY+3@7."
*(7(C.)+
>88<'2/;13 Y/()-* $
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#!R
P.E 0,("1 K@%4 .% d2KC^ 5-#+%7 R.)+ ZU\ C.)+R(%(&+" *"#B+" L".&"(H
*(7(C.)+ QY+3@7."
0,("1 5.%7+Y7 C.)+R(%(&+" QY+3@7."
QY+3@7."
C(H+ C.)+
K+4.@"3+ R(%(&+"
C.)+R(%(&+"
>88<'2/;13 Y/()-* !
*"#B+" L".&"(H
0,("1 5.%7+Y7
*(7(C.)+
C.)+R(%(&+" QY+3@7."
*(7(C.)+ QY+3@7."
*(7(C.)+
>88<'2/;13 Y/()-* $
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#!S
P.E 0,("1 K@%4 .% d2KC^ 5-@47+" R.)+ Z8\ C.)+R(%(&+"
*(7(C.)+ QY+3@7."
4 @ D H # 7
K+4.@"3+ R(%(&+"
C.)+R(%(&+" QY+3@7."
*(7(C.)+ QY+3@7."
C(H+ C.)+ C.)+R(%(&+" >88<'2/;13 Y/()-* *(7(C.)+ *"#B+" L".&"(H 0,("1 5.%7+Y7
C.)+R(%(&+"
*(7(C.)+
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$T
P.E 0,("1 K@%4 .% d2KC^ 5-@47+" R.)+ Z:\ C.)+R(%(&+"
*(7(C.)+ QY+3@7."
4 @ D H # 7
K+4.@"3+ R(%(&+"
C.)+R(%(&+" QY+3@7."
*(7(C.)+ QY+3@7."
C(H+ C.)+ C.)+R(%(&+" >88<'2/;13 Y/()-* *(7(C.)+ *"#B+" L".&"(H 0,("1 5.%7+Y7
C.)+R(%(&+"
*(7(C.)+
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$!
HIDDEN SLIDE Dynamic Resource Allocation and Data Locality
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$$
Running a Spark Application Locally
• Use spark-submit --master to specify cluster option
  – Local options
  – local[*] – run locally with as many threads as cores (default)
  – local[n] – run locally with n threads
  – local – run locally with a single thread
Python
$ spark-submit --master local[3] \ WordCount.py fileURL
Scala
$ spark-submit --master local[3] --class \ WordCount MyJarFile.jar fileURL
Java
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$"
Running a Spark Application on a Cluster
• Use spark-submit --master to specify cluster option
  – Cluster options
  – yarn-client
  – yarn-cluster
  – spark://masternode:port (Spark Standalone)
  – mesos://masternode:port (Mesos)
Python
$ spark-submit --master yarn-cluster \ WordCount.py fileURL
Scala
$ spark-submit --master yarn-cluster --class \ WordCount MyJarFile.jar fileURL
Java
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$F
Starting the Spark Shell on a Cluster
• The Spark Shell can also be run on a cluster
• Pyspark and spark-shell both have a --master option
  – yarn (client mode only)
  – Spark or Mesos cluster manager URL
  – local[*] – run with as many threads as cores (default)
  – local[n] – run locally with n worker threads
  – local – run locally without distributed processing
Python
$ pyspark --master yarn
Scala
$ spark-shell --master yarn
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$I
Options when Submitting a Spark Application to a Cluster
• Some other spark-submit options for clusters
  • --jars – additional JAR files (Scala and Java only)
  • --py-files – additional Python files (Python only)
  • --driver-java-options – parameters to pass to the driver JVM
  • --executor-memory – memory per executor (e.g. 1000M, 2G) (Default: 1G)
  • --packages – Maven coordinates of an external library to include
• Plus several YARN-specific options
  • --num-executors
  • --queue
• Show all available options
  • --help
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$N
56(,7+" X.,#34 :*';34 /3. &-8<1='34 / 78/*9 >88<'2/;13
&'()*'+,)-. &/)/ 0*12-(('34 5')6 78/*9
!
0,("1 2,,-#3($.%4 B4A 0,("1 06+--
!
5"+($%& 76+ 0,("15.%7+Y7
!
J@#-)#%& ( 0,("1 2,,-#3($.% Z03(-( (%) [(B(\
!
K@%%#%& ( 0,("1 2,,-#3($.%
!
J6- 78/*9 >88<'2/;13 :-+ D?
!
P.H+E."1^ !"#7+ (%) K@% ( 0,("1 2,,-#3($.%
!
5.%_&@"#%& 0,("1 L".,+"$+4
!
N.&%&
!
5.%3-@4#.%
!
P.H+E."1^ 5.%_&@"+ ( 0,("1 2,,-#3($.%
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$P
The Spark Application Web UI
The Spark UI lets you monitor running jobs, and view statistics and configuration
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$R
Accessing the Spark UI
• The Web UI is run by the Spark drivers
  – When running locally: http://localhost:4040
  – When running on a cluster, access via the cluster UI, e.g. YARN UI
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#$S
Viewing Spark Job History (1)
• Viewing Spark Job History
  – Spark UI is only available while the application is running
  – Use Spark History Server to view metrics for a completed application
  – Optional Spark component
• Accessing the History Server
  – For local jobs, access by URL
  – E.g. localhost:18080
  – For YARN Jobs, click History link in YARN UI
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#"T
Viewing Spark Job History (2)
• Spark History Server
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#"!
56(,7+" X.,#34 :*';34 /3. &-8<1='34 / 78/*9 >88<'2/;13
&'()*'+,)-. &/)/ 0*12-(('34 5')6 78/*9
!
0,("1 2,,-#3($.%4 B4A 0,("1 06+--
!
5"+($%& 76+ 0,("15.%7+Y7
!
J@#-)#%& ( 0,("1 2,,-#3($.% Z03(-( (%) [(B(\
!
K@%%#%& ( 0,("1 2,,-#3($.%
!
X6+ 0,("1 2,,-#3($.% !+D ]I
!
@1L-51*9] :*')- /3. B,3 / 78/*9 >88<'2/;13
!
5.%_&@"#%& 0,("1 L".,+"$+4
!
N.&%&
!
5.%3-@4#.%
!
P.H+E."1^ 5.%_&@"+ ( 0,("1 2,,-#3($.%
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#"$
Building and Running Scala Applications in the Homework Assignments
• Basic Maven projects are provided in the exercises/spark/projects
$ mvn package $ spark-submit \ --class solution.CountJPGs \ target/countjpgs-1.0.jar \ weblogs.*
Project Directory Structure +countjpgs pom.xml +src main + +scala +solution +target -countjpgs-1.0.jar
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#""
Homework: Write and Run a Spark Application
• In this homework assignment you will
  – Write a Spark application to count JPG requests in a web server log
  – If you choose to use Scala, compile and package the application in a JAR file
  – Run the application locally to test
  – Submit the application to run on the YARN cluster
• Please refer to the Homework description
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#"F
56(,7+" X.,#34 :*';34 /3. &-8<1='34 / 78/*9 >88<'2/;13
&'()*'+,)-. &/)/ 0*12-(('34 5')6 78/*9
!
0,("1 2,,-#3($.%4 B4A 0,("1 06+--
!
5"+($%& 76+ 0,("15.%7+Y7
!
J@#-)#%& ( 0,("1 2,,-#3($.% Z03(-( (%) [(B(\
!
K@%%#%& ( 0,("1 2,,-#3($.%
!
X6+ 0,("1 2,,-#3($.% !+D ]I
!
P.H+E."1^ !"#7+ (%) K@% ( 0,("1 2,,-#3($.%
!
O13E4,*'34 78/*9 0*18-*;-(
!
N.&%&
!
5.%3-@4#.%
!
P.H+E."1^ 5.%_&@"+ ( 0,("1 2,,-#3($.%
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#"I
Spark Application Configuration
• Spark provides numerous properties for configuring your application
• Some example properties
  – spark.master
  – spark.app.name
  – spark.local.dir – where to store local files such as shuffle output (default /tmp)
  – spark.ui.port – port to run the Spark Application UI (default 4040)
  – spark.executor.memory – how much memory to allocate to each Executor (default 512m)
  – And many more...
  – See Spark Configuration page for more details
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#"N
Spark Application Configuration
• Spark Applications can be configured
  – Declaratively or
  – Programmatically
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#"P
Declarative Configuration Options
• spark-submit script
  – e.g., spark-submit --driver-memory 500M
• Properties file
  – Tab- or space-separated list of properties and values
  – Load with spark-submit --properties-file filename
  – Example:
      spark.master spark://masternode:7077
      spark.local.dir /tmp
      spark.ui.port 4444
• Site defaults properties file
  – $SPARK_HOME/conf/spark-defaults.conf
  – Template file provided
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#"R
Setting Configuration Properties Programmatically
• Spark configuration settings are part of the SparkContext
• Configure using a SparkConf object
• Some example functions
  – setAppName(name)
  – setMaster(master)
  – set(property-name, value)
• set functions return a SparkConf object to support chaining
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#"S
SparkConf Example (Python) import sys from pyspark import SparkContext from pyspark import SparkConf if __name__ == "__main__": if len(sys.argv) < 2: print >> sys.stderr, "Usage: WordCount " exit(-1) sconf = SparkConf() \ .setAppName("Word Count") \ .set("spark.ui.port","4141") sc = SparkContext(conf=sconf) counts = sc.textFile(sys.argv[1]) \ .flatMap(lambda line: line.split()) \ .map(lambda w: (w,1)) \ .reduceByKey(lambda v1,v2: v1+v2) for pair in counts.take(5): print pair
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#FT
SparkConf Example (Scala) import org.apache.spark.SparkContext import org.apache.spark.SparkContext._ import org.apache.spark.SparkConf object WordCount { def main(args: Array[String]) { if (args.length < 1) { System.err.println("Usage: WordCount ") System.exit(1) } val sconf = new SparkConf() .setAppName("Word Count") .set("spark.ui.port","4141") val sc = new SparkContext(sconf) val counts = sc.textFile(args(0)). flatMap(line => line.split("\\W")). map(word => (word,1)). reduceByKey(_ + _) counts.take(5).foreach(println) } } © Copyright 2010-2015 Cloudera. All rights reserved. Not to be reproduced or shared without prior written consent from Cloudera.
!"#F!
Viewing Spark Properties
• You can view the Spark property settings in the Spark Application UI
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#F$
56(,7+" X.,#34 :*';34 78/*9 >88<'2/;13(
&'()*'+,)-. &/)/ 0*12-(('34 5')6 78/*9
!
0,("1 2,,-#3($.%4 B4A 0,("1 06+--
!
5"+($%& 76+ 0,("15.%7+Y7
!
J@#-)#%& ( 0,("1 2,,-#3($.% Z03(-( (%) [(B(\
!
K@%%#%& ( 0,("1 2,,-#3($.%
!
X6+ 0,("1 2,,-#3($.% !+D ]I
!
P.H+E."1^ !"#7+ (%) K@% ( 0,("1 2,,-#3($.%
!
5.%_&@"#%& 0,("1 L".,+"$+4
!
[144'34
!
5.%3-@4#.%
!
P.H+E."1^ 5.%_&@"+ ( 0,("1 2,,-#3($.%
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#F"
Spark Logging
• Spark uses Apache Log4j for logging
  – Allows for controlling logging at runtime using a properties file
  – Enable or disable logging, set logging levels, select output destination
  – For more info see http://logging.apache.org/log4j/1.2/
• Log4j provides several logging levels
  – Fatal
  – Error
  – Warn
  – Info
  – Debug
  – Trace
  – Off
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#FF
Spark Log Files (1)
• Log file locations depend on your cluster management platform
• YARN
  – If log aggregation is off, logs are stored locally on each worker node
  – If log aggregation is on, logs are stored in HDFS
  – Default /var/log/hadoop-yarn
  – Access via yarn logs command or YARN RM UI
$ yarn application -list Application-Id application_1441395433148_0003 application_1441395433148_0001
Application-Name Application-Type… Spark shell SPARK … myapp.jar MAPREDUCE …
$ yarn logs -applicationId < appid > … > 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#FI
0,("1 N.& O#-+4 Z:\
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#FN
Configuring Spark Logging (1)
• Logging levels can be set for the cluster, for individual applications, or even for specific components or subsystems
• Default for machine: $SPARK_HOME/conf/log4j.properties
  – Start by copying log4j.properties.template
log4j.properties.template
# Set everything to be logged to the console log4j.rootCategory=INFO, console log4j.appender.console=org.apache.log4j.ConsoleAppender log4j.appender.console.target=System.err …
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#FP
Configuring Spark Logging (2)
• Spark will use the first log4j.properties file it finds in the Java classpath
• Spark Shell will read log4j.properties from the current directory
  – Copy log4j.properties to the working directory and edit
l%&'(#$)*"+',*$-./#$& m-.&UiA,".,+"$+4
# Set everything to be logged to the console log4j.rootCategory=DEBUG, console log4j.appender.console=org.apache.log4j.ConsoleAppender log4j.appender.console.target=System.err …
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#FR
56(,7+" X.,#34 :*';34 78/*9 >88<'2/;13(
&'()*'+,)-. &/)/ 0*12-(('34 5')6 78/*9
!
0,("1 2,,-#3($.%4 B4A 0,("1 06+--
!
5"+($%& 76+ 0,("15.%7+Y7
!
J@#-)#%& ( 0,("1 2,,-#3($.% Z03(-( (%) [(B(\
!
K@%%#%& ( 0,("1 2,,-#3($.%
!
X6+ 0,("1 2,,-#3($.% !+D ]I
!
P.H+E."1^ !"#7+ (%) K@% ( 0,("1 2,,-#3($.%
!
5.%_&@"#%& 0,("1 L".,+"$+4
!
N.&%&
!
O132<,('13
!
P.H+E."1^ 5.%_&@"+ ( 0,("1 2,,-#3($.%
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#FS
Essential Points (1)
• Use the Spark Shell application for interactive data exploration
• Write a Spark application to run independently
• Spark applications require a Spark Context object
• Spark applications are run using the spark-submit script
• Spark configuration parameters can be set at runtime using the spark-submit script or programmatically using a SparkConf object
• Spark uses log4j for logging
  – Configure using a log4j.properties file
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#IT
Essential Points (2)
• Spark is designed to run on a cluster
  – Spark includes a basic cluster management platform called Spark Standalone
  – Can also run on Hadoop YARN and Mesos
• The master distributes tasks to individual workers in the cluster
  – Tasks run in executors – JVMs running on worker nodes
• Spark clusters work closely with HDFS
  – Tasks are assigned to workers where the data is physically stored when possible
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#I!
56(,7+" X.,#34 :*';34 /3. &-8<1='34 / 78/*9 >88<'2/;13
&'()*'+,)-. &/)/ 0*12-(('34 5')6 78/*9
!
0,("1 2,,-#3($.%4 B4A 0,("1 06+--
!
5"+($%& 76+ 0,("15.%7+Y7
!
J@#-)#%& ( 0,("1 2,,-#3($.% Z03(-( (%) [(B(\
!
K@%%#%& ( 0,("1 2,,-#3($.%
!
X6+ 0,("1 2,,-#3($.% !+D ]I
!
P.H+E."1^ !"#7+ (%) K@% ( 0,("1 2,,-#3($.%
!
5.%_&@"#%& 0,("1 L".,+"$+4
!
N.&%&
!
5.%3-@4#.%
!
@1L-51*9] O13E4,*- / 78/*9 >88<'2/;13
> 5.,/"#&67 :;8;?:;8< 5-.@)+"(A 2-- "#&674 "+4+"B+)A C.7 7. D+ "+,".)@3+) ." 46("+) E#76.@7 ,"#." E"#F+% 3.%4+%7 G".H 5-.@)+"(A
!"#I$