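Sample dataset: each record is a four-digit year immediately followed by a two-digit temperature (for example, 200935 means year 2009, temperature 35).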
200935
200942
200912
201040
201020
201015
Map function in Python:
#!/usr/bin/env python
import re
import sys
def map_records(lines):
    """Yield one tab-separated year/temperature record per input line.

    Each input line is expected to hold a four-character year immediately
    followed by a two-character temperature (e.g. ``200935``).  Surrounding
    whitespace is stripped before slicing.

    Args:
        lines: Any iterable of text lines (``sys.stdin`` when run as a
            streaming mapper).

    Yields:
        Strings of the form ``<year><TAB><temperature>`` without a trailing
        newline.
    """
    for line in lines:
        record = line.strip()
        if not record:
            # A blank line would otherwise produce a bare-tab record, which
            # the reducer cannot unpack into a (year, temperature) pair.
            continue
        yield "%s\t%s" % (record[0:4], record[4:6])


def main():
    """Run the mapper over standard input, writing records to standard output."""
    # sys.stdout.write behaves the same on Python 2 and Python 3, unlike the
    # print statement.
    for output in map_records(sys.stdin):
        sys.stdout.write(output + "\n")


if __name__ == "__main__":
    main()
Reduce function in Python:
#!/usr/bin/env python
import sys
def reduce_records(lines):
    """Yield the maximum temperature for each run of lines sharing a year.

    Input lines must be ``<year><TAB><temperature>`` records, with all
    records for one year adjacent to each other (the order a sorted shuffle
    phase delivers them in).

    Args:
        lines: Any iterable of text lines (``sys.stdin`` when run as a
            streaming reducer).

    Yields:
        Strings of the form ``<year><TAB><max temperature>`` without a
        trailing newline, one per run of equal years, in input order.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields or the temperature is not an integer.
    """
    prev_year = None
    max_temp = None
    for line in lines:
        record = line.strip()
        if not record:
            continue
        year, temp_text = record.split("\t")
        temp = int(temp_text)  # parse once; compare as integers throughout
        if prev_year is not None and year != prev_year:
            # The year changed: the previous group is complete, so emit it.
            yield "%s\t%s" % (prev_year, max_temp)
            max_temp = None
        prev_year = year
        if max_temp is None or temp > max_temp:
            max_temp = temp
    # Emit the final group only if at least one record was read, so empty
    # input produces no output instead of a placeholder record.
    if prev_year is not None:
        yield "%s\t%s" % (prev_year, max_temp)


def main():
    """Run the reducer over standard input, writing results to standard output."""
    # sys.stdout.write behaves the same on Python 2 and Python 3, unlike the
    # print statement.
    for output in reduce_records(sys.stdin):
        sys.stdout.write(output + "\n")


if __name__ == "__main__":
    main()
200935
200942
200912
201040
201020
201015
Map function in Python:
#!/usr/bin/env python
import re
import sys
def map_records(lines):
    """Yield one tab-separated year/temperature record per input line.

    Each input line is expected to hold a four-character year immediately
    followed by a two-character temperature (e.g. ``200935``).  Surrounding
    whitespace is stripped before slicing.

    Args:
        lines: Any iterable of text lines (``sys.stdin`` when run as a
            streaming mapper).

    Yields:
        Strings of the form ``<year><TAB><temperature>`` without a trailing
        newline.
    """
    for line in lines:
        record = line.strip()
        if not record:
            # A blank line would otherwise produce a bare-tab record, which
            # the reducer cannot unpack into a (year, temperature) pair.
            continue
        yield "%s\t%s" % (record[0:4], record[4:6])


def main():
    """Run the mapper over standard input, writing records to standard output."""
    # sys.stdout.write behaves the same on Python 2 and Python 3, unlike the
    # print statement.
    for output in map_records(sys.stdin):
        sys.stdout.write(output + "\n")


if __name__ == "__main__":
    main()
Reduce function in Python:
#!/usr/bin/env python
import sys
def reduce_records(lines):
    """Yield the maximum temperature for each run of lines sharing a year.

    Input lines must be ``<year><TAB><temperature>`` records, with all
    records for one year adjacent to each other (the order a sorted shuffle
    phase delivers them in).

    Args:
        lines: Any iterable of text lines (``sys.stdin`` when run as a
            streaming reducer).

    Yields:
        Strings of the form ``<year><TAB><max temperature>`` without a
        trailing newline, one per run of equal years, in input order.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields or the temperature is not an integer.
    """
    prev_year = None
    max_temp = None
    for line in lines:
        record = line.strip()
        if not record:
            continue
        year, temp_text = record.split("\t")
        temp = int(temp_text)  # parse once; compare as integers throughout
        if prev_year is not None and year != prev_year:
            # The year changed: the previous group is complete, so emit it.
            yield "%s\t%s" % (prev_year, max_temp)
            max_temp = None
        prev_year = year
        if max_temp is None or temp > max_temp:
            max_temp = temp
    # Emit the final group only if at least one record was read, so empty
    # input produces no output instead of a placeholder record.
    if prev_year is not None:
        yield "%s\t%s" % (prev_year, max_temp)


def main():
    """Run the reducer over standard input, writing results to standard output."""
    # sys.stdout.write behaves the same on Python 2 and Python 3, unlike the
    # print statement.
    for output in reduce_records(sys.stdin):
        sys.stdout.write(output + "\n")


if __name__ == "__main__":
    main()
No comments:
Post a Comment