日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 >内容正文

编程问答

MapReduce----电信数据清洗

發布時間:2023/12/18 编程问答 23 豆豆
生活随笔 收集整理的這篇文章主要介紹了 MapReduce----电信数据清洗 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

MapReduce---電信數據清洗

  • 數據解析及題目分析
    • 數據解析
    • 題目及分析
  • 代碼實現
    • 自定義類
    • Map階段
    • Reduce階段
    • Driver階段

數據解析及題目分析

數據解析


數據一

18620192711,15733218050,1506628174,1506628265,650000,810000 18641241020,15733218050,1509757276,1509757464,330000,620000 15778423030,15614201525,1495290451,1495290923,370000,420000 13341109505,15151889601,1492661762,1492662200,330000,460000 13341109505,13666666666,1470111026,1470111396,360000,230000 15032293356,13799999999,1495937181,1495937360,500000,630000 15733218050,13341109505,1452601976,1452602401,620000,530000 13269361119,13269361119,1487640690,1487641023,450000,430000 13799999999,15338595369,1511928814,1511929111,540000,230000 15733218050,15778423030,1542457633,1542457678,450000,530000 13341109505,17731088562,1484364844,1484365342,460000,360000 18332562075,15778423030,1522426275,1522426473,140000,120000 13560190665,18301589432,1485648596,1485648859,620000,820000 15733218050,13520404983,1538992531,1538992605,130000,150000 15778423030,13566666666,1484008721,1484009210,810000,330000 13566666666,17731088562,1541812913,1541813214,220000,360000 15778423030,15733218050,1464198621,1464198803,630000,340000 15151889601,13341109505,1467441052,1467441538,640000,440000 18620192711,13666666666,1510997876,1510998253,450000,610000 13341109505,18641241020,1509074946,1509075201,710000,310000 17731088562,13341109505,1471571270,1471571706,430000,630000 13520404983,13560190665,1476626194,1476626683,500000,440000 15338595369,13341109505,1523996031,1523996059,420000,460000 15151889601,13341109505,1489658199,1489658394,330000,500000 13560190665,15338595369,1510890681,1510891129,410000,520000 15733218050,13566666666,1503498540,1503498726,420000,310000 17731088562,13560190665,1470571255,1470571708,540000,330000 15338595369,15614201525,1496767879,1496768364,520000,500000 17731088562,15778423030,1494602567,1494602784,500000,420000 15778423030,18641241020,1517445007,1517445358,450000,530000 13566666666,17731088562,1464697765,1464697894,360000,620000 15778423030,13799999999,1525543218,1525543493,500000,820000 13341109505,13520404983,1521861238,1521861421,500000,130000 
13566666666,13560190665,1513918160,1513918538,340000,210000 15032293356,18620192711,1485688388,1485688537,540000,530000 13799999999,13341109505,1531196363,1531196438,230000,320000 15338595369,15151889601,1512125514,1512125978,540000,810000 18332562075,13560190665,1523311951,1523312239,650000,410000 15778423030,15032293356,1467953782,1467954054,810000,540000 15151889601,15733218050,1530848147,1530848231,310000,150000 13269361119,18301589432,1541271874,1541272273,310000,310000 15032293356,15338595369,1520833915,1520834201,450000,360000 15778423030,13269361119,1452817391,1452817596,820000,410000 13520404983,18332562075,1474563316,1474563593,710000,540000 18301589432,15778423030,1473596284,1473596528,620000,310000 15732648446,15151889601,1535584645,1535585117,530000,310000 18301589432,13269361119,1511910316,1511910341,340000,320000 13560190665,18641241020,1533379659,1533379717,120000,710000 15338595369,18332562075,1474152847,1474153092,330000,500000 13520404983,17731088562,1504907456,1504907617,820000,510000 15732648446,18301589432,1521692836,1521692977,220000,370000 15032293356,15614201525,1471445293,1471445756,360000,530000 18641241020,15778423030,1517192728,1517193050,210000,610000 17731088562,15733218050,1493420249,1493420555,370000,820000 18620192711,13799999999,1477952709,1477953088,310000,140000 13666666666,13799999999,1541066076,1541066541,230000,640000 13269361119,17731088562,1540060141,1540060511,150000,540000 18332562075,13799999999,1489772390,1489772817,540000,710000 13799999999,15732648446,1503882021,1503882332,530000,520000 13566666666,15614201525,1504983084,1504983241,820000,140000 18641241020,15032293356,1463447030,1463447080,330000,640000 18301589432,13566666666,1493646451,1493646796,310000,510000 15732648446,15032293356,1537185125,1537185619,430000,810000 15338595369,13341109505,1493411872,1493411891,370000,150000 15778423030,17731088562,1540631847,1540632271,320000,500000 13666666666,15614201525,1545200734,1545200959,360000,640000 
15032293356,13799999999,1455000970,1455001084,460000,650000 18641241020,18620192711,1529968498,1529968626,410000,510000 17731088562,15732648446,1455361378,1455361505,440000,650000 18301589432,13666666666,1518564232,1518564421,210000,640000 15733218050,18620192711,1515672794,1515673149,360000,360000 13520404983,18620192711,1521620546,1521620913,820000,370000 18332562075,18641241020,1498131159,1498131300,820000,230000 13666666666,18301589432,1491354142,1491354544,220000,710000 18301589432,15614201525,1511731560,1511732015,810000,620000 13269361119,13666666666,1539065031,1539065096,810000,810000 15778423030,18641241020,1518364528,1518364995,130000,610000 15733218050,15032293356,1491974898,1491975316,340000,810000 13269361119,15733218050,1543514850,1543514946,410000,460000 13341109505,13666666666,1482223100,1482223577,220000,410000 15338595369,13341109505,1495958992,1495959292,330000,420000 13341109505,18641241020,1511010003,1511010292,540000,620000 18620192711,13269361119,1462453298,1462453559,320000,360000 13666666666,13799999999,1518047527,1518047967,640000,420000 13341109505,13666666666,1474872886,1474872907,360000,510000 13666666666,18641241020,1473575493,1473575663,150000,520000 15151889601,15732648446,1509418483,1509418891,510000,540000 13560190665,13520404983,1467696946,1467697103,150000,460000 13520404983,15614201525,1510958686,1510959064,320000,610000 15778423030,15614201525,1470012457,1470012660,210000,210000 15778423030,17731088562,1542680029,1542680382,630000,520000 18332562075,15338595369,1453896030,1453896522,640000,370000 15032293356,18620192711,1488286898,1488287248,530000,150000 18641241020,15733218050,1489804133,1489804185,150000,630000 15733218050,13666666666,1506782751,1506782854,220000,500000 13520404983,17731088562,1487421622,1487421784,230000,330000 15151889601,13269361119,1538113862,1538113902,370000,630000 15778423030,17731088562,1466691118,1466691412,540000,530000 15032293356,13520404983,1521151509,1521151701,520000,430000 
15614201525,13666666666,1464083166,1464083352,330000,650000

字段解析:呼叫者手機號,接聽者手機號,通話開始時間戳,通話結束時間戳,呼叫者地址省份編碼,接聽者地址省份編碼(時間戳以秒為單位)


數據二

1,110000,北京市 2,120000,天津市 3,130000,河北省 4,140000,山西省 5,150000,內蒙古自治區 6,210000,遼寧省 7,220000,吉林省 8,230000,黑龍江省 9,310000,上海市 10,320000,江蘇省 11,330000,浙江省 12,340000,安徽省 13,350000,福建省 14,360000,江西省 15,370000,山東省 16,410000,河南省 17,420000,湖北省 18,430000,湖南省 19,440000,廣東省 20,450000,廣西壯族自治區 21,460000,海南省 22,500000,重慶市 23,510000,四川省 24,520000,貴州省 25,530000,云南省 26,540000,西藏自治區 27,610000,陜西省 28,620000,甘肅省 29,630000,青海省 30,640000,寧夏回族自治區 31,650000,新疆維吾爾自治區 32,710000,臺灣省 33,810000,香港特別行政區 34,820000,澳門特別行政區

字段解析:地址id,省份編碼,省份名稱


數據三

7,18000696806,趙賀彪 8,15151889601,張倩 9,13269361119,王世昌 10,15032293356,張濤 11,17731088562,張陽 12,15338595369,李進全 13,15733218050,杜澤文 14,15614201525,任宗陽 15,15778423030,梁鵬 16,18641241020,郭美彤 17,15732648446,劉飛飛 18,13341109505,段光星 19,13560190665,唐會華 20,18301589432,楊力謀 21,13520404983,溫海英 22,18332562075,朱尚寬 23,18620192711,劉能宗 24,13566666666,劉柳 25,13666666666,鄧二 26,13799999999,菜中路

字段解析:電話ID,電話號碼,姓名


題目及分析


  • 將電話號碼替換成人名
  • 將撥打、接聽電話的時間戳轉換成日期
  • 求出電話的通話時間,以秒做單位
  • 將省份編碼替換成省份名稱
  • 最后數據的樣例:
  • 鄧二,張倩,13666666666,15151889601,2018-03-29 10:58:12,2018-03-29 10:58:42,30,黑龍江省,上海市
    • 需求一和需求四可以將數據二和數據三緩存到內存里,然后進行替換操作
    • 需求二:只需做簡單的時間類型轉換(時間戳 → 日期字符串)
    • 需求三:用結束時間戳減去開始時間戳,即可得到以秒為單位的通話時長

    代碼實現

    自定義類

    import org.apache.hadoop.io.WritableComparable;import java.io.DataInput; import java.io.DataOutput; import java.io.IOException;public class Data implements WritableComparable<Data> {private String name_A;private String name_B;private String phoneNum_A;private String phoneNum_B;private String startTime;private String endTime;private String phoneLong;private String location_A;private String location_B;@Overridepublic int compareTo(Data o) {return 0;}@Overridepublic void write(DataOutput dataOutput) throws IOException {dataOutput.writeUTF(name_A);dataOutput.writeUTF(name_B);dataOutput.writeUTF(phoneNum_A);dataOutput.writeUTF(phoneNum_B);dataOutput.writeUTF(startTime);dataOutput.writeUTF(endTime);dataOutput.writeUTF(phoneLong);dataOutput.writeUTF(location_A);dataOutput.writeUTF(location_B);}@Overridepublic void readFields(DataInput dataInput) throws IOException {name_A = dataInput.readUTF();name_B = dataInput.readUTF();phoneNum_A = dataInput.readUTF();phoneNum_B = dataInput.readUTF();startTime = dataInput.readUTF();endTime = dataInput.readUTF();phoneLong = dataInput.readUTF();location_A = dataInput.readUTF();location_B = dataInput.readUTF();}public void set(String name_A, String name_B, String phoneNum_A, String phoneNum_B, String startTime, String endTime, String phoneLong, String location_A, String location_B) {this.name_A = name_A;this.name_B = name_B;this.phoneNum_A = phoneNum_A;this.phoneNum_B = phoneNum_B;this.startTime = startTime;this.endTime = endTime;this.phoneLong = phoneLong;this.location_A = location_A;this.location_B = location_B;}@Overridepublic String toString() {return name_A + "," +name_B + "," +phoneNum_A + "," +phoneNum_B + "," +startTime + "," +endTime + "," +phoneLong + "," +location_A + "," +location_B;}public String getName_A() {return name_A;}public void setName_A(String name_A) {this.name_A = name_A;}public String getName_B() {return name_B;}public void setName_B(String name_B) {this.name_B = name_B;}public String getPhoneNum_A() {return 
phoneNum_A;}public void setPhoneNum_A(String phoneNum_A) {this.phoneNum_A = phoneNum_A;}public String getPhoneNum_B() {return phoneNum_B;}public void setPhoneNum_B(String phoneNum_B) {this.phoneNum_B = phoneNum_B;}public String getStartTime() {return startTime;}public void setStartTime(String startTime) {this.startTime = startTime;}public String getEndTime() {return endTime;}public void setEndTime(String endTime) {this.endTime = endTime;}public String getPhoneLong() {return phoneLong;}public void setPhoneLong(String phoneLong) {this.phoneLong = phoneLong;}public String getLocation_A() {return location_A;}public void setLocation_A(String location_A) {this.location_A = location_A;}public String getLocation_B() {return location_B;}public void setLocation_B(String location_B) {this.location_B = location_B;} }

    Map階段

    import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper;import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.net.URI; import java.text.SimpleDateFormat; import java.util.HashMap; import java.util.Map;public class MapTest extends Mapper<LongWritable, Text, Data, NullWritable> {private Data k = new Data();private Map<String, String> userName = new HashMap<String, String>();private Map<String, String> location = new HashMap<String, String>();//用戶姓名private String name_A;private String name_B;//用戶地址private String loc_A;private String loc_B;//通話時間的轉換private String startTime;private String endTime;//通話時間private String time;@Overrideprotected void setup(Context context) throws IOException, InterruptedException {URI[] uris = context.getCacheFiles();File user = new File(uris[0]);String line;//緩存用戶姓名信息BufferedReader br;br = new BufferedReader(new FileReader(user));while ((line = br.readLine()) != null) {userName.put(line.split(",")[1], line.split(",")[2]);}//緩存地址信息File loc = new File(uris[1]);br = new BufferedReader(new FileReader(loc));while ((line = br.readLine()) != null) {location.put(line.split(",")[1], line.split(",")[2]);}}@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {String[] data = value.toString().split(",");//將用戶號碼轉換成姓名name_A = userName.get(data[0]);name_B = userName.get(data[1]);//將時間戳轉換成日期類型SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");startTime = sdf.format(Long.parseLong(data[2]));endTime = sdf.format(Long.parseLong(data[3]));//計算通話時間time = Long.parseLong(data[3]) - Long.parseLong(data[2]) + "秒";//替換地址位置loc_A = location.get(data[4]);loc_B = location.get(data[5]);//寫出數據k.set(name_A, name_B, data[0], data[1], startTime, endTime, time, loc_A, loc_B);context.write(k, NullWritable.get());} }

    Reduce階段

    import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Reducer;import java.io.IOException;public class RedTest extends Reducer<Data, NullWritable,Data,NullWritable> {@Overrideprotected void reduce(Data key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {//直接寫出數據即可for (NullWritable v:values){context.write(key,NullWritable.get());}} }

    Driver階段

    import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.File; import java.net.URI;public class DriTest {public static void main(String[] args) throws Exception {File file = new File("D:\\MP\\電信\\output");if (file.exists()) {delFile(file);driver();} else {driver();}}public static void delFile(File file) {File[] files = file.listFiles();if (files != null && files.length != 0) {for (int i = 0; i < files.length; i++) {delFile(files[i]);}}file.delete();}public static void driver() throws Exception {Configuration conf = new Configuration();Job job = Job.getInstance(conf);job.setMapperClass(MapTest.class);job.setJarByClass(DriTest.class);job.setReducerClass(RedTest.class);job.setMapOutputKeyClass(Data.class);job.setMapOutputValueClass(NullWritable.class);job.setOutputKeyClass(Data.class);job.setOutputValueClass(NullWritable.class);URI [] uris = new URI[2];uris[0] = new URI("file:///D:/MP/電信/input/userPhone.txt");uris[1] = new URI("file:///D:/MP/電信/input/location.txt");job.setCacheFiles(uris);FileInputFormat.setInputPaths(job, "D:\\MP\\電信\\input\\data.txt");FileOutputFormat.setOutputPath(job, new Path("D:\\MP\\電信\\output"));boolean b = job.waitForCompletion(true);System.exit(b ? 0 : 1);} }

    總結

    以上是生活随笔為你收集整理的MapReduce----电信数据清洗的全部內容,希望文章能夠幫你解決所遇到的問題。

    如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。