{"created":"2020-11-19T15:19:43.948679+00:00","id":6268,"links":{},"metadata":{"_buckets":{"deposit":"ceacec72-8ae3-4135-9385-9a08603cc75f"},"_deposit":{"created_by":45,"id":"6268","owner":"45","owners":[45],"owners_ext":{"displayname":"","email":"dimennyaung@uit.edu.mm","username":""},"pid":{"revision_id":0,"type":"recid","value":"6268"},"status":"published"},"_oai":{"id":"oai:meral.edu.mm:recid/6268","sets":["1582963342780:1605779935331"]},"communities":["uit"],"item_1583103067471":{"attribute_name":"Title","attribute_value_mlt":[{"subitem_1551255647225":"Data Compression Strategy for Reference-Free Sequencing FASTQ Data","subitem_1551255648112":"en"}]},"item_1583103085720":{"attribute_name":"Description","attribute_value_mlt":[{"interim":"Today, Next Generation Sequencing (NGS) technologies play a vital role in many research fields such as medicine, microbiology, and agriculture. The huge amount of genomic sequencing data produced is growing exponentially. The storage, processing, and transmission of these data have become the most important challenges. Data compression seems to be a suitable solution to overcome these challenges. This paper proposes a lossless data compression strategy to process reference-free raw sequencing data in FASTQ format. The proposed system splits the input file into block files and creates a dynamic dictionary for reads. Afterwards, the transformed read sequences and dictionary are compressed using an appropriate lossless compression method. The performance of the proposed system was compared with existing state-of-the-art compression algorithms for three sample data sets. The proposed system provides up to 3% compression ratio of other compression algorithms."}]},"item_1583103108160":{"attribute_name":"Keywords","attribute_value_mlt":[{"interim":"Genomic Sequencing data"},{"interim":"lossless compression"},{"interim":"reference-free sequence"},{"interim":"reference-based sequence"}]},"item_1583103120197":{"attribute_name":"Files","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_access","date":[{"dateType":"Available","dateValue":"2020-11-19"}],"displaytype":"preview","filename":"Data Compression Strategy for Reference-Free Sequencing FASTQ Data.pdf","filesize":[{"value":"1.6 Mb"}],"format":"application/pdf","license_note":"© 2017 ICAIT","licensetype":"license_note","url":{"url":"https://meral.edu.mm/record/6268/files/Data Compression Strategy for Reference-Free Sequencing FASTQ Data.pdf"},"version_id":"5a348376-1cc4-4192-b946-b45990a583a0"}]},"item_1583103147082":{"attribute_name":"Conference papers","attribute_value_mlt":[{"subitem_acronym":"ICAIT-2017","subitem_c_date":"1-2 November, 2017","subitem_conference_title":"1st International Conference on Advanced Information Technologies","subitem_place":"Yangon, Myanmar","subitem_session":"Data Science","subitem_website":"https://www.uit.edu.mm/icait-2017/"}]},"item_1583105942107":{"attribute_name":"Authors","attribute_value_mlt":[{"subitem_authors":[{"subitem_authors_fullname":"Hsu Mon Lei Aung"},{"subitem_authors_fullname":"Swe Zin Hlaing"}]}]},"item_1583108359239":{"attribute_name":"Upload type","attribute_value_mlt":[{"interim":"Publication"}]},"item_1583108428133":{"attribute_name":"Publication type","attribute_value_mlt":[{"interim":"Conference paper"}]},"item_1583159729339":{"attribute_name":"Publication date","attribute_value":"2017-11-02"},"item_title":"Data Compression Strategy for Reference-Free Sequencing FASTQ Data","item_type_id":"21","owner":"45","path":["1605779935331"],"publish_date":"2020-11-19","publish_status":"0","recid":"6268","relation_version_is_last":true,"title":["Data Compression Strategy for Reference-Free Sequencing FASTQ Data"],"weko_creator_id":"45","weko_shared_id":-1},"updated":"2021-12-13T05:26:06.834284+00:00"}