{"created":"2020-12-09T14:35:18.589383+00:00","id":6703,"links":{},"metadata":{"_buckets":{"deposit":"85c0c967-264e-41b3-9a7e-35ca2016581d"},"_deposit":{"created_by":45,"id":"6703","owner":"45","owners":[45],"owners_ext":{"displayname":"","email":"dimennyaung@uit.edu.mm","username":""},"pid":{"revision_id":0,"type":"depid","value":"6703"},"status":"published"},"_oai":{"id":"oai:meral.edu.mm:recid/00006703","sets":["1582963342780:1596102355557"]},"communities":["uit"],"item_1583103067471":{"attribute_name":"Title","attribute_value_mlt":[{"subitem_1551255647225":"Capacity Optimized Deduplication for Big Unstructured Data in Scale-out Distributed Storage System","subitem_1551255648112":"en"}]},"item_1583103085720":{"attribute_name":"Description","attribute_value_mlt":[{"interim":"Organizations in every market segment require their storage utilization to optimize and cost-effectively align with the changing storage capacity needs of their business. Scale-out distributed storage is becoming an important environment for the storage and exchange of information as the data intensive workloads are rapidly growing in the last decades. Data deduplication can provide efficient storage-space solution for the storage industries. However, during the data deduplication process, it faces the barrier of network and the burden of disk access. In this paper, an efficient deduplication scheme for scale-out distributed storage system is proposed to address the above issues. Since duplicate data detection is important in deduplication, Bloom filter array (BFA) is applied to reduce the I/O intensive workloads and accelerate the detection process without the hops between the nodes. The performance of this system is evaluated in terms of deduplication efficiency and throughput. \nThe proposed deduplication scheme not only saves storage space but also shortens time for further processes."}]},"item_1583103108160":{"attribute_name":"Keywords","attribute_value_mlt":[{"interim":"Scale-out Distributed Storage"},{"interim":"Bloom Filter"},{"interim":"Data Deduplication"},{"interim":"Capacity Optimization"},{"interim":"Unstructured Data"}]},"item_1583103120197":{"attribute_name":"Files","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_access","date":[{"dateType":"Available","dateValue":"2020-12-09"}],"displaytype":"preview","filename":"Capacity Optimized Deduplication for Big Unstructured Data in Scale-out Distributed Storage System.pdf","filesize":[{"value":"774 Kb"}],"format":"application/pdf","url":{"url":"https://meral.edu.mm/api/files/85c0c967-264e-41b3-9a7e-35ca2016581d/Capacity%20Optimized%20Deduplication%20for%20Big%20Unstructured%20Data%20in%20Scale-out%20Distributed%20Storage%20System.pdf"},"version_id":"131a1900-cbc3-4a62-aae3-7a7d7e9ff98e"}]},"item_1583103131163":{"attribute_name":"Journal articles","attribute_value_mlt":[{"subitem_journal_title":"IERI"}]},"item_1583105942107":{"attribute_name":"Authors","attribute_value_mlt":[{"subitem_authors":[{"subitem_authors_fullname":"Myat Pwint Phyu"},{"subitem_authors_fullname":"Thandar Thein"}]}]},"item_1583108359239":{"attribute_name":"Upload type","attribute_value_mlt":[{"interim":"Publication"}]},"item_1583108428133":{"attribute_name":"Publication type","attribute_value_mlt":[{"interim":"Journal article"}]},"item_1583159729339":{"attribute_name":"Publication date","attribute_value":"2013-12-02"},"item_title":"Capacity Optimized Deduplication for Big Unstructured Data in Scale-out Distributed Storage System","item_type_id":"21","owner":"45","path":["1596102355557"],"publish_date":"2020-12-09","publish_status":"0","recid":"6703","relation_version_is_last":true,"title":["Capacity Optimized Deduplication for Big Unstructured Data in Scale-out Distributed Storage \nSystem"],"weko_creator_id":"45","weko_shared_id":-1},"updated":"2021-12-13T00:43:12.196730+00:00"}