diff --git a/research/nlp/senta/src/config.py b/research/nlp/senta/src/config.py index f8bc44d092506f15de17c61ff496fdb2fd5d005a..743485ad13020f32bd35dc8ed231ece4b6232884 100644 --- a/research/nlp/senta/src/config.py +++ b/research/nlp/senta/src/config.py @@ -72,12 +72,12 @@ sstcfg = { "split_char": " ", "unk_token": "[UNK]", "params": { - "bpe_vocab_file": "../roberta_en.vocab.bpe", - "bpe_json_file": "../roberta_en.encoder.json" + "bpe_vocab_file": "roberta_en.vocab.bpe", + "bpe_json_file": "roberta_en.encoder.json" } }, "need_convert": True, - "vocab_path": "../roberta_en.vocab.txt", + "vocab_path": "roberta_en.vocab.txt", "max_seq_len": 512, "truncation_type": 0, "padding_id": 1, @@ -90,7 +90,7 @@ sstcfg = { } ], "config": { - "data_path": "../data/en/finetune/SST-2/train", + "data_path": "data/en/finetune/SST-2/train", "shuffle": True, "batch_size": 1, "epoch": 10, @@ -122,12 +122,12 @@ sstcfg = { "split_char": " ", "unk_token": "[UNK]", "params": { - "bpe_vocab_file": "../roberta_en.vocab.bpe", - "bpe_json_file": "../roberta_en.encoder.json" + "bpe_vocab_file": "roberta_en.vocab.bpe", + "bpe_json_file": "roberta_en.encoder.json" } }, "need_convert": True, - "vocab_path": "../roberta_en.vocab.txt", + "vocab_path": "roberta_en.vocab.txt", "max_seq_len": 512, "truncation_type": 0, "padding_id": 1, @@ -153,7 +153,7 @@ sstcfg = { } ], "config": { - "data_path": "../data/en/finetune/SST-2/test", + "data_path": "data/en/finetune/SST-2/test", "shuffle": False, "batch_size": 1, "epoch": 1, @@ -197,12 +197,12 @@ sstcfg = { "split_char": " ", "unk_token": "[UNK]", "params": { - "bpe_vocab_file": "../roberta_en.vocab.bpe", - "bpe_json_file": "../roberta_en.encoder.json" + "bpe_vocab_file": "roberta_en.vocab.bpe", + "bpe_json_file": "roberta_en.encoder.json" } }, "need_convert": True, - "vocab_path": "../roberta_en.vocab.txt", + "vocab_path": "roberta_en.vocab.txt", "max_seq_len": 512, "truncation_type": 0, "padding_id": 1, @@ -216,7 +216,7 @@ sstcfg = { } ], "config": { - "data_path": "../data/en/finetune/SST-2/dev", + "data_path": "data/en/finetune/SST-2/dev", "shuffle": False, "batch_size": 1, "epoch": 1, @@ -267,12 +267,12 @@ semcfg = { "split_char": " ", "unk_token": "[UNK]", "params": { - "bpe_vocab_file": "../roberta_en.vocab.bpe", - "bpe_json_file": "../roberta_en.encoder.json" + "bpe_vocab_file": "roberta_en.vocab.bpe", + "bpe_json_file": "roberta_en.encoder.json" } }, "need_convert": True, - "vocab_path": "../roberta_en.vocab.txt", + "vocab_path": "roberta_en.vocab.txt", "max_seq_len": 512, "truncation_type": 0, "padding_id": 1, @@ -292,12 +292,12 @@ semcfg = { "split_char": " ", "unk_token": "[UNK]", "params": { - "bpe_vocab_file": "../roberta_en.vocab.bpe", - "bpe_json_file": "../roberta_en.encoder.json" + "bpe_vocab_file": "roberta_en.vocab.bpe", + "bpe_json_file": "roberta_en.encoder.json" } }, "need_convert": True, - "vocab_path": "../roberta_en.vocab.txt", + "vocab_path": "roberta_en.vocab.txt", "max_seq_len": 512, "truncation_type": 0, "padding_id": 1, @@ -322,15 +322,15 @@ semcfg = { } ], "config": { - "data_path": "../data/en/finetune/absa_laptops/train", + "data_path": "data/en/finetune/absa_laptops/train", "shuffle": True, "batch_size": 1, "epoch": 10, "sampling_rate": 1.0, "extra_params": { - "vocab_path": "../roberta_en.vocab.txt", - "bpe_vocab_file": "../roberta_en.vocab.bpe", - "bpe_json_file": "../roberta_en.encoder.json", + "vocab_path": "roberta_en.vocab.txt", + "bpe_vocab_file": "roberta_en.vocab.bpe", + "bpe_json_file": "roberta_en.encoder.json", "label_map_config": "", "max_seq_len": 512, "do_lower_case": True, @@ -369,12 +369,12 @@ semcfg = { "split_char": " ", "unk_token": "[UNK]", "params": { - "bpe_vocab_file": "../roberta_en.vocab.bpe", - "bpe_json_file": "../roberta_en.encoder.json" + "bpe_vocab_file": "roberta_en.vocab.bpe", + "bpe_json_file": "roberta_en.encoder.json" } }, "need_convert": True, - "vocab_path": "../roberta_en.vocab.txt", + "vocab_path": "roberta_en.vocab.txt", "max_seq_len": 512, "truncation_type": 0, "padding_id": 1, @@ -395,12 +395,12 @@ semcfg = { "split_char": " ", "unk_token": "[UNK]", "params": { - "bpe_vocab_file": "../roberta_en.vocab.bpe", - "bpe_json_file": "../roberta_en.encoder.json" + "bpe_vocab_file": "roberta_en.vocab.bpe", + "bpe_json_file": "roberta_en.encoder.json" } }, "need_convert": True, - "vocab_path": "../roberta_en.vocab.txt", + "vocab_path": "roberta_en.vocab.txt", "max_seq_len": 512, "truncation_type": 0, "padding_id": 1, @@ -426,15 +426,15 @@ semcfg = { } ], "config": { - "data_path": "../data/en/finetune/absa_laptops/test", + "data_path": "data/en/finetune/absa_laptops/test", "shuffle": False, "batch_size": 1, "epoch": 1, "sampling_rate": 1.0, "extra_params": { - "vocab_path": "../roberta_en.vocab.txt", - "bpe_vocab_file": "../roberta_en.vocab.bpe", - "bpe_json_file": "../roberta_en.encoder.json", + "vocab_path": "roberta_en.vocab.txt", + "bpe_vocab_file": "roberta_en.vocab.bpe", + "bpe_json_file": "roberta_en.encoder.json", "label_map_config": "", "max_seq_len": 512, "do_lower_case": True, diff --git a/research/nlp/senta/src/make_dataset.py b/research/nlp/senta/src/make_dataset.py index b9b7f9821b0787ced3e5f8a7f7b004675df4b3fb..b9c0826cb4afe8829f8f646ed49bc4f041579f5d 100644 --- a/research/nlp/senta/src/make_dataset.py +++ b/research/nlp/senta/src/make_dataset.py @@ -109,13 +109,13 @@ if __name__ == "__main__": train_wrapper = dataset_reader.train_reader.data_generator() make_dataset( wrapper=train_wrapper, - output_path='../data/', + output_path='data/', task_name=args.job, mode="train") dev_wrapper = dataset_reader.dev_reader.data_generator() make_dataset( wrapper=dev_wrapper, - output_path='../data/', + output_path='data/', task_name=args.job, mode="dev") if args.job == "Sem-L": @@ -127,12 +127,12 @@ if __name__ == "__main__": train_wrapper = dataset_reader.train_reader.data_generator() make_dataset( wrapper=train_wrapper, - output_path='../data/', + output_path='data/', task_name=args.job, mode="train") dev_wrapper = dataset_reader.test_reader.data_generator() make_dataset( wrapper=dev_wrapper, - output_path='../data/', + output_path='data/', task_name=args.job, mode="dev") diff --git a/research/nlp/senta/src/utils/params.py b/research/nlp/senta/src/utils/params.py index 2cea4a45f852844f5e483e739f1b1af8cf4d7faa..1d087e794b5e7b38be1076dfdbabaced639415e4 100644 --- a/research/nlp/senta/src/utils/params.py +++ b/research/nlp/senta/src/utils/params.py @@ -25,7 +25,7 @@ def replace_none(params): try: value = chr(int(value, base=16)) print("ord(value): ", ord(value)) - except IOError: + except ValueError: pass params[key] = value return params