!13 [sync] PR-9: Sentencepiece version update: Upgraded from 0.1.92 to 0.1.98
From: @openeuler-sync-bot Reviewed-by: @zhunaipan Signed-off-by: @zhunaipan
This commit is contained in:
commit
d4e5feb396
@ -1,30 +0,0 @@
|
|||||||
From 624091a90e816f555106a1b1f994a45cb4989051 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Malcolm Smith <smith@chaquo.com>
|
|
||||||
Date: Tue, 12 Jan 2021 13:43:28 +0000
|
|
||||||
Subject: [PATCH 5/7] Add missing #include for BYTE_ORDER
|
|
||||||
|
|
||||||
---
|
|
||||||
src/util.h | 7 +++++++
|
|
||||||
1 file changed, 7 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/src/util.h b/src/util.h
|
|
||||||
index bf8a758..1680f4b 100644
|
|
||||||
--- a/src/util.h
|
|
||||||
+++ b/src/util.h
|
|
||||||
@@ -36,6 +36,13 @@
|
|
||||||
#include <pthread.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#if !defined(__APPLE__) && !defined(_WIN32)
|
|
||||||
+#include <endian.h>
|
|
||||||
+#if defined(BYTE_ORDER) && defined(__BIG_ENDIAN) && BYTE_ORDER == __BIG_ENDIAN
|
|
||||||
+#define IS_BIG_ENDIAN
|
|
||||||
+#endif
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
namespace sentencepiece {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
--
|
|
||||||
2.18.0.huawei.25
|
|
||||||
|
|
||||||
@ -1,24 +0,0 @@
|
|||||||
From 427d695ab4343568cc46411fbe83ef5ccc619752 Mon Sep 17 00:00:00 2001
|
|
||||||
From: mingruimingrui <mingruimingrui@hotmail.com>
|
|
||||||
Date: Sat, 27 Jun 2020 02:56:03 +0800
|
|
||||||
Subject: [PATCH 1/7] Added split_digits to SentencePieceTrainer
|
|
||||||
|
|
||||||
---
|
|
||||||
src/spec_parser.h | 1 +
|
|
||||||
1 file changed, 1 insertion(+)
|
|
||||||
|
|
||||||
diff --git a/src/spec_parser.h b/src/spec_parser.h
|
|
||||||
index 729e036..6dd054b 100644
|
|
||||||
--- a/src/spec_parser.h
|
|
||||||
+++ b/src/spec_parser.h
|
|
||||||
@@ -207,6 +207,7 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name,
|
|
||||||
PARSE_BOOL(split_by_unicode_script);
|
|
||||||
PARSE_BOOL(split_by_number);
|
|
||||||
PARSE_BOOL(split_by_whitespace);
|
|
||||||
+ PARSE_BOOL(split_digits);
|
|
||||||
PARSE_BOOL(treat_whitespace_as_suffix);
|
|
||||||
PARSE_REPEATED_STRING(control_symbols);
|
|
||||||
PARSE_REPEATED_STRING(user_defined_symbols);
|
|
||||||
--
|
|
||||||
2.18.0.huawei.25
|
|
||||||
|
|
||||||
@ -1,70 +0,0 @@
|
|||||||
From 5c09745aafa151be7ed5d9a9101f3e8c79a8758b Mon Sep 17 00:00:00 2001
|
|
||||||
From: stephantul <stephantul@gmail.com>
|
|
||||||
Date: Thu, 1 Oct 2020 12:49:13 +0200
|
|
||||||
Subject: [PATCH 3/7] Create options.md
|
|
||||||
|
|
||||||
---
|
|
||||||
doc/options.md | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
1 file changed, 51 insertions(+)
|
|
||||||
create mode 100644 doc/options.md
|
|
||||||
|
|
||||||
diff --git a/doc/options.md b/doc/options.md
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..7861fdc
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/doc/options.md
|
|
||||||
@@ -0,0 +1,51 @@
|
|
||||||
+# Training options
|
|
||||||
+
|
|
||||||
+The training options for the `spm_train` can be listed using `spm_train --help`. Since the standard `pip install` of sentencepiece does not necessarily install `spm_train`, the options are also listed here.
|
|
||||||
+
|
|
||||||
+```
|
|
||||||
+--help (show help) type: bool default: false
|
|
||||||
+--version (show version) type: bool default: false
|
|
||||||
+--minloglevel (Messages logged at a lower level than this don't actually get logged anywhere) type: int default: 0
|
|
||||||
+--input (comma separated list of input sentences) type: std::string default: ""
|
|
||||||
+--input_format (Input format. Supported format is `text` or `tsv`.) type: std::string default: ""
|
|
||||||
+--model_prefix (output model prefix) type: std::string default: "" --model_type (model algorithm: unigram, bpe, word or char) type: std::string default: "unigram"
|
|
||||||
+--vocab_size (vocabulary size) type: int32 default: 8000
|
|
||||||
+--accept_language (comma-separated list of languages this model can accept) type: std::string default: ""
|
|
||||||
+--self_test_sample_size (the size of self test samples) type: int32 default: 0
|
|
||||||
+--character_coverage (character coverage to determine the minimum symbols) type: double default: 0.9995
|
|
||||||
+--input_sentence_size (maximum size of sentences the trainer loads) type: int32 default: 0
|
|
||||||
+--shuffle_input_sentence (Randomly sample input sentences in advance. Valid when --input_sentence_size > 0) type: bool default: true
|
|
||||||
+--seed_sentencepiece_size (the size of seed sentencepieces) type: int32 default: 1000000
|
|
||||||
+--shrinking_factor (Keeps top shrinking_factor pieces with respect to the loss) type: double default: 0.75
|
|
||||||
+--num_threads (number of threads for training) type: int32 default: 16
|
|
||||||
+--num_sub_iterations (number of EM sub-iterations) type: int32 default: 2
|
|
||||||
+--max_sentencepiece_length (maximum length of sentence piece) type: int32 default: 16
|
|
||||||
+--max_sentence_length (maximum length of sentence in byte) type: int32 default: 4192
|
|
||||||
+--split_by_unicode_script (use Unicode script to split sentence pieces) type: bool default: true
|
|
||||||
+--split_by_number (split tokens by numbers (0-9)) type: bool default: true
|
|
||||||
+--split_by_whitespace (use a white space to split sentence pieces) type: bool default: true
|
|
||||||
+--split_digits (split all digits (0-9) into separate pieces) type: bool default: false
|
|
||||||
+--treat_whitespace_as_suffix (treat whitespace marker as suffix instead of prefix.) type: bool default: false
|
|
||||||
+--control_symbols (comma separated list of control symbols) type: std::string default: ""
|
|
||||||
+--user_defined_symbols (comma separated list of user defined symbols) type: std::string default: ""
|
|
||||||
+--required_chars (UTF8 characters in this flag are always used in the character set regardless of --character_coverage) type: std::string default: ""
|
|
||||||
+--byte_fallback (decompose unknown pieces into UTF-8 byte pieces) type: bool default: false
|
|
||||||
+--vocabulary_output_piece_score (Define score in vocab file) type: bool default: true
|
|
||||||
+--normalization_rule_name (Normalization rule name. Choose from nfkc or identity) type: std::string default: "nmt_nfkc"
|
|
||||||
+--normalization_rule_tsv (Normalization rule TSV file. ) type: std::string default: ""
|
|
||||||
+--denormalization_rule_tsv (Denormalization rule TSV file.) type: std::string default: ""
|
|
||||||
+--add_dummy_prefix (Add dummy whitespace at the beginning of text) type: bool default: true
|
|
||||||
+--remove_extra_whitespaces (Removes leading, trailing, and duplicate internal whitespace) type: bool default: true
|
|
||||||
+--hard_vocab_limit (If set to false, --vocab_size is considered as a soft limit.) type: bool default: true
|
|
||||||
+--use_all_vocab (If set to true, use all tokens as vocab. Valid for word/char models.) type: bool default: false
|
|
||||||
+--unk_id (Override UNK (<unk>) id.) type: int32 default: 0
|
|
||||||
+--bos_id (Override BOS (<s>) id. Set -1 to disable BOS.) type: int32 default: 1
|
|
||||||
+--eos_id (Override EOS (</s>) id. Set -1 to disable EOS.) type: int32 default: 2
|
|
||||||
+--pad_id (Override PAD (<pad>) id. Set -1 to disable PAD.) type: int32 default: -1
|
|
||||||
+--unk_piece (Override UNK (<unk>) piece.) type: std::string default: "<unk>"
|
|
||||||
+--bos_piece (Override BOS (<s>) piece.) type: std::string default: "<s>"
|
|
||||||
+--eos_piece (Override EOS (</s>) piece.) type: std::string default: "</s>"
|
|
||||||
+--pad_piece (Override PAD (<pad>) piece.) type: std::string default: "<pad>"
|
|
||||||
+--unk_surface (Dummy surface string for <unk>. In decoding <unk> is decoded to `unk_surface`.) type: std::string default: " ⁇ "
|
|
||||||
+--train_extremely_large_corpus (Increase bit depth for unigram tokenization.) type: bool default: false
|
|
||||||
+```
|
|
||||||
--
|
|
||||||
2.18.0.huawei.25
|
|
||||||
|
|
||||||
@ -1,30 +0,0 @@
|
|||||||
From 2ea571b8e509809bbe28e6cc3f1488b3cfde1ef9 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Kentaro Hayashi <hayashi@clear-code.com>
|
|
||||||
Date: Sat, 17 Oct 2020 16:54:20 +0900
|
|
||||||
Subject: [PATCH 4/7] Fix FTBFS on armel, mips, powerpc, m68k and sh4
|
|
||||||
|
|
||||||
---
|
|
||||||
src/CMakeLists.txt | 7 +++++++
|
|
||||||
1 file changed, 7 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
|
|
||||||
index 511b2ec..87765e5 100644
|
|
||||||
--- a/src/CMakeLists.txt
|
|
||||||
+++ b/src/CMakeLists.txt
|
|
||||||
@@ -197,6 +197,13 @@ target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static
|
|
||||||
if (SPM_ENABLE_SHARED)
|
|
||||||
target_link_libraries(sentencepiece ${SPM_LIBS})
|
|
||||||
target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece)
|
|
||||||
+ if ((${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7l") OR
|
|
||||||
+ (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips") OR
|
|
||||||
+ (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "m68k") OR
|
|
||||||
+ (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc") OR
|
|
||||||
+ (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "sh4"))
|
|
||||||
+ list(APPEND SPM_LIBS "atomic")
|
|
||||||
+ endif()
|
|
||||||
set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static)
|
|
||||||
set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0)
|
|
||||||
set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES)
|
|
||||||
--
|
|
||||||
2.18.0.huawei.25
|
|
||||||
|
|
||||||
@ -1,54 +0,0 @@
|
|||||||
From 82b8b6f61403fcfcef673ee49ed2dfe475ba4cf2 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sarubi <stsarut@gmail.com>
|
|
||||||
Date: Tue, 23 Feb 2021 20:47:25 +0530
|
|
||||||
Subject: [PATCH] Removed codes where Zero Width Joiner replaced with
|
|
||||||
whitespace.
|
|
||||||
|
|
||||||
---
|
|
||||||
data/nmt_nfkc.tsv | 3 +--
|
|
||||||
data/nmt_nfkc_cf.tsv | 3 +--
|
|
||||||
src/builder.cc | 1 -
|
|
||||||
3 files changed, 2 insertions(+), 5 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/data/nmt_nfkc.tsv b/data/nmt_nfkc.tsv
|
|
||||||
index 1ce2b71..5c8b48b 100644
|
|
||||||
--- a/data/nmt_nfkc.tsv
|
|
||||||
+++ b/data/nmt_nfkc.tsv
|
|
||||||
@@ -57263,8 +57263,7 @@ FB9 F90 FB5 # ྐྵ => ྐྵ
|
|
||||||
200A 20 # =>
|
|
||||||
200B 20 # =>
|
|
||||||
200C 20 # =>
|
|
||||||
-200D 20 # =>
|
|
||||||
-200E 20 # =>
|
|
||||||
+200E 20 # =>
|
|
||||||
200F 20 # =>
|
|
||||||
2011 2010 # ‑ => ‐
|
|
||||||
2017 20 333 # ‗ => ̳
|
|
||||||
diff --git a/data/nmt_nfkc_cf.tsv b/data/nmt_nfkc_cf.tsv
|
|
||||||
index 2178882..0d0e708 100644
|
|
||||||
--- a/data/nmt_nfkc_cf.tsv
|
|
||||||
+++ b/data/nmt_nfkc_cf.tsv
|
|
||||||
@@ -57980,8 +57980,7 @@ FB9 F90 FB5 # ྐྵ => ྐྵ
|
|
||||||
200A 20 # =>
|
|
||||||
200B 20 # =>
|
|
||||||
200C 20 # =>
|
|
||||||
-200D 20 # =>
|
|
||||||
-200E 20 # =>
|
|
||||||
+200E 20 # =>
|
|
||||||
200F 20 # =>
|
|
||||||
2011 2010 # ‑ => ‐
|
|
||||||
2017 20 333 # ‗ => ̳
|
|
||||||
diff --git a/src/builder.cc b/src/builder.cc
|
|
||||||
index d9442d3..9f47aac 100644
|
|
||||||
--- a/src/builder.cc
|
|
||||||
+++ b/src/builder.cc
|
|
||||||
@@ -366,7 +366,6 @@ util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
|
|
||||||
nfkc_map[{0xFEFF}] = {0x20}; // ZERO WIDTH NO-BREAK
|
|
||||||
nfkc_map[{0xFFFD}] = {0x20}; // REPLACEMENT CHARACTER
|
|
||||||
nfkc_map[{0x200C}] = {0x20}; // ZERO WIDTH NON-JOINER
|
|
||||||
- nfkc_map[{0x200D}] = {0x20}; // ZERO WIDTH JOINER
|
|
||||||
|
|
||||||
// Ascii Control characters
|
|
||||||
nfkc_map[{0x0001}] = {};
|
|
||||||
--
|
|
||||||
|
|
||||||
@ -1,25 +0,0 @@
|
|||||||
From 21aa7a9d6a3bd6a98c480bea02e0e81b21f411af Mon Sep 17 00:00:00 2001
|
|
||||||
From: joe <219651+AdolfVonKleist@users.noreply.github.com>
|
|
||||||
Date: Mon, 22 Mar 2021 17:26:20 +0000
|
|
||||||
Subject: [PATCH 7/7] Restore the sentence boundary marker insertion for the
|
|
||||||
unigram trainer. Dramatically speeds up training time.
|
|
||||||
|
|
||||||
---
|
|
||||||
src/unigram_model_trainer.cc | 1 +
|
|
||||||
1 file changed, 1 insertion(+)
|
|
||||||
|
|
||||||
diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc
|
|
||||||
index 5f26771..94c7adb 100644
|
|
||||||
--- a/src/unigram_model_trainer.cc
|
|
||||||
+++ b/src/unigram_model_trainer.cc
|
|
||||||
@@ -119,6 +119,7 @@ TrainerModel::SentencePieces Trainer::MakeSeedSentencePieces() const {
|
|
||||||
all_chars[string_util::UnicodeCharToUTF8(c)] += w.second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
+ array.push_back(kSentenceBoundary); // sentence boundary marker.
|
|
||||||
}
|
|
||||||
|
|
||||||
const node_int_type n = array.size();
|
|
||||||
--
|
|
||||||
2.18.0.huawei.25
|
|
||||||
|
|
||||||
@ -1,22 +0,0 @@
|
|||||||
diff --git a/third_party/esaxx/sais.hxx b/third_party/esaxx/sais.hxx
|
|
||||||
index f1702f8..b9071c8 100644
|
|
||||||
--- a/third_party/esaxx/sais.hxx
|
|
||||||
+++ b/third_party/esaxx/sais.hxx
|
|
||||||
@@ -179,7 +179,7 @@ typedef typename std::iterator_traits<string_type>::value_type char_type;
|
|
||||||
sort all the S-substrings */
|
|
||||||
if(fs < (maxthreads * k)) {
|
|
||||||
index_type *C, *B;
|
|
||||||
- if((C = new index_type[maxthreads * k]) == 0) { return -2; }
|
|
||||||
+ C = new index_type[maxthreads * k];
|
|
||||||
B = (1 < maxthreads) ? C + k : C;
|
|
||||||
getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
|
|
||||||
#ifdef _OPENMP
|
|
||||||
@@ -271,7 +271,7 @@ typedef typename std::iterator_traits<string_type>::value_type char_type;
|
|
||||||
/* stage 3: induce the result for the original problem */
|
|
||||||
if(fs < (maxthreads * k)) {
|
|
||||||
index_type *B, *C;
|
|
||||||
- if((C = new index_type[maxthreads * k]) == 0) { return -2; }
|
|
||||||
+ C = new index_type[maxthreads * k];
|
|
||||||
B = (1 < maxthreads) ? C + k : C;
|
|
||||||
/* put all left-most S characters into their buckets */
|
|
||||||
getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
|
|
||||||
@ -1,29 +0,0 @@
|
|||||||
From a069cd5518c11750b734b85887dcc74ec6f9457f Mon Sep 17 00:00:00 2001
|
|
||||||
From: mark <erasaur@gmail.com>
|
|
||||||
Date: Wed, 10 Feb 2021 10:59:56 -0800
|
|
||||||
Subject: [PATCH 6/7] only install proto headers if not using builtin proto
|
|
||||||
|
|
||||||
---
|
|
||||||
src/CMakeLists.txt | 5 ++++-
|
|
||||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
|
|
||||||
index 87765e5..3d31259 100644
|
|
||||||
--- a/src/CMakeLists.txt
|
|
||||||
+++ b/src/CMakeLists.txt
|
|
||||||
@@ -272,8 +272,11 @@ install(TARGETS ${SPM_INSTALLTARGETS}
|
|
||||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
|
||||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
|
||||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
|
||||||
-install(FILES sentencepiece_trainer.h sentencepiece_processor.h
|
|
||||||
+install(FILES sentencepiece_trainer.h sentencepiece_processor.h ${SPM_PROTO_HDRS}
|
|
||||||
DESTINATION ${CMAKE_INSTALL_INCDIR})
|
|
||||||
+if (NOT SPM_USE_BUILTIN_PROTOBUF)
|
|
||||||
+ install(FILES ${SPM_PROTO_HDRS} DESTINATION ${CMAKE_INSTALL_INCDIR})
|
|
||||||
+endif()
|
|
||||||
|
|
||||||
file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/data" data_dir)
|
|
||||||
|
|
||||||
--
|
|
||||||
2.18.0.huawei.25
|
|
||||||
|
|
||||||
@ -1,27 +0,0 @@
|
|||||||
From cc1380a1608d8e7913e943e8530798c882c4fe6c Mon Sep 17 00:00:00 2001
|
|
||||||
From: Aaron Burke <aaburke@microsoft.com>
|
|
||||||
Date: Fri, 21 Aug 2020 10:15:42 -0700
|
|
||||||
Subject: [PATCH 2/7] sentencepiece.pc should be installed from
|
|
||||||
CMAKE_CURRENT_BINARY_DIR, not CMAKE_BINARY_DIR, to support being included
|
|
||||||
(and installed) from other projects
|
|
||||||
|
|
||||||
---
|
|
||||||
CMakeLists.txt | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
||||||
index 6481dfd..9124f9e 100644
|
|
||||||
--- a/CMakeLists.txt
|
|
||||||
+++ b/CMakeLists.txt
|
|
||||||
@@ -78,7 +78,7 @@ configure_file("${PROJECT_SOURCE_DIR}/config.h.in" "config.h")
|
|
||||||
configure_file("${PROJECT_SOURCE_DIR}/sentencepiece.pc.in" "sentencepiece.pc" @ONLY)
|
|
||||||
|
|
||||||
if (NOT MSVC)
|
|
||||||
- install(FILES "${CMAKE_BINARY_DIR}/sentencepiece.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
|
||||||
+ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/sentencepiece.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR})
|
|
||||||
--
|
|
||||||
2.18.0.huawei.25
|
|
||||||
|
|
||||||
@ -1,19 +1,10 @@
|
|||||||
Name: sentencepiece
|
Name: sentencepiece
|
||||||
Version: 0.1.92
|
Version: 0.1.98
|
||||||
Release: 6
|
Release: 1
|
||||||
Summary: An unsupervised text tokenizer and detokenizer
|
Summary: An unsupervised text tokenizer and detokenizer
|
||||||
License: Apache-2.0
|
License: Apache-2.0
|
||||||
URL: https://github.com/google/sentencepiece
|
URL: https://github.com/google/sentencepiece
|
||||||
Source0: https://github.com/google/sentencepiece/archive/v%{version}.tar.gz
|
Source0: https://github.com/google/sentencepiece/archive/v%{version}.tar.gz
|
||||||
Patch0: Removed-codes-where-Zero-Width-Joiner-replaced-with-.patch
|
|
||||||
Patch1: fix_of_an_unattainable_condition.patch
|
|
||||||
Patch2: Added-split_digits-to-SentencePieceTrainer.patch
|
|
||||||
Patch3: sentencepiece.pc-should-be-installed-from-CMAKE_CURR.patch
|
|
||||||
Patch4: Create-options.md.patch
|
|
||||||
Patch5: Fix-FTBFS-on-armel-mips-powerpc-m68k-and-sh4.patch
|
|
||||||
Patch6: Add-missing-include-for-BYTE_ORDER.patch
|
|
||||||
Patch7: only-install-proto-headers-if-not-using-builtin-prot.patch
|
|
||||||
Patch8: Restore-the-sentence-boundary-marker-insertion-for-t.patch
|
|
||||||
BuildRequires: gcc-c++ gcc autoconf pkgconfig protobuf-compiler protobuf
|
BuildRequires: gcc-c++ gcc autoconf pkgconfig protobuf-compiler protobuf
|
||||||
BuildRequires: cmake >= 3.14.0
|
BuildRequires: cmake >= 3.14.0
|
||||||
Requires: protobuf protobuf-compiler
|
Requires: protobuf protobuf-compiler
|
||||||
@ -47,6 +38,8 @@ cd ../../
|
|||||||
%install
|
%install
|
||||||
cd cmake/build
|
cd cmake/build
|
||||||
make install
|
make install
|
||||||
|
sed -i'' -e "s,%{buildroot},," %{buildroot}%{_libdir}/pkgconfig/sentencepiece.pc
|
||||||
|
sed -i'' -e "s,${prefix}/lib,%{_libdir}," %{buildroot}%{_libdir}/pkgconfig/sentencepiece.pc
|
||||||
|
|
||||||
%files
|
%files
|
||||||
%defattr(-,root,root)
|
%defattr(-,root,root)
|
||||||
@ -56,6 +49,9 @@ make install
|
|||||||
%{_includedir}/sentencepiece*.h
|
%{_includedir}/sentencepiece*.h
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Fri May 12 2023 liuyongqi <liuyongqi5@huawei.com> - 0.1.98-1
|
||||||
|
- Sentencepiece version update: Upgraded from 0.1.92 to 0.1.98
|
||||||
|
|
||||||
* Fri Nov 27 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92.6
|
* Fri Nov 27 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92.6
|
||||||
- Fix split_digits support to SentencepieceTrainer spec parser
|
- Fix split_digits support to SentencepieceTrainer spec parser
|
||||||
- Add sentencepiece.pc install
|
- Add sentencepiece.pc install
|
||||||
@ -64,13 +60,18 @@ make install
|
|||||||
- Fix endian problem on android platform
|
- Fix endian problem on android platform
|
||||||
- Fix pb protobuf header file can't find problem
|
- Fix pb protobuf header file can't find problem
|
||||||
- Restore the sentence boundary
|
- Restore the sentence boundary
|
||||||
|
|
||||||
* Tue Nov 16 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92.5
|
* Tue Nov 16 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92.5
|
||||||
- add README.md/README.en.md
|
- add README.md/README.en.md
|
||||||
|
|
||||||
* Tue Nov 2 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-4
|
* Tue Nov 2 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-4
|
||||||
- fix of an unattainable condition
|
- fix of an unattainable condition
|
||||||
|
|
||||||
* Tue Nov 2 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-3
|
* Tue Nov 2 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-3
|
||||||
- Prevent Zero Width Joiner replaced with whitespace
|
- Prevent Zero Width Joiner replaced with whitespace
|
||||||
|
|
||||||
* Wed Sep 29 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-2
|
* Wed Sep 29 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-2
|
||||||
- add test cases
|
- add test cases
|
||||||
|
|
||||||
* Wed Sep 23 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-1
|
* Wed Sep 23 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-1
|
||||||
- package init
|
- package init
|
||||||
|
|||||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user